Create following tables in MySQL and populate with data.
Run following sqoop command to import data from tables to HDFS.
$ sqoop import --connect jdbc:mysql://localhost/test --username root --table SCD --target-dir scd_dir
$ sqoop import --connect jdbc:mysql://localhost/test --username root --table SourceData --target-dir sourcedata_dir
Run following command on grunt command line to create bag
grunt> bag_scd = load 'scd_dir/part-m-00000' using PigStorage(',') as (id:int,name:chararray,class:int,location:chararray,typeacc:chararray,doj:chararray,stdate:chararray,enddate:chararray,flag:chararray);
bag_snapshot = load 'source_data_dir/part-m-00000' using PigStorage(',') as (id:int,name:chararray,class:int,location:chararray,typeacc:chararray,doj:chararray);
Run the following pig command to left outer join source data & scd tables.
grunt> bag_inserted = join bag_snapshot by id left outer, bag_scd by id;
Run the following pig command to dump data stored in bag_inserted.
(1010,Kumar,1,Hyd,Savings,2/24/2015,,,,,,,,,)
(1011,Harish,2,Hyd,Savings,10/5/2008,1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y)
(1012,Mohan,2,Bang,Savings,10/5/2012,1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y)
(1013,Krishna,3,Hyd,Current,3/15/2012,1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y)
(1017,Jani,1,Chen,Current,6/15/2012,1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y)
(1018,Radha,1,Pune,Savings,2/24/2015,,,,,,,,,)
grunt> describe bag_update1;
bag_update1: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_update4 = FILTER bag_update1 BY flag == 'Y';
grunt> dump bag_update4;
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y)
grunt> describe bag_update4;
bag_update4: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_scd_n = filter bag_scd by flag == 'N';
grunt> dump bag_scd_n;
(1014,Suresh,3,Bang,Savings,1/25/2009,1/25/2009,2/10/2015,N)
grunt> describe bag_scd_n;
bag_scd_n: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_deleted_ex = foreach bag_deleted4 generate id,name,class,location,typeacc,doj,stdate,'24-2-2015' as enddate,'N' as flag;
grunt> dump bag_update_ex;
1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,24-2-2015,N)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,24-2-2015,N)
grunt> describe bag_update_ex;
bag_update_ex: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> describe bag_update_ex2_unuse;
bag_update_ex2_unuse: {bag_snapshot::id: int,bag_snapshot::name: chararray,bag_snapshot::class: int,bag_snapshot::location: chararray,bag_snapshot::typeacc: chararray,bag_snapshot::doj: chararray,bag_update4::id: int,bag_update4::name: chararray,bag_update4::class: int,bag_update4::location: chararray,bag_update4::typeacc: chararray,bag_update4::doj: chararray,bag_update4::stdate: chararray,bag_update4::enddate: chararray,bag_update4::flag: chararray}
grunt> bag_update_ex3 = foreach bag_update_ex2_unuse generate bag_snapshot::id as id,bag_snapshot::name as name,bag_snapshot::class as class,bag_snapshot::location as location,bag_snapshot::typeacc as typeacc,bag_snapshot::doj as doj,'25-2-2015' as stdate,'31-12-9999' as enddate,'Y' as flag;
(1012,Mohan,2,Bang,Savings,10/5/2012,25-2-2015,31-12-9999,Y)
(1013,Krishna,3,Hyd,Current,3/15/2012,25-2-2015,31-12-9999,Y)
grunt> describe bag_update_ex3;
bag_update_ex3: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> dump bag_active_unuse2;
grunt> dump bag_active_unuse5;
(1012,Mohan,2,Bang,Savings,10/5/2012,25-2-2015,31-12-9999,Y)
(1013,Krishna,3,Hyd,Current,3/15/2012,25-2-2015,31-12-9999,Y)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,24-2-2015,N)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,24-2-2015,N)
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,24-2-2015,N)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,24-2-2015,N)
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y)
(1010,Kumar,1,Hyd,Savings,2/24/2015,25-2-2015,31-12-9999,Y)
(1018,Radha,1,Pune,Savings,2/24/2015,25-2-2015,31-12-9999,Y)
grunt> STORE bag_union into 'scd_dir/scd2file' using PigStorage(',');
grunt> cat scd_dir/scd2file
1022,Mohan,1,Bang,Current,1/25/2012,1/25/2012,1/1/2015,N
1014,Suresh,3,Bang,Savings,1/25/2009,1/25/2009,2/10/2015,N
1012,Mohan,2,Bang,Savings,10/5/2012,25-2-2015,31-12-9999,Y
1013,Krishna,3,Hyd,Current,3/15/2012,25-2-2015,31-12-9999,Y
1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,24-2-2015,N
1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,24-2-2015,N
1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,24-2-2015,N
1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,24-2-2015,N
1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y
1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y
1010,Kumar,1,Hyd,Savings,2/24/2015,25-2-2015,31-12-9999,Y
1018,Radha,1,Pune,Savings,2/24/2015,25-2-2015,31-12-9999,Y
grunt> dump bag_union;
Run following sqoop command to import data from tables to HDFS.
$ sqoop import --connect jdbc:mysql://localhost/test --username root --table SCD --target-dir scd_dir
$ sqoop import --connect jdbc:mysql://localhost/test --username root --table SourceData --target-dir sourcedata_dir
Run following command on grunt command line to create bag
grunt> bag_scd = load 'scd_dir/part-m-00000' using PigStorage(',') as (id:int,name:chararray,class:int,location:chararray,typeacc:chararray,doj:chararray,stdate:chararray,enddate:chararray,flag:chararray);
bag_snapshot = load 'source_data_dir/part-m-00000' using PigStorage(',') as (id:int,name:chararray,class:int,location:chararray,typeacc:chararray,doj:chararray);
Run the following pig command to left outer join source data & scd tables.
grunt> bag_inserted = join bag_snapshot by id left outer, bag_scd by id;
Run the following pig command to dump data stored in bag_inserted.
(1010,Kumar,1,Hyd,Savings,2/24/2015,,,,,,,,,)
(1011,Harish,2,Hyd,Savings,10/5/2008,1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y)
(1012,Mohan,2,Bang,Savings,10/5/2012,1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y)
(1013,Krishna,3,Hyd,Current,3/15/2012,1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y)
(1017,Jani,1,Chen,Current,6/15/2012,1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y)
(1018,Radha,1,Pune,Savings,2/24/2015,,,,,,,,,)
Run following command to describe bag_inserted.
grunt> describe bag_inserted;
bag_inserted: {bag_snapshot::id: int,bag_snapshot::name: chararray,bag_snapshot::class: int,bag_snapshot::location: chararray,bag_snapshot::typeacc: chararray,bag_snapshot::doj: chararray,bag_scd::id: int,bag_scd::name: chararray,bag_scd::class: int,bag_scd::location: chararray,bag_scd::typeacc: chararray,bag_scd::doj: chararray,bag_scd::stdate: chararray,bag_scd::enddate: chararray,bag_scd::flag: chararray}
Run following command to filter SCD rows with NULL id. These rows are new join records.
grunt> bag_inserted2 = filter bag_inserted by bag_scd::id is NULL;
Run following command to dump values of bag_inserted2.
grunt> dump bag_inserted2;
(1010,Kumar,1,Hyd,Savings,2/24/2015,,,,,,,,,)
(1018,Radha,1,Pune,Savings,2/24/2015,,,,,,,,,)
grunt> describe bag_inserted2;
bag_inserted2: {bag_snapshot::id: int,bag_snapshot::name: chararray,bag_snapshot::class: int,bag_snapshot::location: chararray,bag_snapshot::typeacc: chararray,bag_snapshot::doj: chararray,bag_scd::id: int,bag_scd::name: chararray,bag_scd::class: int,bag_scd::location: chararray,bag_scd::typeacc: chararray,bag_scd::doj: chararray,bag_scd::stdate: chararray,bag_scd::enddate: chararray,bag_scd::flag: chararray}
Run following command to output new join records with just the fields in source data table.
grunt> bag_inserted3 = FOREACH bag_inserted2 generate bag_snapshot::id as id,bag_snapshot::name as name,bag_snapshot::class as class,bag_snapshot::location as location,bag_snapshot::typeacc as typeacc,bag_snapshot::doj as doj;
grunt> dump bag_inserted3;
(1010,Kumar,1,Hyd,Savings,2/24/2015)
(1018,Radha,1,Pune,Savings,2/24/2015)
grunt> describe bag_inserted3;
bag_inserted3: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray}
Run following commands to identify deleted records. Note bag_scd left out join.
bag_deleted = join bag_scd by id left outer,bag_snapshot by id;
grunt> dump bag_deleted;
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y,1011,Harish,2,Hyd,Savings,10/5/2008)
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y,1012,Mohan,2,Bang,Savings,10/5/2012)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y,1013,Krishna,3,Hyd,Current,3/15/2012)
(1014,Suresh,3,Bang,Savings,1/25/2009,1/25/2009,2/10/2015,N,,,,,,)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,10/8/2020,Y,,,,,,)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y,1017,Jani,1,Chen,Current,6/15/2012)
(1022,Mohan,1,Bang,Current,1/25/2012,1/25/2012,1/1/2015,N,,,,,,)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y,,,,,,)
grunt> describe bag_deleted;
bag_deleted: {bag_scd::id: int,bag_scd::name: chararray,bag_scd::class: int,bag_scd::location: chararray,bag_scd::typeacc: chararray,bag_scd::doj: chararray,bag_scd::stdate: chararray,bag_scd::enddate: chararray,bag_scd::flag: chararray,bag_snapshot::id: int,bag_snapshot::name: chararray,bag_snapshot::class: int,bag_snapshot::location: chararray,bag_snapshot::typeacc: chararray,bag_snapshot::doj: chararray}
grunt>bag_deleted2 = filter bag_deleted by bag_snapshot::id is NULL;
grunt> dump bag_deleted2;
(1014,Suresh,3,Bang,Savings,1/25/2009,1/25/2009,2/10/2015,N,,,,,,)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,10/8/2020,Y,,,,,,)
(1022,Mohan,1,Bang,Current,1/25/2012,1/25/2012,1/1/2015,N,,,,,,)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y,,,,,,)
grunt> describe bag_deleted2;
bag_deleted2: {bag_scd::id: int,bag_scd::name: chararray,bag_scd::class: int,bag_scd::location: chararray,bag_scd::typeacc: chararray,bag_scd::doj: chararray,bag_scd::stdate: chararray,bag_scd::enddate: chararray,bag_scd::flag: chararray,bag_snapshot::id: int,bag_snapshot::name: chararray,bag_snapshot::class: int,bag_snapshot::location: chararray,bag_snapshot::typeacc: chararray,bag_snapshot::doj: chararray}
grunt>bag_deleted3 = FOREACH bag_deleted2 generate bag_scd::id as id,bag_scd::name as name,bag_scd::class as class,bag_scd::location as location,bag_scd::typeacc as typeacc,bag_scd::doj as doj,bag_scd::stdate as stdate,bag_scd::enddate as enddate,bag_scd::flag as flag;
grunt> dump bag_deleted3;
(1014,Suresh,3,Bang,Savings,1/25/2009,1/25/2009,2/10/2015,N)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,10/8/2020,Y)
(1022,Mohan,1,Bang,Current,1/25/2012,1/25/2012,1/1/2015,N)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y)
grunt> describe bag_deleted3;
bag_deleted3: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_deleted4 = FILTER bag_deleted3 BY flag == 'Y';
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,10/8/2020,Y)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y)
grunt> describe bag_deleted4;
bag_deleted4: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_update = JOIN bag_scd by id,bag_snapshot by id;
grunt> dump bag_update;
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y,1011,Harish,2,Hyd,Savings,10/5/2008)
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y,1012,Mohan,2,Bang,Savings,10/5/2012)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y,1013,Krishna,3,Hyd,Current,3/15/2012)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y,1017,Jani,1,Chen,Current,6/15/2012)
grunt> describe bag_update;
bag_update: {bag_scd::id: int,bag_scd::name: chararray,bag_scd::class: int,bag_scd::location: chararray,bag_scd::typeacc: chararray,bag_scd::doj: chararray,bag_scd::stdate: chararray,bag_scd::enddate: chararray,bag_scd::flag: chararray,bag_snapshot::id: int,bag_snapshot::name: chararray,bag_snapshot::class: int,bag_snapshot::location: chararray,bag_snapshot::typeacc: chararray,bag_snapshot::doj: chararray}
grunt> bag_update11 = filter bag_update by bag_scd::id != bag_snapshot::id OR bag_scd::name != bag_snapshot::name OR bag_scd::class != bag_snapshot::class OR bag_scd::location != bag_snapshot::location OR bag_scd::typeacc != bag_snapshot::typeacc;
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y,1012,Mohan,2,Bang,Savings,10/5/2012)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y,1013,Krishna,3,Hyd,Current,3/15/2012)
grunt> describe bag_update11;
bag_update11: {bag_scd::id: int,bag_scd::name: chararray,bag_scd::class: int,bag_scd::location: chararray,bag_scd::typeacc: chararray,bag_scd::doj: chararray,bag_scd::stdate: chararray,bag_scd::enddate: chararray,bag_scd::flag: chararray,bag_snapshot::id: int,bag_snapshot::name: chararray,bag_snapshot::class: int,bag_snapshot::location: chararray,bag_snapshot::typeacc: chararray,bag_snapshot::doj: chararray}
grunt> bag_update1 = FOREACH bag_update11 GENERATE bag_scd::id as id,bag_scd::name as name,bag_scd::class as class,bag_scd::location as location,bag_scd::typeacc as typeacc,bag_scd::doj as doj,bag_scd::stdate as stdate,bag_scd::enddate as enddate,bag_scd::flag as flag;
grunt> dump bag_update11;
grunt> dump bag_update11;
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y,1012,Mohan,2,Bang,Savings,10/5/2012)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y,1013,Krishna,3,Hyd,Current,3/15/2012)
grunt> describe bag_update11;
bag_update11: {bag_scd::id: int,bag_scd::name: chararray,bag_scd::class: int,bag_scd::location: chararray,bag_scd::typeacc: chararray,bag_scd::doj: chararray,bag_scd::stdate: chararray,bag_scd::enddate: chararray,bag_scd::flag: chararray,bag_snapshot::id: int,bag_snapshot::name: chararray,bag_snapshot::class: int,bag_snapshot::location: chararray,bag_snapshot::typeacc: chararray,bag_snapshot::doj: chararray}
grunt> bag_update1 = FOREACH bag_update11 GENERATE bag_scd::id as id,bag_scd::name as name,bag_scd::class as class,bag_scd::location as location,bag_scd::typeacc as typeacc,bag_scd::doj as doj,bag_scd::stdate as stdate,bag_scd::enddate as enddate,bag_scd::flag as flag;
grunt> dump bag_update1;
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y)
grunt> describe bag_update1;
bag_update1: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_update4 = FILTER bag_update1 BY flag == 'Y';
grunt> dump bag_update4;
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y)
grunt> describe bag_update4;
bag_update4: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> dump bag_scd_n;
(1022,Mohan,1,Bang,Current,1/25/2012,1/25/2012,1/1/2015,N)
(1014,Suresh,3,Bang,Savings,1/25/2009,1/25/2009,2/10/2015,N)
grunt> describe bag_scd_n;
bag_scd_n: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_inserted_ex = foreach bag_inserted3 generate id,name,class,location,typeacc,doj,'25-2-2015' as stdate,'31-12-9999' as enddate,'Y' as flag;
grunt> dump bag_inserted_ex;
(1010,Kumar,1,Hyd,Savings,2/24/2015,25-2-2015,31-12-9999,Y)
(1018,Radha,1,Pune,Savings,2/24/2015,25-2-2015,31-12-9999,Y)
grunt> describe bag_inserted_ex;
bag_inserted_ex: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> dump bag_deleted_ex;
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,24-2-2015,N)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,24-2-2015,N)
grunt> describe bag_deleted_ex;
bag_deleted_ex: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_update_ex = foreach bag_update4 generate id,name,class,location,typeacc,doj,stdate,'24-2-2015' as enddate,'N' as flag;
grunt> dump bag_update_ex;
1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,24-2-2015,N)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,24-2-2015,N)
grunt> describe bag_update_ex;
bag_update_ex: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_update_ex2_unuse = JOIN bag_snapshot by id,bag_update4 by id;
grunt> dump bag_update_ex2_unuse;
grunt> dump bag_update_ex2_unuse;
(1012,Mohan,2,Bang,Savings,10/5/2012,1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y)
(1013,Krishna,3,Hyd,Current,3/15/2012,1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y)
bag_update_ex2_unuse: {bag_snapshot::id: int,bag_snapshot::name: chararray,bag_snapshot::class: int,bag_snapshot::location: chararray,bag_snapshot::typeacc: chararray,bag_snapshot::doj: chararray,bag_update4::id: int,bag_update4::name: chararray,bag_update4::class: int,bag_update4::location: chararray,bag_update4::typeacc: chararray,bag_update4::doj: chararray,bag_update4::stdate: chararray,bag_update4::enddate: chararray,bag_update4::flag: chararray}
grunt> bag_update_ex3 = foreach bag_update_ex2_unuse generate bag_snapshot::id as id,bag_snapshot::name as name,bag_snapshot::class as class,bag_snapshot::location as location,bag_snapshot::typeacc as typeacc,bag_snapshot::doj as doj,'25-2-2015' as stdate,'31-12-9999' as enddate,'Y' as flag;
grunt> dump bag_update_ex3;
(1013,Krishna,3,Hyd,Current,3/15/2012,25-2-2015,31-12-9999,Y)
grunt> describe bag_update_ex3;
bag_update_ex3: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_active_unuse = filter bag_scd by flag == 'Y';
grunt> dump bag_active_unuse;
grunt> dump bag_active_unuse;
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y)
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,10/8/2020,Y)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y)
grunt> describe bag_active_unuse;
bag_active_unuse: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_active_unuse2 = Join bag_active_unuse by id left outer,bag_update_ex3 by id;
grunt> dump bag_active_unuse2;
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y,,,,,,,,,)
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,10/8/2020,Y,1012,Mohan,2,Bang,Savings,10/5/2012,25-2-2015,31-12-9999,Y)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,10/8/2020,Y,1013,Krishna,3,Hyd,Current,3/15/2012,25-2-2015,31-12-9999,Y)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,10/8/2020,Y,,,,,,,,,)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y,,,,,,,,,)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y,,,,,,,,,)
grunt> describe bag_active_unuse2;
bag_active_unuse2: {bag_active_unuse::id: int,bag_active_unuse::name: chararray,bag_active_unuse::class: int,bag_active_unuse::location: chararray,bag_active_unuse::typeacc: chararray,bag_active_unuse::doj: chararray,bag_active_unuse::stdate: chararray,bag_active_unuse::enddate: chararray,bag_active_unuse::flag: chararray,bag_update_ex3::id: int,bag_update_ex3::name: chararray,bag_update_ex3::class: int,bag_update_ex3::location: chararray,bag_update_ex3::typeacc: chararray,bag_update_ex3::doj: chararray,bag_update_ex3::stdate: chararray,bag_update_ex3::enddate: chararray,bag_update_ex3::flag: chararray}
grunt> bag_active_unuse3 = filter bag_active_unuse2 by bag_update_ex3::id is NULL;
grunt> dump bag_active_unuse3;
bag_active_unuse2: {bag_active_unuse::id: int,bag_active_unuse::name: chararray,bag_active_unuse::class: int,bag_active_unuse::location: chararray,bag_active_unuse::typeacc: chararray,bag_active_unuse::doj: chararray,bag_active_unuse::stdate: chararray,bag_active_unuse::enddate: chararray,bag_active_unuse::flag: chararray,bag_update_ex3::id: int,bag_update_ex3::name: chararray,bag_update_ex3::class: int,bag_update_ex3::location: chararray,bag_update_ex3::typeacc: chararray,bag_update_ex3::doj: chararray,bag_update_ex3::stdate: chararray,bag_update_ex3::enddate: chararray,bag_update_ex3::flag: chararray}
grunt> bag_active_unuse3 = filter bag_active_unuse2 by bag_update_ex3::id is NULL;
grunt> dump bag_active_unuse3;
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y,,,,,,,,,)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,10/8/2020,Y,,,,,,,,,)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y,,,,,,,,,)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y,,,,,,,,,)
grunt> describe bag_active_unuse3;
bag_active_unuse3: {bag_active_unuse::id: int,bag_active_unuse::name: chararray,bag_active_unuse::class: int,bag_active_unuse::location: chararray,bag_active_unuse::typeacc: chararray,bag_active_unuse::doj: chararray,bag_active_unuse::stdate: chararray,bag_active_unuse::enddate: chararray,bag_active_unuse::flag: chararray,bag_update_ex3::id: int,bag_update_ex3::name: chararray,bag_update_ex3::class: int,bag_update_ex3::location: chararray,bag_update_ex3::typeacc: chararray,bag_update_ex3::doj: chararray,bag_update_ex3::stdate: chararray,bag_update_ex3::enddate: chararray,bag_update_ex3::flag: chararray}
grunt> bag_active_unuse4 = FOREACH bag_active_unuse3 GENERATE bag_active_unuse::id as id,bag_active_unuse::name as name,bag_active_unuse::class as class,bag_active_unuse::location as location,bag_active_unuse::typeacc as typeacc,bag_active_unuse::doj as doj,bag_active_unuse::stdate as stdate,bag_active_unuse::enddate as enddate,bag_active_unuse::flag as flag;
grunt> dump bag_active_unuse4;
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,10/8/2020,Y)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y)
grunt> dump bag_active_unuse4;
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,10/8/2020,Y)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y)
grunt> describe bag_active_unuse4;
bag_active_unuse4: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_active_unuse5 = join bag_active_unuse4 by id left outer,bag_deleted_ex by id;
grunt> dump bag_active_unuse5;
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y,,,,,,,,,)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,10/8/2020,Y,1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,24-2-2015,N)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y,,,,,,,,,)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y,1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,24-2-2015,N)
grunt> describe bag_active_unuse5;
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y,,,,,,,,,)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,10/8/2020,Y,1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,24-2-2015,N)
grunt> describe bag_active_unuse5;
bag_active_unuse5: {bag_active_unuse4::id: int,bag_active_unuse4::name: chararray,bag_active_unuse4::class: int,bag_active_unuse4::location: chararray,bag_active_unuse4::typeacc: chararray,bag_active_unuse4::doj: chararray,bag_active_unuse4::stdate: chararray,bag_active_unuse4::enddate: chararray,bag_active_unuse4::flag: chararray,bag_deleted_ex::id: int,bag_deleted_ex::name: chararray,bag_deleted_ex::class: int,bag_deleted_ex::location: chararray,bag_deleted_ex::typeacc: chararray,bag_deleted_ex::doj: chararray,bag_deleted_ex::stdate: chararray,bag_deleted_ex::enddate: chararray,bag_deleted_ex::flag: chararray}
grunt> bag_active_unuse6 = filter bag_active_unuse5 by bag_deleted_ex::id is NULL;
grunt> dump bag_active_unuse6;
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y,,,,,,,,,)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y,,,,,,,,,)
grunt> describe bag_active_unuse6;
bag_active_unuse6: {bag_active_unuse4::id: int,bag_active_unuse4::name: chararray,bag_active_unuse4::class: int,bag_active_unuse4::location: chararray,bag_active_unuse4::typeacc: chararray,bag_active_unuse4::doj: chararray,bag_active_unuse4::stdate: chararray,bag_active_unuse4::enddate: chararray,bag_active_unuse4::flag: chararray,bag_deleted_ex::id: int,bag_deleted_ex::name: chararray,bag_deleted_ex::class: int,bag_deleted_ex::location: chararray,bag_deleted_ex::typeacc: chararray,bag_deleted_ex::doj: chararray,bag_deleted_ex::stdate: chararray,bag_deleted_ex::enddate: chararray,bag_deleted_ex::flag: chararray}
bag_active_unuse6: {bag_active_unuse4::id: int,bag_active_unuse4::name: chararray,bag_active_unuse4::class: int,bag_active_unuse4::location: chararray,bag_active_unuse4::typeacc: chararray,bag_active_unuse4::doj: chararray,bag_active_unuse4::stdate: chararray,bag_active_unuse4::enddate: chararray,bag_active_unuse4::flag: chararray,bag_deleted_ex::id: int,bag_deleted_ex::name: chararray,bag_deleted_ex::class: int,bag_deleted_ex::location: chararray,bag_deleted_ex::typeacc: chararray,bag_deleted_ex::doj: chararray,bag_deleted_ex::stdate: chararray,bag_deleted_ex::enddate: chararray,bag_deleted_ex::flag: chararray}
grunt> bag_active7 = foreach bag_active_unuse6 GENERATE bag_active_unuse4::id as id,bag_active_unuse4::name as name,bag_active_unuse4::class as class,bag_active_unuse4::location as location,bag_active_unuse4::typeacc as typeacc,bag_active_unuse4::doj as doj,bag_active_unuse4::stdate as stdate,bag_active_unuse4::enddate as enddate,bag_active_unuse4::flag as flag;
grunt> dump bag_active7;
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y)
grunt> describe bag_active7;
bag_active7: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_union = UNION bag_active7,bag_scd_n,bag_inserted_ex,bag_deleted_ex,bag_update_ex,bag_update_ex3;
grunt> dump bag_union;
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y)
grunt> describe bag_active7;
bag_active7: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> bag_union = UNION bag_active7,bag_scd_n,bag_inserted_ex,bag_deleted_ex,bag_update_ex,bag_update_ex3;
grunt> dump bag_union;
(1022,Mohan,1,Bang,Current,1/25/2012,1/25/2012,1/1/2015,N)
(1014,Suresh,3,Bang,Savings,1/25/2009,1/25/2009,2/10/2015,N)(1012,Mohan,2,Bang,Savings,10/5/2012,25-2-2015,31-12-9999,Y)
(1013,Krishna,3,Hyd,Current,3/15/2012,25-2-2015,31-12-9999,Y)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,24-2-2015,N)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,24-2-2015,N)
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,24-2-2015,N)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,24-2-2015,N)
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y)
(1010,Kumar,1,Hyd,Savings,2/24/2015,25-2-2015,31-12-9999,Y)
(1018,Radha,1,Pune,Savings,2/24/2015,25-2-2015,31-12-9999,Y)
grunt> describe bag_union;
bag_union: {id: int,name: chararray,class: int,location: chararray,typeacc: chararray,doj: chararray,stdate: chararray,enddate: chararray,flag: chararray}
grunt> cat scd_dir/scd2file
1022,Mohan,1,Bang,Current,1/25/2012,1/25/2012,1/1/2015,N
1014,Suresh,3,Bang,Savings,1/25/2009,1/25/2009,2/10/2015,N
1012,Mohan,2,Bang,Savings,10/5/2012,25-2-2015,31-12-9999,Y
1013,Krishna,3,Hyd,Current,3/15/2012,25-2-2015,31-12-9999,Y
1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,24-2-2015,N
1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,24-2-2015,N
1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,24-2-2015,N
1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,24-2-2015,N
1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y
1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y
1010,Kumar,1,Hyd,Savings,2/24/2015,25-2-2015,31-12-9999,Y
1018,Radha,1,Pune,Savings,2/24/2015,25-2-2015,31-12-9999,Y
grunt> dump bag_union;
(1022,Mohan,1,Bang,Current,1/25/2012,1/25/2012,1/1/2015,N)
(1014,Suresh,3,Bang,Savings,1/25/2009,1/25/2009,2/10/2015,N)
(1012,Mohan,2,Bang,Savings,10/5/2012,25-2-2015,31-12-9999,Y)
(1013,Krishna,3,Hyd,Current,3/15/2012,25-2-2015,31-12-9999,Y)
(1015,Lakshmi,2,USA,Savings,5/5/2012,5/5/2012,24-2-2015,N)
(1033,Suresh,1,Pune,Savings,10/5/2010,10/5/2010,24-2-2015,N)
(1012,Mohan,1,Pune,Current,10/5/2012,10/5/2012,24-2-2015,N)
(1013,Krishna,3,NZ,Current,3/15/2012,3/15/2012,24-2-2015,N)
(1011,Harish,2,Hyd,Savings,10/5/2008,10/5/2008,12/12/2020,Y)
(1017,Jani,1,Chen,Current,6/15/2012,6/15/2012,10/8/2020,Y)
(1010,Kumar,1,Hyd,Savings,2/24/2015,25-2-2015,31-12-9999,Y)
(1018,Radha,1,Pune,Savings,2/24/2015,25-2-2015,31-12-9999,Y)
No comments:
Post a Comment