Search This Blog

Sunday, 23 December 2018

scd type 2 in hive


truncate table cust_income;
truncate table cust_income_intmdt;
truncate table cust_income_target;


insert into table cust_income values
(1,'P','100000','20000','120000','1-2-2016'),
(2,'P','120000','20000','140000','1-2-2016'),
(3,'P','130000','20000','150000','1-2-2016');


insert into cust_income_intmdt
select x.cust_id, x.person_org_code, x.annual_net_inc, x.total_tax, x.est_gross_inc, x.dw_ld_ts, x.start_dt, x.end_dt from cust_income_target x where end_dt is not null
union all
select x.cust_id, x.person_org_code, x.annual_net_inc, x.total_tax, x.est_gross_inc, x.dw_ld_ts, x.start_dt, y.dw_ld_ts end_dt from cust_income_target x
left outer join
cust_income y on (x.cust_id = y.cust_id) where x.end_dt is null
union all
select x.cust_id, x.person_org_code, x.annual_net_inc, x.total_tax, x.est_gross_inc, x.dw_ld_ts, x.dw_ld_ts start_dt, null end_dt from cust_income x ;

truncate table cust_income_target;

insert into cust_income_target
select * from cust_income_intmdt;

truncate table cust_income_intmdt;

truncate table cust_income;

insert into table cust_income values
(1,'P','110000','20000','120000','2-2-2016'),
(2,'P','130000','20000','140000','2-2-2016'),
(4,'P','130000','20000','150000','2-2-2016');

insert into cust_income_intmdt
select x.cust_id, x.person_org_code, x.annual_net_inc, x.total_tax, x.est_gross_inc, x.dw_ld_ts, x.start_dt, x.end_dt from cust_income_target x where end_dt is not null
union all
select x.cust_id, x.person_org_code, x.annual_net_inc, x.total_tax, x.est_gross_inc, x.dw_ld_ts, x.start_dt, y.dw_ld_ts end_dt from cust_income_target x
left outer join
cust_income y on (x.cust_id = y.cust_id) where x.end_dt is null
union all
select x.cust_id, x.person_org_code, x.annual_net_inc, x.total_tax, x.est_gross_inc, x.dw_ld_ts, x.dw_ld_ts start_dt, null end_dt from cust_income x ;

truncate table cust_income_target;

insert into cust_income_target
select * from cust_income_intmdt;

truncate table cust_income_intmdt;

truncate table cust_income;

insert into table cust_income values
(4,'P','120000','20000','120000','3-2-2016'),
(2,'P','140000','20000','140000','3-2-2016'),
(5,'P','150000','20000','150000','3-2-2016');

insert into cust_income_intmdt
select x.cust_id, x.person_org_code, x.annual_net_inc, x.total_tax, x.est_gross_inc, x.dw_ld_ts, x.start_dt, x.end_dt from cust_income_target x where end_dt is not null
union all
select x.cust_id, x.person_org_code, x.annual_net_inc, x.total_tax, x.est_gross_inc, x.dw_ld_ts, x.start_dt, y.dw_ld_ts end_dt from cust_income_target x
left outer join
cust_income y on (x.cust_id = y.cust_id) where x.end_dt is null
union all
select x.cust_id, x.person_org_code, x.annual_net_inc, x.total_tax, x.est_gross_inc, x.dw_ld_ts, x.dw_ld_ts start_dt, null end_dt from cust_income x ;

truncate table cust_income_target;

insert into cust_income_target
select * from cust_income_intmdt;

truncate table cust_income_intmdt;

No comments:

Post a Comment

Spark Memory Management

 Spark’s performance advantage over MapReduce  is due to Spark’s In-Memory Persistence and Memory Management Rather than writing to disk ...