Pig 基本语法——join
强烈推介IDEA2020.2破解激活,IntelliJ IDEA 注册码,2020.2 IDEA 激活码
主要参考:
https://book.itxueyuan.com/3b7D/PDLO
1、基础数据:
==============================================
[root@cdh1 data]# cat demodata
xiaoxiao,12,12.1f
aaa,13,1.1f
kjkj,12,12.1f
ddf,19,12.8f
youyou,89,12.3f
[root@cdh1 data]# cat demodata2
xiaoxiao,99,aaaaaaaaaaaa
aaa,88,bbbbbbbbbbb
kjkj,77,ccccccccccc
ddf,66,dddddddddd
xuexue,11,sdfsdfsdfsdf
==============================================
2、inner join(内连接:只输出 A、B 中 name 都能匹配上的记录)
grunt> A = load '/root/xytest/pig/data/demodata' using PigStorage(',') as (name:chararray,age:int,gpa:float);
grunt> B = load '/root/xytest/pig/data/demodata2' using PigStorage(',') as (name:chararray,score:int,address:chararray);
grunt> C = join A by name,B by name;
grunt> dump C;
输出结果:
(aaa,13,1.1,aaa,88,bbbbbbbbbbb)
(ddf,19,12.8,ddf,66,dddddddddd)
(kjkj,12,12.1,kjkj,77,ccccccccccc)
(xiaoxiao,12,12.1,xiaoxiao,99,aaaaaaaaaaaa)
3、left outer join(左外连接:保留 A 的全部记录,B 中没有匹配时对应字段为空)
grunt> A = load '/root/xytest/pig/data/demodata' using PigStorage(',') as (name:chararray,age:int,gpa:float);
grunt> B = load '/root/xytest/pig/data/demodata2' using PigStorage(',') as (name:chararray,score:int,address:chararray);
grunt> D = join A by name left,B by name;
grunt> dump D;
输出结果:
(aaa,13,1.1,aaa,88,bbbbbbbbbbb)
(ddf,19,12.8,ddf,66,dddddddddd)
(kjkj,12,12.1,kjkj,77,ccccccccccc)
(youyou,89,12.3,,,)
(xiaoxiao,12,12.1,xiaoxiao,99,aaaaaaaaaaaa)
4、right outer join(右外连接:保留 B 的全部记录,A 中没有匹配时对应字段为空)
grunt> A = load '/root/xytest/pig/data/demodata' using PigStorage(',') as (name:chararray,age:int,gpa:float);
grunt> B = load '/root/xytest/pig/data/demodata2' using PigStorage(',') as (name:chararray,score:int,address:chararray);
grunt> E = join A by name right,B by name;
grunt> dump E;
输出结果:
(aaa,13,1.1,aaa,88,bbbbbbbbbbb)
(ddf,19,12.8,ddf,66,dddddddddd)
(kjkj,12,12.1,kjkj,77,ccccccccccc)
(,,,xuexue,11,sdfsdfsdfsdf)
(xiaoxiao,12,12.1,xiaoxiao,99,aaaaaaaaaaaa)