Hive对hadoop集群及数据库操作说明文档
hive> create table userinfo(id int, name string) row format delimited fields terminated by '\t';
hive> create table choice(userid int, classname string) row format delimited fields terminated by '\t';
hive> create table classinfo(teacher string, classname string) row format delimited fields terminated by '\t';
分别建立三个本地数据表单data1,data2,data3,然后导入到hive中,导入具体操作如下:
hive> load data local inpath '/home/hadoop/hive-data/data1' overwrite into table userinfo;
hive> load data local inpath '/home/hadoop/hive-data/data2' overwrite into table choice;
hive> load data local inpath '/home/hadoop/hive-data/data3' overwrite into table classinfo;
hive> select * from userinfo
> ;
1 wangming
2 xiaoming
3 wujing
4 zhangqi
5 xiaomi
6 wangyi
hive> create table ptest(userid int) partitioned by (name string) row format delimited fields terminated by '\t';
hive> load data local inpath '/home/hadoop/hive-data/data1' into table ptest partition(name='jack');
hive> dfs -ls /user/hive/warehouse/ptest/name=jack;
Found 1 items
-rw-r--r-- 2 hadoop supergroup 59 2012-08-29 19:50 /user/hive/warehouse/ptest/name=jack/data1
hive> select userid from ptest where name='jack';
1
2
3
4
5
6
hive> show partitions ptest;
name=jack
hive> insert overwrite table ptest partition(name='jack') select name from userinfo;
hive> set hive.enforce.bucketing=true;
hive> set hive.enforce.bucketing;
hive.enforce.bucketing=true
hive> create table btest2(id int, name string) clustered by(id) into 3 buckets row format delimited fields terminated by '\t';
hive> insert overwrite table btest2 select * from userinfo;
hive> dfs -ls /user/hive/warehouse/btest2;
Found 3 items
-rw-r--r-- 2 hadoop supergroup 18 2012-08-29 20:09 /user/hive/warehouse/btest2/000000_0
-rw-r--r-- 2 hadoop supergroup 21 2012-08-29 20:09 /user/hive/warehouse/btest2/000001_0
-rw-r--r-- 2 hadoop supergroup 20 2012-08-29 20:09 /user/hive/warehouse/btest2/000002_0
hive> dfs -cat /user/hive/warehouse/btest2/000001_0;
1 wangming
4 zhangqi
hive> dfs -cat /user/hive/warehouse/btest2/000000_0;
3 wujing
6 wangyi
hive> dfs -cat /user/hive/warehouse/btest2/000002_0;
2 xiaoming
5 xiaomi
hive> select * from btest2 tablesample(bucket 1 out of 3 on id);
3 wujing
6 wangyi
hive> create table mutil1 as select id,name from userinfo;
hive> create table mutil2 like mutil1;
hive> from userinfo insert overwrite table mutil1 select id,name insert overwrite table mutil2 select count(distinct id),name group by name;
hive> alter table mutil1 rename to mutil11;
hive> drop table mutil11;
OK
Time taken: 0.534 seconds
hive> show tables;
OK
btest2
choice
classinfo
mutil2
ptest
tt
userinfo
Time taken: 0.037 seconds
hive> select * from userinfo;
OK
1 wangming
2 xiaoming
3 wujing
4 zhangqi
5 xiaomi
6 wangyi
Time taken: 0.052 seconds
hive> select * from choice;
OK
1 class1
2 class2
3 class3
4 class4
5 class5
6 class6
Time taken: 0.05 seconds
hive> select userinfo.*,choice.* from userinfo join choice on (userinfo.id=choice.userid);
1 wangming 1 class1
2 xiaoming 2 class2
3 wujing 3 class3
4 zhangqi 4 class4
5 xiaomi 5 class5
6 wangyi 6 class6
hive> select userinfo.*,choice.* from userinfo left outer join choice on (userinfo.id=choice.userid);
1 wangming 1 class1
2 xiaoming 2 class2
3 wujing 3 class3
4 zhangqi 4 class4
5 xiaomi 5 class5
6 wangyi 6 class6
hive> select userinfo.*,choice.* from userinfo right outer join choice on (userinfo.id=choice.userid);
1 wangming 1 class1
2 xiaoming 2 class2
3 wujing 3 class3
4 zhangqi 4 class4
5 xiaomi 5 class5
6 wangyi 6 class6
hive> select userinfo.* from userinfo full outer join choice on (userinfo.id=choice.userid);
1 wangming
2 xiaoming
3 wujing
4 zhangqi
5 xiaomi
6 wangyi
hive> select userinfo.* from userinfo left semi join choice on (userinfo.id=choice.userid);
1 wangming
2 xiaoming
3 wujing
4 zhangqi
5 xiaomi
6 wangyi
hive> select teacher,MAX(class_num) from (select teacher,count(classname) as class_num from classinfo group by teacher) subq group by teacher;
pro.li 1
pro.liu 1
pro.mi 1
pro.qi 1
pro.wang 1
pro.zhang 1
hive> create view teacher_classnum as select teacher,count(classname) from classinfo group by teacher;