insert into
create table dw.dim_area_code (
country_name string comment "国家名称",
country_code string comment "国家代码",
province_name string comment "省份名称",
city_name string comment "地级市",
city_area_code string comment "城市代码",
city_zip_code string comment "城市邮编",
county_name string comment "县级市、区名称",
county_area_code string comment "县级市、区名代码",
county_zip_code string comment "县级市、区名邮编"
)STORED as ORC TBLPROPERTIES ("comment"="区域代码表 by zhangjiqiang")
;
insert into dw.dim_area_code values
('中国',86,'北京市','北京市',010,100000,'东城区',010,100000),
('中国',86,'北京市','北京市',010,100000,'西城区',010,100000),
('中国',86,'北京市','北京市',010,100000,'崇文区',010,100000),
('中国',86,'北京市','北京市',010,100000,'宣武区',010,100000),
('中国',86,'北京市','北京市',010,100000,'朝阳区',010,100000);
load data
数据量比较大的,可以将数据存放成 csv 格式的文件,通过 hive 的
load data
命令加载到 hive 表,由于数据是文本文件,因此,此处的 hive 表也必须是普通表。 否则在使用load data
命令时,hive 会提示SemanticException Unable to load data to destination table. Error: The file that you are trying to load does not match the file format of the destination table.
DROP TABLE IF EXISTS `dw.tmp_dim_phone_segment_info`;
CREATE TABLE `dw.tmp_dim_phone_segment_info` (
`id` int comment '原来 mysql 自增 id',
`phone_pref` string comment '号码前缀',
`phone_segment` string comment '号段',
`province_name` string comment '省份名称',
`city_county_name` string comment '地级市 or 县级市 or 区城市名称',
`vendor` string comment '运营商',
`zip_code` string comment '邮编',
`phone_area_code` string comment '电话号码区域代码',
`addr_area_code` string comment '行政区域划分代码'
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
TBLPROPERTIES ("comment"="手机号码段详细信息 by zhangjiqiang");
load data local inpath '/home/dev/zhangjq/dim_phone_segment_info_insert.csv'
into table dw.tmp_dim_phone_segment_info;
普通文件格式的表比较占用空间,orc 是 Apache 顶级项目,高压缩率
DROP TABLE IF EXISTS `dw.dim_phone_segment_info`;
CREATE TABLE `dw.dim_phone_segment_info` (
`id` int comment '原来 mysql 自增 id',
`phone_pref` string comment '号码前缀',
`phone_segment` string comment '号段',
`province_name` string comment '省份名称',
`city_county_name` string comment '地级市 or 县级市 or 区城市名称',
`vendor` string comment '运营商',
`zip_code` string comment '邮编',
`phone_area_code` string comment '电话号码区域代码',
`addr_area_code` string comment '行政区域划分代码'
) stored as ORC
TBLPROPERTIES ("comment"="手机号码段详细信息 by zhangjiqiang");
insert overwrite table dw.dim_phone_segment_info
select
a.id,
a.phone_pref,
a.phone_segment,
a.province_name,
a.city_county_name,
a.vendor,
a.zip_code,
a.phone_area_code,
a.addr_area_code
from dw.tmp_dim_phone_segment_info a