CREATE TABLE USING HIVE

最近更新时间:2026-05-20 14:11:22

我的收藏

描述

CREATE TABLE 语句使用 Hive 格式定义一个新表。

语法

CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier
[ ( col_name1[:] col_type1 [ COMMENT col_comment1 ], ... ) ]
[ COMMENT table_comment ]
[ PARTITIONED BY ( col_name2[:] col_type2 [ COMMENT col_comment2 ], ... )
| ( col_name1, col_name2, ... ) ]
[ CLUSTERED BY ( col_name1, col_name2, ...)
[ SORTED BY ( col_name1 [ ASC | DESC ], col_name2 [ ASC | DESC ], ... ) ]
INTO num_buckets BUCKETS ]
[ ROW FORMAT row_format ]
[ STORED AS file_format ]
[ LOCATION path ]
[ TBLPROPERTIES ( key1=val1, key2=val2, ... ) ]
[ AS select_statement ]
注意:
列定义子句和 AS SELECT 子句之间的子句可以以任意顺序出现。

参数说明

子句/关键字
用途
说明
table_identifier
表名定义
指定表名,可选择性地用数据库名限定。语法:[database_name.]table_name
EXTERNAL
存储方式
使用 LOCATION 提供的路径定义表,不使用默认仓库路径;DROP TABLE 时不会删除实际数据
PARTITIONED BY
分区
根据指定的列对表进行分区,支持单列或多列分区,用于分区裁剪优化
CLUSTERED BY
分桶
将创建的分区内的数据按指定列散列为固定桶数;注意:分桶是一种优化技术
SORTED BY
桶内排序
指定桶内数据的排序顺序,通常配合 CLUSTERED BY 使用以减少桶内数据扫描量
INTO num_buckets BUCKETS
桶数量
指定分桶的数量;必须与 CLUSTERED BY 一起使用
row_format
行格式
指定输入和输出的行格式,如分隔符、SerDe 序列化/反序列化规则等
STORED AS
文件格式
表底层存储的文件格式,支持 TEXTFILE、ORC、PARQUET、ICEBERG 等
LOCATION
存储路径
表数据存储的目录路径,常与 EXTERNAL 联合使用指向外部存储位置
COMMENT
表注释
用于描述表的字符串字面量,方便元数据管理和团队协作理解表用途
TBLPROPERTIES
表属性
用于标记/配置表定义的键值对列表(如 Iceberg format-version、压缩算法等)
AS select_statement
CTAS 建表
使用 SELECT 语句查询结果的数据直接填充并创建表(Create Table As Select)

示例

-- STORED AS ORC
CREATE EXTERNAL TABLE hive_t1 (id INT, name STRING, age INT)
STORED AS ORC LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t1';

-- STORED AS PARQUET
CREATE EXTERNAL TABLE hive_t2 (id INT, name STRING, age INT)
STORED AS PARQUET LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t2';

-- COMMENT + TBLPROPERTIES
CREATE EXTERNAL TABLE hive_t3 (id INT, name STRING, age INT)
COMMENT 'test hive format table'
STORED AS ORC
TBLPROPERTIES ('foo'='bar')
LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t3';

-- PARTITIONED BY
CREATE EXTERNAL TABLE hive_t4 (id INT, name STRING)
PARTITIONED BY (age INT)
STORED AS ORC
LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t4';

-- ROW FORMAT + TEXTFILE
CREATE EXTERNAL TABLE hive_t5 (id INT, name STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t5';

-- 复杂数据类型(ARRAY / MAP / STRUCT)
CREATE EXTERNAL TABLE hive_family (
name STRING,
friends ARRAY<STRING>,
children MAP<STRING, INT>,
address STRUCT<street: STRING, city: STRING>
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
COLLECTION ITEMS TERMINATED BY '_'
MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE
LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_family';

-- CLUSTERED BY
CREATE EXTERNAL TABLE hive_clustered1 (ID INT, AGE STRING)
CLUSTERED BY (ID) INTO 4 BUCKETS
STORED AS ORC
LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_clustered1';

-- CLUSTERED BY + SORTED BY + PARTITIONED BY
CREATE EXTERNAL TABLE hive_clustered2 (ID INT, NAME STRING)
PARTITIONED BY (YEAR STRING)
CLUSTERED BY (ID, NAME) SORTED BY (ID ASC) INTO 3 BUCKETS
STORED AS PARQUET
LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_clustered2';

-- INSERT 数据并验证
INSERT INTO hive_t2 VALUES (1, 'Alice', 20), (2, 'Bob', 22);
SELECT * FROM hive_t2;

-- CTAS + LOCATION Hive 格式
CREATE TABLE hive_t6
STORED AS PARQUET
LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t6_v2'
AS SELECT * FROM hive_t2;

-- IF NOT EXISTS
CREATE EXTERNAL TABLE IF NOT EXISTS hive_t1 (id INT, name STRING, age INT)
STORED AS ORC LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t1';

-- DESC TABLE
DESC TABLE EXTENDED hive_t1;