描述
CREATE TABLE 语句使用 Hive 格式定义一个新表。语法
CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier[ ( col_name1[:] col_type1 [ COMMENT col_comment1 ], ... ) ][ COMMENT table_comment ][ PARTITIONED BY ( col_name2[:] col_type2 [ COMMENT col_comment2 ], ... )| ( col_name1, col_name2, ... ) ][ CLUSTERED BY ( col_name1, col_name2, ...)[ SORTED BY ( col_name1 [ ASC | DESC ], col_name2 [ ASC | DESC ], ... ) ]INTO num_buckets BUCKETS ][ ROW FORMAT row_format ][ STORED AS file_format ][ LOCATION path ][ TBLPROPERTIES ( key1=val1, key2=val2, ... ) ][ AS select_statement ]
注意:
列定义子句和
AS SELECT 子句之间的子句可以以任意顺序出现。参数说明
子句/关键字 | 用途 | 说明 |
table_identifier | 表名定义 | 指定表名,可选择性地用数据库名限定。语法: [database_name.]table_name |
EXTERNAL | 存储方式 | 使用 LOCATION 提供的路径定义表,不使用默认仓库路径;DROP TABLE 时不会删除实际数据 |
PARTITIONED BY | 分区 | 根据指定的列对表进行分区,支持单列或多列分区,用于分区裁剪优化 |
CLUSTERED BY | 分桶 | 将创建的分区内的数据按指定列散列为固定桶数;注意:分桶是一种优化技术 |
SORTED BY | 桶内排序 | 指定桶内数据的排序顺序,通常配合 CLUSTERED BY 使用以减少桶内数据扫描量 |
INTO num_buckets BUCKETS | 桶数量 | 指定分桶的数量;必须与 CLUSTERED BY 一起使用 |
row_format | 行格式 | 指定输入和输出的行格式,如分隔符、SerDe 序列化/反序列化规则等 |
STORED AS | 文件格式 | 表底层存储的文件格式,支持 TEXTFILE、ORC、PARQUET、ICEBERG 等 |
LOCATION | 存储路径 | 表数据存储的目录路径,常与 EXTERNAL 联合使用指向外部存储位置 |
COMMENT | 表注释 | 用于描述表的字符串字面量,方便元数据管理和团队协作理解表用途 |
TBLPROPERTIES | 表属性 | 用于标记/配置表定义的键值对列表(如 Iceberg format-version、压缩算法等) |
AS select_statement | CTAS 建表 | 使用 SELECT 语句查询结果的数据直接填充并创建表(Create Table As Select) |
示例
-- STORED AS ORCCREATE EXTERNAL TABLE hive_t1 (id INT, name STRING, age INT)STORED AS ORC LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t1';-- STORED AS PARQUETCREATE EXTERNAL TABLE hive_t2 (id INT, name STRING, age INT)STORED AS PARQUET LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t2';-- COMMENT + TBLPROPERTIESCREATE EXTERNAL TABLE hive_t3 (id INT, name STRING, age INT)COMMENT 'test hive format table'STORED AS ORCTBLPROPERTIES ('foo'='bar')LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t3';-- PARTITIONED BYCREATE EXTERNAL TABLE hive_t4 (id INT, name STRING)PARTITIONED BY (age INT)STORED AS ORCLOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t4';-- ROW FORMAT + TEXTFILECREATE EXTERNAL TABLE hive_t5 (id INT, name STRING)ROW FORMAT DELIMITED FIELDS TERMINATED BY ','STORED AS TEXTFILELOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t5';-- 复杂数据类型(ARRAY / MAP / STRUCT)CREATE EXTERNAL TABLE hive_family (name STRING,friends ARRAY<STRING>,children MAP<STRING, INT>,address STRUCT<street: STRING, city: STRING>) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','COLLECTION ITEMS TERMINATED BY '_'MAP KEYS TERMINATED BY ':'STORED AS TEXTFILELOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_family';-- CLUSTERED BYCREATE EXTERNAL TABLE hive_clustered1 (ID INT, AGE STRING)CLUSTERED BY (ID) INTO 4 BUCKETSSTORED AS ORCLOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_clustered1';-- CLUSTERED BY + SORTED BY + PARTITIONED BYCREATE EXTERNAL TABLE hive_clustered2 (ID INT, NAME STRING)PARTITIONED BY (YEAR STRING)CLUSTERED BY (ID, NAME) SORTED BY (ID ASC) INTO 3 BUCKETSSTORED AS PARQUETLOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_clustered2';-- INSERT 数据并验证INSERT INTO hive_t2 VALUES (1, 'Alice', 20), (2, 'Bob', 22);SELECT * FROM hive_t2;-- CTAS + LOCATION Hive 格式CREATE TABLE hive_t6STORED AS PARQUETLOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t6_v2'AS SELECT * FROM hive_t2;-- IF NOT EXISTSCREATE EXTERNAL TABLE IF NOT EXISTS hive_t1 (id INT, name STRING, age INT)STORED AS ORC LOCATION 'cosn://<your_cos_bucket>/test_create_table_hive/hive_t1';-- DESC TABLEDESC TABLE EXTENDED hive_t1;