INSERT TABLE

最近更新时间:2026-05-20 14:11:22

我的收藏

描述

INSERT 语句用于向表中插入新行或覆盖表中的现有数据。插入的行可以通过值表达式指定或从查询结果获取。

语法

-- 标准: 插入或覆盖
INSERT [ INTO | OVERWRITE ] [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) | [BY NAME] ]
{ VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query }

参数说明

参数
说明
语法
table_identifier
指定表名,可选加数据库名限定
[ database_name. ] table_name
partition_spec
可选。指定分区键值对列表,支持类型化字面量(如 date'2019-01-02'
PARTITION ( partition_col_name = partition_col_val [ , ... ] )
column_list
可选。指定表的列名列表(逗号分隔),Spark 按此重排输入列以匹配表 Schema。
注意:
所有列必须存在于表中且不能重复(不含静态分区列)
列数必须与 VALUES 或查询数据完全一致
( col1, col2, [ , ... ] )
BY NAME
可选。按列名匹配而非位置匹配,Spark 自动按表中字段顺序重排查询字段
BY NAME
VALUES
指定插入值,支持显式值或 NULL。每组值用逗号分隔,可指定多组插入多行
VALUES ( { value | NULL } [ , … ] ) [ , ( … ) ]
query
生成待插入行的查询。支持三种格式:
SELECT 语句
TABLE 语句
FROM 语句
SELECT ... / TABLE t / FROM t

示例

-- ========== 一、非分区表 ==========
CREATE EXTERNAL TABLE insert_basic (name STRING, address STRING, student_id INT)
USING PARQUET LOCATION 'cosn://...';
INSERT INTO insert_basic VALUES ('Amy Smith', '123 Park Ave', 111111);
INSERT INTO insert_basic VALUES
('Bob Brown', '456 Taylor St', 222222),
('Cathy Johnson', '789 Race Ave', 333333);
SELECT * FROM insert_basic;
INSERT OVERWRITE insert_basic VALUES ('Ashua Hill', '456 Erica Ct', 111111);
INSERT INTO insert_basic (address, name, student_id) VALUES ('Hangzhou', 'Kent', 555555);

-- 跨表 SELECT 插入
CREATE EXTERNAL TABLE insert_persons2 (name STRING, address STRING, ssn INT)
USING PARQUET LOCATION 'cosn://...';
INSERT INTO insert_persons2 VALUES ('Eddie Davis', '245 Market St', 345678901);
INSERT INTO insert_basic SELECT name, address, ssn FROM insert_persons2 WHERE name = 'Eddie Davis';

-- ========== 二、分区表 (Hive STORED AS) ==========
CREATE EXTERNAL TABLE insert_part (name STRING, address STRING)
PARTITIONED BY (student_id INT) STORED AS PARQUET LOCATION 'cosn://...';
INSERT INTO insert_part PARTITION (student_id = 444444) VALUES ('Dora', '134 Forest Ave');
INSERT OVERWRITE insert_part PARTITION (student_id = 444444) VALUES ('New Name', 'New Addr');
SELECT * FROM insert_part;

-- ========== 三、Spark 文档: 分区表示例 ==========
CREATE EXTERNAL TABLE students (name VARCHAR(64), address VARCHAR(64))
USING PARQUET PARTITIONED BY (student_id INT) LOCATION 'cosn://...';
INSERT INTO students VALUES ('Amy Smith', '123 Park Ave, San Jose', 111111);
INSERT INTO students VALUES
('Bob Brown', '456 Taylor St, Cupertino', 222222),
('Cathy Johnson', '789 Race Ave, Palo Alto', 333333);
INSERT INTO students (address, name, student_id) VALUES ('Hangzhou, China', 'Kent Yao', 11215016);
INSERT INTO students PARTITION (student_id = 11215017) (address, name) VALUES ('Hangzhou, China', 'Kent Yao Jr.');

-- ========== 四、OVERWRITE 多行 ==========
INSERT OVERWRITE students VALUES
('Ashua Hill', '456 Erica Ct, Cupertino', 111111),
('Brian Reed', '723 Kern Ave, Palo Alto', 222222);

-- ========== 五、BY NAME(按列名匹配)==========
CREATE EXTERNAL TABLE persons (name STRING, address STRING) USING PARQUET LOCATION 'cosn://...';
INSERT INTO persons VALUES ('Dora Williams', '100 Main St');
INSERT INTO students PARTITION (student_id = 222222) BY NAME
SELECT address, name FROM persons WHERE name = 'Dora Williams';
INSERT OVERWRITE students PARTITION (student_id = 222222) BY NAME
SELECT 'Unknown' as address, name FROM persons WHERE name = 'Dora Williams';

-- ========== 六、TABLE 语句插入 ==========
CREATE EXTERNAL TABLE visiting_students (name VARCHAR(64), address VARCHAR(64), student_id INT)
USING PARQUET LOCATION 'cosn://...';
INSERT INTO visiting_students VALUES ('Visitor One', '1 Visitor Lane', 999999);
INSERT INTO students TABLE visiting_students;
INSERT OVERWRITE students TABLE visiting_students;