描述
INSERT 语句用于向表中插入新行或覆盖表中的现有数据。插入的行可以通过值表达式指定或从查询结果获取。语法
-- 标准: 插入或覆盖INSERT [ INTO | OVERWRITE ] [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) | [BY NAME] ]{ VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query }
参数说明
参数 | 说明 | 语法 |
table_identifier | 指定表名,可选加数据库名限定 | [ database_name. ] table_name |
partition_spec | 可选。指定分区键值对列表,支持类型化字面量(如 date'2019-01-02') | PARTITION ( partition_col_name = partition_col_val [ , ... ] ) |
column_list | 可选。指定表的列名列表(逗号分隔),Spark 按此重排输入列以匹配表 Schema。 注意: 所有列必须存在于表中且不能重复(不含静态分区列) 列数必须与 VALUES 或查询数据完全一致 | ( col1, col2, [ , ... ] ) |
BY NAME | 可选。按列名匹配而非位置匹配,Spark 自动按表中字段顺序重排查询字段 | BY NAME |
VALUES | 指定插入值,支持显式值或 NULL。每组值用逗号分隔,可指定多组插入多行 | VALUES ( { value | NULL } [ , … ] ) [ , ( … ) ] |
query | 生成待插入行的查询。支持三种格式: SELECT 语句 TABLE 语句FROM 语句 | SELECT ... / TABLE t / FROM t |
示例
-- ========== 一、非分区表 ==========CREATE EXTERNAL TABLE insert_basic (name STRING, address STRING, student_id INT)USING PARQUET LOCATION 'cosn://...';INSERT INTO insert_basic VALUES ('Amy Smith', '123 Park Ave', 111111);INSERT INTO insert_basic VALUES('Bob Brown', '456 Taylor St', 222222),('Cathy Johnson', '789 Race Ave', 333333);SELECT * FROM insert_basic;INSERT OVERWRITE insert_basic VALUES ('Ashua Hill', '456 Erica Ct', 111111);INSERT INTO insert_basic (address, name, student_id) VALUES ('Hangzhou', 'Kent', 555555);-- 跨表 SELECT 插入CREATE EXTERNAL TABLE insert_persons2 (name STRING, address STRING, ssn INT)USING PARQUET LOCATION 'cosn://...';INSERT INTO insert_persons2 VALUES ('Eddie Davis', '245 Market St', 345678901);INSERT INTO insert_basic SELECT name, address, ssn FROM insert_persons2 WHERE name = 'Eddie Davis';-- ========== 二、分区表 (Hive STORED AS) ==========CREATE EXTERNAL TABLE insert_part (name STRING, address STRING)PARTITIONED BY (student_id INT) STORED AS PARQUET LOCATION 'cosn://...';INSERT INTO insert_part PARTITION (student_id = 444444) VALUES ('Dora', '134 Forest Ave');INSERT OVERWRITE insert_part PARTITION (student_id = 444444) VALUES ('New Name', 'New Addr');SELECT * FROM insert_part;-- ========== 三、Spark 文档: 分区表示例 ==========CREATE EXTERNAL TABLE students (name VARCHAR(64), address VARCHAR(64))USING PARQUET PARTITIONED BY (student_id INT) LOCATION 'cosn://...';INSERT INTO students VALUES ('Amy Smith', '123 Park Ave, San Jose', 111111);INSERT INTO students VALUES('Bob Brown', '456 Taylor St, Cupertino', 222222),('Cathy Johnson', '789 Race Ave, Palo Alto', 333333);INSERT INTO students (address, name, student_id) VALUES ('Hangzhou, China', 'Kent Yao', 11215016);INSERT INTO students PARTITION (student_id = 11215017) (address, name) VALUES ('Hangzhou, China', 'Kent Yao Jr.');-- ========== 四、OVERWRITE 多行 ==========INSERT OVERWRITE students VALUES('Ashua Hill', '456 Erica Ct, Cupertino', 111111),('Brian Reed', '723 Kern Ave, Palo Alto', 222222);-- ========== 五、BY NAME(按列名匹配)==========CREATE EXTERNAL TABLE persons (name STRING, address STRING) USING PARQUET LOCATION 'cosn://...';INSERT INTO persons VALUES ('Dora Williams', '100 Main St');INSERT INTO students PARTITION (student_id = 222222) BY NAMESELECT address, name FROM persons WHERE name = 'Dora Williams';INSERT OVERWRITE students PARTITION (student_id = 222222) BY NAMESELECT 'Unknown' as address, name FROM persons WHERE name = 'Dora Williams';-- ========== 六、TABLE 语句插入 ==========CREATE EXTERNAL TABLE visiting_students (name VARCHAR(64), address VARCHAR(64), student_id INT)USING PARQUET LOCATION 'cosn://...';INSERT INTO visiting_students VALUES ('Visitor One', '1 Visitor Lane', 999999);INSERT INTO students TABLE visiting_students;INSERT OVERWRITE students TABLE visiting_students;