描述
TABLESAMPLE 语句用于对表进行采样,支持按行数 (ROWS)、百分比 (PERCENT) 或桶 (BUCKET) 采样。

示例
-- Example script for TABLESAMPLE: creates a small test table, loads ten rows,
-- then shows each sampling form alongside a rand()-based alternative.

-- Create the test table (Parquet format, stored at the given COS location).
-- Replace <your_cos_bucket> with your own bucket before running.
CREATE EXTERNAL TABLE sample_table (
    id INT,
    name STRING,
    value DOUBLE
)
USING PARQUET
LOCATION 'cosn://<your_cos_bucket>/test_sampling/sample_table';

-- Load ten rows (id 1..10) so the sampling queries have data to draw from.
INSERT INTO sample_table VALUES
    (1, 'a', 10.0),
    (2, 'b', 20.0),
    (3, 'c', 30.0),
    (4, 'd', 40.0),
    (5, 'e', 50.0),
    (6, 'f', 60.0),
    (7, 'g', 70.0),
    (8, 'h', 80.0),
    (9, 'i', 90.0),
    (10, 'j', 100.0);

-- Sample by row count (supported).
SELECT * FROM sample_table TABLESAMPLE (5 ROWS);

-- Sample by percentage (supported).
SELECT * FROM sample_table TABLESAMPLE (50 PERCENT);

-- Sample by bucket (supported; note: do not specify a column name with `ON id`).
SELECT * FROM sample_table TABLESAMPLE (BUCKET 3 OUT OF 10);

-- Alternative: sample using the rand() function (supported).
SELECT * FROM sample_table WHERE rand() < 0.5;