这里使用Mysql保存最终数据,Apache Doris保存处理过程中的数据
Apache Doris
https://github.com/apache/doris
创建表
CREATE TABLE `t_student` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`subject` varchar(255) DEFAULT NULL,
`score` double NULL DEFAULT NULL,
PRIMARY KEY (`id`)
);
插入数据
INSERT INTO t_student(name,subject,score) VALUES ('张三','语文',60);
INSERT INTO t_student(name,subject,score) VALUES ('张三','数学',100);
INSERT INTO t_student(name,subject,score) VALUES ('张三','英语',30);
INSERT INTO t_student(name,subject,score) VALUES ('李四','语文',80);
INSERT INTO t_student(name,subject,score) VALUES ('李四','数学',70);
INSERT INTO t_student(name,subject,score) VALUES ('李四','英语',60);
INSERT INTO t_student(name,subject,score) VALUES ('王五','语文',30);
INSERT INTO t_student(name,subject,score) VALUES ('王五','数学',80);
INSERT INTO t_student(name,subject,score) VALUES ('王五','英语',60);
准备数据
create table t_student2 as (
select name,
max(case subject when '语文' then score else null end) as '语文',
max(case subject when '数学' then score else null end) as '数学',
max(case subject when '英语' then score else null end) as '英语'
from t_student group by name
);
结果如下
创建表
CREATE TABLE `t_student_detail` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`phone` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`)
);
插入数据
INSERT INTO t_student_detail(name,phone) VALUES ('张三','15225178321');
INSERT INTO t_student_detail(name,phone) VALUES ('李四','13607686585');
INSERT INTO t_student_detail(name,phone) VALUES ('王五','13837857835');
INSERT INTO t_student_detail(name,phone) VALUES ('赵六',null);
先查出要进行行转列的列属性的值
select DISTINCT subject FROM t_student;
再生成SQL
select name 姓名,
sum(case subject when '语文' then score else 0 end) 语文,
sum(case subject when '数学' then score else 0 end) 数学,
sum(case subject when '英语' then score else 0 end) 英语
from t_student group by name
SELECT name as 姓名,
SUM(IF(subject = '语文', score, 0)) AS 语文,
SUM(IF(subject = '数学', score, 0)) AS 数学,
SUM(IF(subject = '英语', score, 0)) AS 英语
FROM t_student
GROUP BY name;
一种说法是Mysql8开始支持povit,但是
这里测试的Mysql8也不支持povit关键字。
示例
SELECT pivot_table.*
FROM t_student
PIVOT(
SUM(score)
FOR subject IN ('语文','数学','英语')
) AS pivot_table;
查询Mysql版本
SELECT @@version;
SELECT VERSION();
实现列转行
select name,'语文' as subject,语文 as 'score' from t_student2
union all
select name,'数学' as subject,数学 as 'score' from t_student2
union all
select name,'英语' as subject,英语 as 'score' from t_student2
order by name;
where
TopN
select * FROM t_student_detail LIMIT 2;
BottomN
select * from (select * FROM t_student_detail ORDER BY id desc LIMIT 2) t1 ORDER BY t1.id asc ;
区间
select * FROM t_student_detail LIMIT 2 OFFSET 2;
COALESCE()
null替换为设置的字符
select name,COALESCE(phone,'-') as phone from t_student_detail;
SELECT DISTINCT name,subject,score FROM t_student;
SELECT (@i:= @i + 1) AS rownum, name, subject,score FROM t_student, (SELECT @i:=0) AS rownum ORDER BY score desc;
#直接替换字符串中的部分字符
SELECT name,replace(phone,substring(phone,4,4),'****') as phone FROM t_student_detail;
select name,case name when '张三' then '-' ELSE phone END as phone from t_student_detail;
注意对null无效
select name,case phone when null then '-' ELSE phone END as phone from t_student_detail;
脱敏
SELECT name,replace(phone,substring(phone,4,4),'****') as phone FROM t_student_detail;
SELECT name,MD5(phone)as phone FROM t_student_detail; -- 进行MD5加密
使用AES加解密
SELECT AES_DECRYPT(AES_ENCRYPT('码客说', '1234567891011121'),'1234567891011121');
目前派生列支持的函数有:
函数名称 | 函数及示例 | 说明 |
---|---|---|
累积 | sum(<column>) over(partition by <columns> order by <columns>) | 对数字型字段进行累计计算。Sum():用于设置求和计算字段;Partition by():用于设置分组的字段;Order by():用于设置排序的字段; |
字符拼接 (拼接符) | concat_ws("sep",str1,str2,...,strN) | 通过sep连接符将字符串拼接。sep由用户自定义。str:表示连接的字符。 |
字符拼接 | concat(str1, str2, ..., strN) | 不需要连接符,直接将字符串连接。str:表示连接的字符。 |
字符截取 | substring(<column>, pos[, len]) | 从字段中截取指定位置的len个字符。<column>:表示截取的字段;pos:表示从第几个位置开始截取,设置为正整数;len:表示一共取多少个字符,设置为正整数; |
小数点保留 | format_number(<column>, num) | 保留浮点型字段小数点位数。<column>:表示设置小数点位数的字段;num:表示小数点后保留的位数,设置为0和正整数; |
内容提取(年) | year(<column>) | 截取时间字段中的年信息。<column>:表示截取的字段; |
内容提取(月) | month(<column>) | 截取时间字段中的月信息。<column>:表示截取的字段; |
内容提取(日) | day(<column>) | 截取时间字段中的日信息。<column>:表示截取的字段; |
内容提取(时) | hour(<column>) | 截取时间字段中的时信息。<column>:表示截取的字段; |
内容提取(分) | minute(<column>) | 截取时间字段中的分信息。 <column>:表示截取的字段; |
内容提取(秒) | second(<column>) | 截取时间字段中的秒信息。<column>:表示截取的字段; |
向上取整 | ceiling(<column>) | 获取大于或等于取整字段的最小整数。 <column>:表示取整的字段。 |
向下取整 | floor(<column>) | 获取小于或等于取整字段的最大整数。 <column>:表示取整的字段。 |
四舍五入 | round(<column>) | 获取四舍五入后的整数。 <column>:表示四舍五入的字段。 |
行最小值 | least(<column>,<column>,<column>) | 获取指定行中的最小值。 <column>:表示用来比较最小值的字段,默认三个,可以有多个字段。 |
行最大值 | greatest(<column>,<column>,<column>) | 获取指定行中的最大值。 <column>:表示用来比较最大值的字段,默认三个,可以有多个字段。 |
示例
select name ,least(`语文`,`数学`,`英语`) as min_score FROM t_student2;
select name ,greatest(`语文`,`数学`,`英语`) as max_score FROM t_student2;
常见聚合函数有:
select name,max(score) max_score FROM t_student GROUP BY name;
select name,min(score) min_score FROM t_student GROUP BY name;
select name,avg(score) avg_score FROM t_student GROUP BY name;
select name,sum(score) total_score FROM t_student GROUP BY name;
此外,MySQL还提供一些高级聚合函数:
示例
select name,STD(score) std_score FROM t_student GROUP BY name;
select name,VARIANCE(score) var_score FROM t_student GROUP BY name;
select name,STDDEV_POP(score) std_pop_score FROM t_student GROUP BY name;
select name,STDDEV_SAMP(score) std_samp_score FROM t_student GROUP BY name;
select name,VAR_POP(score) var_pop_score FROM t_student GROUP BY name;
select name,VAR_SAMP(score) var_samp_score FROM t_student GROUP BY name;