# 一道SQL题的前世今生

#### 来看一道SQL题目：

```select a.reg_date, count(distinct a.uid) as new_usr_cnt,
count(distinct b.uid) as 3day_fav_usr_cnt,--3日收藏用户数
from
(
select reg_date, uid
from t_new_user
where reg_date = '2019-07-01'
group by reg_date, uid
) a
left join
(
select fav_date, uid
from t_favorite_act
where fav_date between '2019-07-01' and date_add('2019-07-01', 2)
group by fav_date, uid
) b
on a.uid = b.uid
left join
(
) c
on a.uid = c.uid
group by a.reg_date
```

```select a.reg_date, count(distinct a.uid) as new_install_usr_cnt,
count(distinct b.uid) as new_install_3day_fav_usr_cnt, --3日收藏用户数
from
(
select reg_date as new_date, uid
from t_new_user
where reg_date = '2019-07-01'

union all

select install_date as new_date, uid
from t_new_install_user
where install_date = '2019-07-01'
) a
left join
(
select fav_date, uid
from t_favorite_act
where fav_date between '2019-07-01' and date_add('2019-07-01', 2)
group by fav_date, uid
) b
on a.uid = b.uid
left join
(
) c
on a.uid = c.uid
group by a.reg_date
```

```select a.reg_date,
count(distinct case when a.type='new_user' then a.uid else null end) as new_usr_cnt,
count(distinct case when a.type='new_user' then b.uid else null end) as fav_usr_cnt,
count(distinct case when a.type='new_user' then c.uid else null end) as add_cart_usr_cnt,
count(distinct a.uid) as new_install_usr_cnt,
count(distinct b.uid) as new_install_3day_fav_usr_cnt,
from
(
select reg_date as new_date, uid, 'new_user' as type
from t_new_user
where reg_date = '2019-07-01'

union all

select install_date as new_date, uid, 'reinstall' as type
from t_new_install_user
where install_date = '2019-07-01'

) a
left join
(
select fav_date, uid
from t_favorite_act
where fav_date between '2019-07-01' and date_add('2019-07-01', 2)
group by fav_date, uid
) b
on a.uid = b.uid
left join
(
) c
on a.uid = c.uid
group by a.reg_date
```

datediff函数可以求两个日期的时间差。虽然目前的需求是求当日，三日，七日，也就是日期差分别是0，2，6(注意不是1，3，7)的情况。如果我们能求出所有的时间差diff_day，再对diff_day进行判断，不仅能处理当前的需求，就是再来几个别的不同的日期，那也可以只用一个case when就解决了，代码如下，显然比原来更清晰简洁。

```select
a.reg_date,
count(distinct case when fav_datediff = 0 and a.type='new_user' then a.uid) as new_install_user_current_day,
count(distinct case when fav_datediff = 0 and a.type='new_user' then b.uid else null end) as new_user_current_day_fav,
count(distinct case when fav_datediff > 0 and fav_datediff <= 2 and a.type='new_user' then b.uid else null end) as new_user_3_day_fav,
count(distinct case when fav_datediff > 0 and fav_datediff <= 6 and a.type='new_user' then b.uid else null end) as new_user_7_day_fav,
count(distinct case when add_datediff = 0 and a.type='new_user' then c.uid else null end) as new_user_current_day_add,
count(distinct case when add_datediff > 0 and add_datediff <= 2 and a.type='new_user' then c.uid else null end) as new_user_3_day_add,
count(distinct case when add_datediff > 0 and add_datediff <= 6 and a.type='new_user' then c.uid else null end) as new_user_7_day_add,
count(distinct case when fav_datediff = 0 then a.uid) as new_install_user_current_day,
count(distinct case when fav_datediff = 0 then b.uid else null end) as new_install_user_current_day_fav,
count(distinct case when fav_datediff > 0 and fav_datediff <= 2 then b.uid else null end) as new_install_user_3_day_fav,
count(distinct case when fav_datediff > 0 and fav_datediff <= 6 then b.uid else null end) as new_install_user_7_day_fav,
count(distinct case when add_datediff = 0 and then c.uid else null end) as new_install_user_current_day_add,
count(distinct case when add_datediff > 0 and add_datediff <= 2 then c.uid else null end) as new_install_user_3_day_add,
count(distinct case when add_datediff > 0 and add_datediff <= 6 then c.uid else null end) as new_install_user_7_day_add
from
(
select  a.reg_date, a.uid, a.type, b.uid, c.uid,
datediff(b.fav_date, a.reg_date) as fav_datediff,
from
(
select reg_date as new_date, uid, 'new_user' as type
from t_new_user
where reg_date = '2019-07-01'

union all

select install_date as new_date , uid, 'reinstall' as type
from t_new_install_user
where install_date = '2019-07-01'

) a
left join
(
select fav_date, uid
from t_favorite_act
where fav_date between '2019-07-01' and date_add('2019-07-01', 7)
--注意这里要取能满足datediff=6的日期范围
group by fav_date, uid
) b
on a.uid = b.uid
left join
(
--注意这里要取能满足datediff=6的日期范围
) c
on a.uid = c.uid
) d
```

```--次留
select a.reg_date, count(distinct a.uid) new_user, count(distinct b.uid) act_user
from
(
select reg_date, uid
from t_new_user
where reg_date >='20190801' and  reg_date <='20190807'
group by
reg_date, uid --每日每组新增用户
) a
left join
(
select act_date, uid
from t_active_user
where act_date >='20190801' and  act_date <='20190808'--至少要到0808才能保证0807有留存数据
group by act_date, uid  --每日每组日活用户
) b
on a.uid = b.uid
where datediff(b.act_date, a.reg_date) = 1
group by a.reg_date
```

```--次留
select a.reg_date, count(distinct a.uid) new_user,
count(distinct case when datediff(b.act_date, a.reg_date) = 1 then b.uid else null end) act_user
from
(
select reg_date, uid
from t_new_user
where reg_date >='20190801' and  reg_date <='20190807'
group by
reg_date, uid
) a
left join
(
select act_date, uid
from t_active_user
where act_date >='20190801' and  act_date <='20190808'--至少要到0808才能保证0807有留存数据
group by act_date, uid
) b
on a.uid = b.uid
group by a.reg_date
```

```select a.reg_date, count(distinct a.uid) new_user,
count(distinct case when datediff(b.act_date, a.reg_date) = 1 then b.uid else null end) 2day_act_user，
count(distinct case when datediff(b.act_date, a.reg_date) = 6 then b.uid else null end) 7day_act_user
from
(
select reg_date, uid
from t_new_user
where reg_date >='20190801' and  reg_date <='20190802'--有些天还不能计算7留的数据
group by
reg_date, uid
) a
left join
(
select act_date, uid
from t_active_user
where act_date >='20190801' and  act_date <='20190808'--至少要到0808才能保证0802有7日留存数据
group by act_date, uid
) b
on a.uid = b.uid
group by a.reg_date
```

```select
sum(case when datediff=0 then user_cnt else 0 end),
sum(case when datediff=1 then user_cnt else 0 end)
from
(
select datediff, count(distinct uid) as user_cnt
from
(
select uid, datediff(act_date，'2019-08-01') as datediff
from t_user_active_accu
where ds = '20190808'--ds表示分区，取一个最新的即可
and datediff('2019-08-01', act_date) >=0
) a
group by datediff
) b
```

0 条评论

• ### Python 3.9 也要来了？

Python 发布了版本号为 3.9.0b3 的 beta 版，后续即将发布 Python 3.9 的正式版。该版本包含了一些令人兴奋的新特性，预计正式版发布以...

• ### 你的灵魂画作都去哪儿了？“猜画小歌”背后的5000万组数据

自从退出中国的搜索引擎市场，谷歌大概无时不刻都想”卷土重来“。昨天，这家以搜索引擎著称的巨头公司用一款微信小程序“猜画小歌”占领了朋友圈“C位”，刷足存在感的同...

• ### Python 爬取 4027 条脉脉职言，解读互联网人的苦与难！

脉脉是一个实名职场社交平台。之前爬了脉脉职言版块，大概爬了4027条评论，本文对爬取过程给出详细说明，对于评论内容仅做可视化分析。

• ### 字符串与编码

在最新的Python 3版本中，字符串是以Unicode编码的，即Python的字符串支持多语言

• ### 松哥手把手带你捋一遍 Spring Security 登录流程

为什么想和大家捋一捋 Spring Security 登录流程呢？这是因为之前小伙伴们的一个提问：如何在 Spring Security 中动态修改用户信息？

• ### 重构 - 你为什么要对参数赋值？

对于学过多门语言的开发者来说，应该都明白在不同语言中对参数赋值有着不同的意义， 总的来说参数有值传递和引用传递两种， 而在Java中只有值传递的概念。

• ### Rainbond-Java源码构建自定义JDK版本

Rainbond官方提供了多个版本的OpenJDK供用户使用。这些OpenJDK的安装包托管于好雨科技官方的OSS（对象存储）中。能够接入互联网的Rainbon...

• ### #算法基础#选择和插入排序

算法是基础，小蓝同学准备些总结一系列算法分享给大家，这是第二篇《选择和插入排序》，非常赞！希望对大家有帮助，大家会喜欢！ 系列文章： 由快速排序到分治思想 ...

• ### 号称完全自主研发的国产编程语言「木兰」实为 Python 套壳，官方致歉！

1 月 15 日，一篇题为「面向智能物联 中国科研团队发布 “木兰” 编程语言体系」的文章刷爆了朋友圈。

• ### UWP 手绘视频创作工具技术分享系列 - 手绘视频导出

手绘视频最终的生成物是视频文件，前面几篇主要讲的是手绘视频的创作部分，今天讲一下手绘视频的导出问题。主要以 UWP 为例，另外会介绍一些 Web 端遇到的问题和...