# 1. 计算平台同时在线的主播人数最大值为多少,以及出现高峰期的时间段
# 主播表
create table zhubo  | |
( | |
id int,  | |
    stt string, | |
edt string  | |
) | |
row format delimited fields terminated by '\t';  | 
# 数据
insert overwrite table zhubo VALUES  | |
(1001,'2021-09-04 12:12:12','2021-09-04 20:12:12'),  | |
(1002,'2021-09-04 12:13:12','2021-09-04 19:12:12'),  | |
(1003,'2021-09-04 13:12:12','2021-09-04 19:50:12'),  | |
(1004,'2021-09-04 13:15:12','2021-09-04 18:12:12'),  | |
(1005,'2021-09-04 14:12:12','2021-09-04 19:12:15'),  | |
(1006,'2021-09-04 14:16:12','2021-09-04 20:12:23'),  | |
(1003,'2021-09-04 20:12:12','2021-09-04 23:12:12'),  | |
(1001,'2021-09-04 21:12:12','2021-09-04 22:12:12');  | 
- 把时间段转成时间点,模拟流式数据,标记位上线 + 1,下线 - 1
 
select id, stt dt, 1 flag  | |
from zhubo | |
union | |
select id, edt dt, -1 flag  | |
from zhubo; t1  | 
- 统计每个时间点的在线人数
 
select id,  | |
       dt, | |
sum(flag) over (order by dt) people  | |
from (select id, stt dt, 1 flag  | |
      from zhubo | |
      union | |
select id, edt dt, -1 flag  | |
from zhubo) t1; t2  | 
- 按照 时间流将下一个时间点移至当前行
 
select dt,  | |
       people, | |
lead(dt, 1) over (order by dt)  | |
from (select id, dt, sum(flag) over (order by dt) people  | |
from (select id, stt dt, 1 flag  | |
            from zhubo | |
            union | |
select id, edt dt, -1 flag  | |
from zhubo) t1) t2;  | 
- 求最大在线人数
 
select max(people) max_p  | |
from (select id, dt, sum(flag) over (order by dt) people  | |
from (select id, stt dt, 1 flag  | |
            from zhubo | |
            union | |
select id, edt dt, -1 flag  | |
from zhubo) t1) t2;  | 
- 最后将 3、4 做 join
 
同时在线人数的表多次被使用  | |
with t as (  | |
select id,  | |
           dt, | |
sum(flag) over (order by dt) people  | |
from (select id, stt dt, 1 flag  | |
          from zhubo | |
          union | |
select id, edt dt, -1 flag  | |
from zhubo) t1  | |
) | |
select dt, people, dtt  | |
from (  | |
select dt, people, lead(dt, 1) over (order by dt) dtt  | |
         from t | |
     ) t1 | |
join (  | |
select max(people) max_p  | |
    from t | |
) t2 on t1.people = t2.max_p  |