字节大数据sql原题(五种解法)
显示高出勤期间的id和visitor_cnt。高出勤期间定义为三个及以上连续的id编号,访客人数超过100人。按照id升序排序输出结果。
·
显示高出勤期间的id和visitor_cnt。高出勤期间定义为三个及以上连续的id编号,访客人数超过100人 。按照id升序排序输出结果。
数据源
with data as (
select 1 as id, 100 as visitor_cn union all
select 2 as id, 10 as visitor_cn union all
select 3 as id, 120 as visitor_cn union all
select 4 as id, 110 as visitor_cn union all
select 5 as id, 700 as visitor_cn union all
select 6 as id, 100 as visitor_cn union all
select 8 as id, 60 as visitor_cn union all
select 9 as id, 160 as visitor_cn union all
select 10 as id, 160 as visitor_cn union all
select 11 as id, 160 as visitor_cn
)
思路
连续id,这个已经锚定了一个思路,打标分组,把连续的id分到同一组上
解法1
where
限定条件过滤不合适的数据,row_number()
直接排序,此时得到全局连续的rn
,但原id
过滤后就是不连续的,此时原id
减去rn
,就会得到一个个标记,标记相同就是同一组并连续的,不同则是不同一组且不连续的,开窗count()
统计个数并过滤不符合要求的
select id, visitor_cn
from (
select id, visitor_cn
, count(1) over(partition by id - row_number() over (order by id)
rows between unbounded preceding and unbounded following) cnt
from data
where visitor_cn > 100
) t
where cnt >= 3
order by id
解法2
lag()
取上一条数据id
,使用if()
打标0、1,sum()
累加分组汇总
select id, visitor_cn
from (
select id, visitor_cn, count(1) over(partition by par rows between unbounded preceding and unbounded following) cnt
from (
select id, visitor_cn, sum(`if`(id - lag(id) over(order by id) = 1, 0, 1)) over(order by id) par
from data
where visitor_cn > 100
) t
) t1
where cnt >= 3
解法3
使用left join
左外不等连接,取得连续一个id
,开窗count(B.id)
累计一个伪连续序列(0,1,2,3,3,4,5),A.id - count(B.id) over()
,一个不连续序列(1,2,3,4,7,8,9)减去一个伪连续序列(0,1,2,3,3,4,5),求得分组
select id, visitor_cn
from (
SELECT A.id, A.visitor_cn
,count(1) over(partition by A.id - COUNT(B.id) OVER (ORDER BY A.id)
rows between unbounded preceding and unbounded following) AS cnt
FROM data A
LEFT JOIN data B ON A.id = B.id + 1
WHERE A.visitor_cn > 100
) t
where cnt >= 3
解法4
使用子查询,lag()
取前二位数并与id
相减,过滤后构造array(i,i-1,i-2)
数组并炸裂开去重得到连续id
select id, visitor_cn
from data
where id in (
select distinct explode(`array`(id, id - 1, id - 2)) id
from (
select id, visitor_cn, lag(id, 2) over(order by id) rk
from data
where visitor_cn > 100
) t1
where id - 2 = rk
)
order by id
解法5
道理一样,不过使用count() over(range)
取等于_3_ 的数据行,过滤后构造array(i,i-1,i-2)
数组并炸裂开去重得到连续id
select id, visitor_cn
from data
where id in (
select distinct explode(`array`(id, id - 1, id - 2)) id
from (
select id, visitor_cn
, count(1) over(order by id range between 2 preceding and current row) cnt
from data
where visitor_cn > 100
) t1
where cnt = 3
)
order by id
这是我的几个解法,主要是这个打标分组的逻辑很重要,通过函数计算前后项从而打上标签
有新的解法欢迎下方留言~
sql代码附录
with data as (
select 1 as id, 100 as visitor_cn union all
select 2 as id, 100 as visitor_cn union all
select 3 as id, 120 as visitor_cn union all
select 4 as id, 110 as visitor_cn union all
select 5 as id, 700 as visitor_cn union all
select 6 as id, 100 as visitor_cn union all
select 8 as id, 60 as visitor_cn union all
select 9 as id, 160 as visitor_cn union all
select 10 as id, 160 as visitor_cn union all
select 11 as id, 160 as visitor_cn
)
--------------------------------------count()&row_number()-------------------------------------
-- select id, visitor_cn
-- from (
-- select id, visitor_cn
-- , count(1) over(partition by id - row_number() over (order by id)
-- rows between unbounded preceding and unbounded following) cnt
-- from data
-- where visitor_cn > 100
-- ) t
-- where cnt >= 3
-- order by id
------------------------------------------sum()&lag()------------------------------------------------
-- select id, visitor_cn
-- from (
-- select id, visitor_cn, count(1) over(partition by par rows between unbounded preceding and unbounded following) cnt
-- from (
-- select id, visitor_cn, sum(`if`(id - lag(id) over(order by id) = 1, 0, 1)) over(order by id) par
-- from data
-- where visitor_cn > 100
-- ) t
-- ) t1
-- where cnt >= 3
--------------------------------------count()&LEFT JOIN------------------------------------------------------
-- select id, visitor_cn
-- from (
-- SELECT A.id, A.visitor_cn
-- ,count(1) over(partition by A.id - COUNT(B.id) OVER (ORDER BY A.id)
-- rows between unbounded preceding and unbounded following) AS cnt
-- FROM data A
-- LEFT JOIN data B ON A.id = B.id + 1
-- WHERE A.visitor_cn >= 100
-- ) t
-- where cnt >= 3
-----------------------------------in () & array()$explode()-----------------------------------------------------
-- select id, visitor_cn
-- from data
-- where id in (
-- select distinct explode(`array`(id, id - 1, id - 2)) id
-- from (
-- select id, visitor_cn, lag(id, 2) over(order by id) rk
-- from data
-- where visitor_cn > 100
-- ) t1
-- where id - 2 = rk
-- )
-- order by id
---------------------------- explode() & array() & count() over(range)----------------------------------------------------
-- select id, visitor_cn
-- from data
-- where id in (
-- select distinct explode(`array`(id, id - 1, id - 2)) id
-- from (
-- select id, visitor_cn
-- , count(1) over(order by id range between 2 preceding and current row) cnt
-- from data
-- where visitor_cn > 100
-- ) t1
-- where cnt = 3
-- )
-- order by id
更多推荐
所有评论(0)