题目如下:
由于推荐需要计算两个用户的共同朋友数量,有两个字段的表如下,两个字段分别代表,用户和朋友(朋友之间用逗号分隔):
user friends
A B,C,D,E
B C,E
C D,E
求:计算两个用户之间的共同朋友数量。
SELECT
CASE WHEN t1.user <= t2.user THEN t1.user
ELSE t2.user
END user1,
CASE WHEN t1.user <= t2.user THEN t2.user
ELSE t1.user
END user2,
COUNT(DISTINCT t1.friend) friends_cnt
FROM (
SELECT user,
friend
FROM (
SELECT 'A' user,
'B,C,D,E' friends
UNION ALL SELECT 'B' user,
'C,E' friends
UNION ALL SELECT 'C' user,
'D,E' friends
) LATERAL VIEW explode(split(friends, ',')) num AS friend
) t1
JOIN (
SELECT user,
friend
FROM (
SELECT 'A' user,
'B,C,D,E' friends
UNION ALL SELECT 'B' user,
'C,E' friends
UNION ALL SELECT 'C' user,
'D,E' friends
) LATERAL VIEW explode(split(friends, ',')) num AS friend
) t2
ON t1.friend = t2.friend
WHERE t1.user != t2.user
GROUP BY CASE WHEN t1.user <= t2.user THEN t1.user
ELSE t2.user
END,
CASE WHEN t1.user <= t2.user THEN t2.user
ELSE t1.user
END