1️⃣ 리텐션 분석 과제


1. Weekly Retention을 구하는 쿼리를 바닥부터 스스로 작성해보세요.

# Weekly Retention (Core: click_payment)
# Table info: 2022-08-01~2023-01-20, Row(731873), user_id(unique.49678), user_pseudo_id(unique.52823)
# 필요 컬럼: diff_of_week | user_cnt | first_week_user_cnt | retetion_rate
WITH base AS (
  SELECT 
    DISTINCT
      user_id
      , user_pseudo_id
      , DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date
  FROM advanced.app_logs
)
-- Week 단위 변환, First Week
, base_week AS (
  SELECT
    DISTINCT
      user_id
      , user_pseudo_id
      , DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
      , DATE_TRUNC(MIN(event_date) OVER(PARTITION BY user_pseudo_id), WEEK(MONDAY)) AS first_week
  FROM base
)
-- diff_week, user count
, diff_week_counts AS (
  SELECT
    DATE_DIFF(event_week, first_week, WEEK) AS diff_of_week
    , COUNT(DISTINCT user_pseudo_id) AS user_cnt
  FROM base_week
  GROUP BY diff_of_week
)
-- diff0_col 생성
, retention_base AS (
  SELECT 
    *
    , FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week) AS first_week_user_cnt
  FROM diff_week_counts
)
-- retetion_rate
SELECT
  *
  , ROUND(SAFE_DIVIDE(user_cnt, first_week_user_cnt),2) AS retention_rate
FROM retention_base
ORDER BY diff_of_week

2. Retention User를 주차별로 New + Current + Resurrected + Dormant User로 나누는 쿼리를 작성해보세요.

2-1. New, Dormant 기준 정의

<aside> 💡

스크린샷 2024-11-15 오후 3.45.39.png

스크린샷 2024-11-15 오후 3.46.06.png

2-2. 기준에 따른 주차별 New + Current + Resurrected + Dormant User 도출

유저 기준
신규(New) 첫 방문일 14일 이내인 유저
기존(Current) 첫 방문일 14일 초과 & 마지막 방문일 60일 이내
복귀(Resurrected) 첫 방문일 14일 초과 & 마지막 방문일 60일 이내 & 마지막 방문 이전 접속일 간격 60일 초과
휴면(Dormant) 첫 방문일 14일 초과 & 마지막 방문일 60일 초과한 유저
# 주차별로 New + Current + Resurrected + Dormant User
WITH base AS (
  SELECT
      DISTINCT
        user_pseudo_id
        , DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date
  FROM advanced.app_logs
  -- WHERE user_pseudo_id = '1001509348.6946038014' -- 동일주차 다른 날짜 접속 test case
)
-- 주차, 첫 방문일, 마지막(가장최근) 방문일 = LAG(1), 마지막 전 방문일 = LAG(2)
, user_event_date AS (
  SELECT
    user_pseudo_id
    , event_date
    , DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
    , MIN(event_date) OVER(PARTITION BY user_pseudo_id) AS first_date
    , LAG(event_date) OVER(PARTITION BY user_pseudo_id ORDER BY event_date) AS last_date
    , LAG(event_date, 2) OVER(PARTITION BY user_pseudo_id ORDER BY event_date) AS last2_date
  FROM base
)
-- event_date 기준 diff week or day 계산
, user_event_diff AS (
  SELECT
    user_pseudo_id
    , event_week
    , first_date
    , event_date
    , last_date
    , last2_date
    , DATE_DIFF(event_date, first_date, DAY) AS first_diff_day
    , DATE_DIFF(event_date, last_date, DAY) AS last_diff_day
    , DATE_DIFF(last_date, last2_date, DAY) AS last_last2_diff_day
    -- 동일주차 여러번 접속 처리 위함 (e.g. resurrected, current -> resurrected만)
    , ROW_NUMBER() OVER(PARTITION BY user_pseudo_id, event_week ORDER BY event_date) AS rn 
  FROM user_event_date
  QUALIFY rn = 1
)
-- 주차별 유저 세그 조건 분기
, week_user_seg AS (
  SELECT
    DISTINCT
    *
    , CASE
        WHEN first_diff_day <= 14 THEN 'new'
        WHEN first_diff_day > 14 AND last_diff_day > 60 THEN 'dormant'
        WHEN first_diff_day > 14 AND last_diff_day <= 60 AND last_last2_diff_day > 60 THEN 'resurrected'
        WHEN first_diff_day > 14 AND last_diff_day <= 60 THEN 'current'
        ELSE 'others'
    END AS user_seg
  FROM user_event_diff
)
, week_user_seg_counts AS (
  SELECT 
    event_week
    -- , COUNT(DISTINCT user_pseudo_id) AS user_cnt
    , COUNT(DISTINCT CASE WHEN user_seg = 'new' THEN user_pseudo_id END) AS new_user
    , COUNT(DISTINCT CASE WHEN user_seg = 'current' THEN user_pseudo_id END) AS current_user
    , COUNT(DISTINCT CASE WHEN user_seg = 'resurrected' THEN user_pseudo_id END) AS resrrected_user
    , COUNT(DISTINCT CASE WHEN user_seg = 'dormant' THEN user_pseudo_id END) AS dormant_user
  FROM week_user_seg
  GROUP BY event_week
)
-- -- 수치 검증
-- SELECT 
--   *
--   , new_user + current_user + resrrected_user + dormant_user AS total_cnt
--   , user_cnt - (new_user + current_user + resrrected_user + dormant_user) AS diff_cnt
-- FROM week_user_seg_counts
-- WHERE user_cnt != (new_user + current_user + resrrected_user + dormant_user)
-- ORDER BY event_week
SELECT *
FROM week_user_seg_counts
ORDER BY event_week