Au-delà des window functions classiques, certains patterns SQL analytiques différencient vraiment les profils experts. En entretien Senior, ce sont ces patterns qu on teste.
Comment définissez-vous et calculez-vous des sessions utilisateur en SQL ?
-- Définir une session : séquence d événements
-- avec moins de 30 minutes d inactivité entre chaque
WITH events_with_gap AS (
SELECT
user_id,
event_time,
LAG(event_time) OVER (PARTITION BY user_id ORDER BY event_time) AS prev_event,
DATEDIFF('minute',
LAG(event_time) OVER (PARTITION BY user_id ORDER BY event_time),
event_time
) AS minutes_since_last
FROM events
),
session_start AS (
SELECT *,
CASE WHEN minutes_since_last > 30
OR minutes_since_last IS NULL
THEN 1 ELSE 0 END AS is_new_session
FROM events_with_gap
)
SELECT
user_id,
event_time,
SUM(is_new_session) OVER (
PARTITION BY user_id
ORDER BY event_time
ROWS UNBOUNDED PRECEDING
) AS session_id
FROM session_start;Comment calculez-vous un entonnoir de conversion en SQL ?
-- Funnel : combien d utilisateurs passent chaque étape
WITH user_steps AS (
SELECT
user_id,
MAX(CASE WHEN event = 'page_view' THEN 1 ELSE 0 END) AS step1,
MAX(CASE WHEN event = 'add_to_cart' THEN 1 ELSE 0 END) AS step2,
MAX(CASE WHEN event = 'checkout' THEN 1 ELSE 0 END) AS step3,
MAX(CASE WHEN event = 'purchase' THEN 1 ELSE 0 END) AS step4
FROM events
WHERE event_date >= '2025-01-01'
GROUP BY user_id
)
SELECT
COUNT(*) FILTER (WHERE step1 = 1) AS step1_users,
COUNT(*) FILTER (WHERE step2 = 1) AS step2_users,
COUNT(*) FILTER (WHERE step3 = 1) AS step3_users,
COUNT(*) FILTER (WHERE step4 = 1) AS step4_users,
ROUND(COUNT(*) FILTER (WHERE step2 = 1) * 100.0 /
NULLIF(COUNT(*) FILTER (WHERE step1 = 1), 0), 1) AS step1_to_2_pct
FROM user_steps;Comment calculez-vous un score RFM (Récence, Fréquence, Montant) en SQL ?
-- RFM : segmenter les clients selon leurs achats
WITH rfm_base AS (
SELECT
customer_id,
MAX(order_date) AS last_purchase,
COUNT(*) AS frequency,
SUM(amount) AS monetary,
DATEDIFF('day', MAX(order_date), CURRENT_DATE) AS recency_days
FROM orders
WHERE order_date >= DATEADD('year', -1, CURRENT_DATE)
GROUP BY customer_id
),
rfm_scores AS (
SELECT *,
NTILE(5) OVER (ORDER BY recency_days ASC) AS r_score,
NTILE(5) OVER (ORDER BY frequency DESC) AS f_score,
NTILE(5) OVER (ORDER BY monetary DESC) AS m_score
FROM rfm_base
)
SELECT *,
r_score || f_score || m_score AS rfm_segment,
CASE
WHEN r_score >= 4 AND f_score >= 4 THEN 'Champions'
WHEN r_score >= 3 AND f_score >= 3 THEN 'Loyal'
WHEN r_score <= 2 AND f_score >= 3 THEN 'At Risk'
ELSE 'Others'
END AS segment
FROM rfm_scores;Comment trouvez-vous les périodes de gap dans une série temporelle ?
-- Détecter les jours sans activité dans une série
WITH daily_activity AS (
SELECT DISTINCT DATE(event_time) AS active_date
FROM events
WHERE user_id = 'U001'
),
dates_with_next AS (
SELECT
active_date,
LEAD(active_date) OVER (ORDER BY active_date) AS next_date,
DATEDIFF('day', active_date,
LEAD(active_date) OVER (ORDER BY active_date)) AS gap_days
FROM daily_activity
)
SELECT
active_date AS gap_start,
next_date AS gap_end,
gap_days - 1 AS days_inactive
FROM dates_with_next
WHERE gap_days > 1 -- plus d 1 jour de gap
ORDER BY gap_days DESC;Comment répartissez-vous le crédit d une conversion entre plusieurs canaux marketing ?
-- Attribution linéaire : répartir également entre tous les touchpoints
WITH touchpoints_before_conversion AS (
SELECT
user_id,
channel,
touch_date,
conversion_revenue,
COUNT(*) OVER (PARTITION BY user_id) AS total_touches
FROM marketing_touches
JOIN conversions USING (user_id)
WHERE touch_date <= conversion_date
)
SELECT
channel,
SUM(conversion_revenue / total_touches) AS attributed_revenue,
COUNT(*) AS nb_touchpoints
FROM touchpoints_before_conversion
GROUP BY channel
ORDER BY attributed_revenue DESC;
-- Attribution last-touch
SELECT channel, SUM(conversion_revenue) AS attributed_revenue
FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY touch_date DESC) AS rn
FROM touchpoints_before_conversion
)
WHERE rn = 1
GROUP BY channel;Comment créez-vous un pivot dynamique sans connaître à l avance les valeurs des colonnes ?
-- Snowflake : PIVOT dynamique
SELECT *
FROM (
SELECT region, quarter, revenue
FROM sales_data
)
PIVOT (
SUM(revenue)
FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4')
) AS pivot_table;
-- BigQuery : agrégation conditionnelle
SELECT
region,
SUM(IF(quarter = 'Q1', revenue, 0)) AS Q1,
SUM(IF(quarter = 'Q2', revenue, 0)) AS Q2,
SUM(IF(quarter = 'Q3', revenue, 0)) AS Q3,
SUM(IF(quarter = 'Q4', revenue, 0)) AS Q4
FROM sales_data
GROUP BY region;| Niveau | Maitrise | Signal GO | NO-GO |
|---|---|---|---|
| Confirmé | Window functions, funnel basique, RFM | Écrit un funnel en SQL, calcule un RFM avec NTILE | Ne sait pas écrire un funnel sans GROUP BY séparé |
| Senior | Sessionisation, attribution, intervals/gaps | Implémente la sessionisation avec LAG, fait de l attribution multi-touch | Ne sait pas créer des sessions en SQL |
Premier entretien gratuit. Rapport GO/NO-GO sous 48h.