E-commerce A/B Test
A complete worked example: testing a new checkout flow in an online store.
Load the dataset
from splita.datasets import load_ecommerce

# Load the e-commerce A/B test dataset; later steps index it by string key.
data = load_ecommerce()

# Print the dataset's own human-readable summary.
description = data["description"]
print(description)
E-commerce A/B test: new checkout flow vs. existing.
5,000 users per group over 28 days.
Revenue is heavy-tailed (log-normal), with weekend lift.
Segments: new (50%), returning (35%), loyal (15%).
Expected effect: +1.5pp conversion uplift (~19% relative).
The dataset contains:
- control / treatment: revenue per user (0 if no purchase)
- pre_control / pre_treatment: pre-experiment page views
- timestamps: day index (0-27)
- user_segments: 'new', 'returning', 'loyal'
Step 1: Plan the experiment
from splita import SampleSize

# Planning inputs: ~8% baseline conversion, a 1.5pp lift to detect, 80% power.
plan = SampleSize.for_proportion(
    baseline=0.08,
    mde=0.015,
    power=0.80,
)

# Report the per-variant sample size, then the run time at 1,000 users per day.
print(f"Need {plan.n_per_variant} users per variant")
print(f"At 1,000 users/day: {plan.duration(1000).days_needed} days")
Step 2: Check data quality
from splita import SRMCheck

# Per-user revenue for each arm of the experiment.
ctrl, trt = data["control"], data["treatment"]

# Sample ratio mismatch (SRM) check on the observed group sizes.
group_sizes = [len(ctrl), len(trt)]
srm = SRMCheck(group_sizes).run()
print(f"SRM passed: {srm.passed} (p={srm.pvalue:.4f})")

# Stop here if the check failed, surfacing the check's own message.
assert srm.passed, srm.message
Step 3: Handle outliers
Revenue data is heavy-tailed, so winsorize it before analysis.
from splita.variance import OutlierHandler

# Winsorize both groups through a single handler before any analysis.
handler = OutlierHandler(method="winsorize")
cleaned = handler.fit_transform(ctrl, trt)
ctrl_clean, trt_clean = cleaned
Step 4: Apply CUPED variance reduction
Use pre-experiment page views as the covariate to reduce variance.
from splita.variance import CUPED

cuped = CUPED()

# Pre-experiment page views for each group serve as the covariates.
pre_ctrl = data["pre_control"]
pre_trt = data["pre_treatment"]

# Adjust the winsorized revenue of both groups in one call.
ctrl_adj, trt_adj = cuped.fit_transform(ctrl_clean, trt_clean, pre_ctrl, pre_trt)
print(f"Variance reduction: {cuped.variance_reduction_:.0%}")
Step 5: Analyze the primary metric (revenue)
from splita import Experiment

# Run the experiment on the CUPED-adjusted revenue of both groups.
experiment = Experiment(ctrl_adj, trt_adj)
result = experiment.run()
print(result)
Step 6: Analyze conversion rate separately
import numpy as np

# Revenue is 0 when a user made no purchase, so revenue > 0 marks a conversion.
ctrl_purchased = data["control"] > 0
trt_purchased = data["treatment"] > 0

# Cast the boolean flags to floats before handing them to Experiment.
ctrl_conv = ctrl_purchased.astype(float)
trt_conv = trt_purchased.astype(float)

conv_experiment = Experiment(ctrl_conv, trt_conv)
conv_result = conv_experiment.run()
print(f"Conversion lift: {conv_result.relative_lift}")
print(f"Significant: {conv_result.significant}")
Step 7: Correct for multiple testing
from splita import MultipleCorrection

# The two metrics tested above: revenue first, conversion second.
pvalues = [result.pvalue, conv_result.pvalue]
metric_labels = ["revenue", "conversion"]

corrected = MultipleCorrection(pvalues, labels=metric_labels).run()
print(f"Rejected: {corrected.rejected}")
print(f"Adjusted p-values: {corrected.adjusted_pvalues}")
Step 8: Segment analysis
Check whether the treatment effect varies by user segment.
import numpy as np

# NOTE(review): InteractionTest is imported but never used in this step —
# presumably intended for a formal interaction test; confirm or drop it.
from splita import Experiment, InteractionTest

# Coerce to an array so that `segments == seg` is an elementwise comparison
# that yields a boolean mask rather than a single bool.
# NOTE(review): assumes user_segments lines up index-for-index with both
# data["control"] and data["treatment"] — confirm against the dataset.
segments = np.asarray(data["user_segments"])
unique_segments = np.unique(segments)

# Run one Experiment per segment on the raw revenue and keep each result,
# keyed by segment label.
segment_results = {}
for seg in unique_segments:
    mask = segments == seg
    seg_result = Experiment(
        data["control"][mask],
        data["treatment"][mask],
    ).run()
    segment_results[seg] = seg_result
    # These are the raw per-segment p-values; no multiple-testing
    # correction is applied in this loop.
    print(f"{seg}: lift={seg_result.lift:.4f}, p={seg_result.pvalue:.4f}")
Step 9: Explain and report
from splita import explain, report

# Explanation of the revenue result first.
explanation = explain(result)
print(explanation)

print("---")

# Then the report for the same result.
summary = report(result)
print(summary)
Step 10: Bayesian perspective
from splita import BayesianExperiment

# Same CUPED-adjusted inputs as the revenue analysis in Step 5.
bayes_experiment = BayesianExperiment(ctrl_adj, trt_adj)
bayes = bayes_experiment.run()
print(f"P(treatment better): {bayes.prob_treatment_better:.3f}")
print(f"Expected loss: {bayes.expected_loss:.5f}")
Full pipeline in 15 lines
# Condensed version of the walkthrough: SRM check, winsorization, CUPED,
# the revenue experiment, and a printed explanation of the result.
from splita import Experiment, SRMCheck, explain
from splita.datasets import load_ecommerce
from splita.variance import CUPED, OutlierHandler
data = load_ecommerce()
ctrl, trt = data["control"], data["treatment"]
# Abort if the group sizes fail the sample ratio mismatch check.
assert SRMCheck([len(ctrl), len(trt)]).run().passed
# ctrl/trt are rebound at each stage, so the order of these lines matters.
ctrl, trt = OutlierHandler(method='winsorize').fit_transform(ctrl, trt)
ctrl, trt = CUPED().fit_transform(ctrl, trt, data["pre_control"], data["pre_treatment"])
result = Experiment(ctrl, trt).run()
print(explain(result))