Spaces:
Sleeping
Sleeping
| # Question: Does a higher bill amount lead to a lower tip percentage? | |
| import numpy as np | |
| import seaborn as sns | |
| import streamlit as st | |
| import altair as alt | |
# Configure the browser tab (title and icon) and use the wide page layout.
# NOTE(review): the icon had been reduced to mis-decoded UTF-8 bytes; it is
# restored here as the money-with-wings emoji, presumably the original value.
st.set_page_config(page_title="Tips Explorer: Bill vs Tip %", page_icon="💸", layout="wide")
| # 1) Data loading | |
@st.cache_data
def load_data():
    """Load the seaborn ``tips`` dataset and add a tip-percentage column.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached with ``st.cache_data`` instead of reloading the
    dataset and recomputing the derived column on each rerun.

    Returns:
        A DataFrame restricted to the columns ``total_bill``, ``tip``,
        ``tip_pct`` (tip as a percentage of the total bill), ``sex``,
        ``smoker``, ``day``, ``time`` and ``size``, with rows containing
        missing values dropped.
    """
    df = sns.load_dataset("tips").copy()
    df["tip_pct"] = df["tip"] / df["total_bill"] * 100
    keep = ["total_bill", "tip", "tip_pct", "sex", "smoker", "day", "time", "size"]
    return df[keep].dropna()
tips = load_data()

# 2) Title & problem statement
# NOTE(review): the title emoji had been reduced to mis-decoded UTF-8 bytes;
# it is restored as the money-with-wings emoji, presumably the original value.
st.title("💸 Do Bigger Bills Mean Smaller Tip % ?")
st.caption("Explore whether higher bills are associated with lower tipping percentages.")
st.markdown(
    "> **User question:** Does a higher bill amount lead to a lower tip percentage?"
)
# 3) Sidebar controls (≥ 2)
st.sidebar.header("Filters")

# (a) Bill range.
# The slider bounds are snapped *outward* to a multiple of the step. Rounding
# them to one decimal (as before) can land inside the data range -- e.g. a
# minimum of 3.07 rounds up to 3.1 -- which silently filters out the smallest
# or largest bills even when the slider is fully open.
bill_step = 0.5
bill_min = float(tips["total_bill"].min())
bill_max = float(tips["total_bill"].max())
slider_min = float(np.floor(bill_min / bill_step) * bill_step)
slider_max = float(np.ceil(bill_max / bill_step) * bill_step)
bill_range = st.sidebar.slider(
    "Total bill range ($)",
    min_value=slider_min,
    max_value=slider_max,
    value=(slider_min, slider_max),
    step=bill_step,
)

# (b) Weekdays: every listed day is selected by default.
days = ["Thur", "Fri", "Sat", "Sun"]
day_choice = st.sidebar.multiselect("Day(s) of week", days, default=days)

# (c) Mealtime: "All" (the default) means no filtering on the meal.
time_choice = st.sidebar.radio("Meal", options=["All", "Lunch", "Dinner"], index=0)

# (d) Outlier removal: enabled by default.
clip_outliers = st.sidebar.checkbox("Remove extreme tip % (top/bottom 1%)", value=True)
# 4) Apply filters
# Build one boolean row mask from the sidebar selections, then materialize
# the filtered frame once.
lower_bill, upper_bill = bill_range[0], bill_range[1]
row_mask = tips["total_bill"].between(lower_bill, upper_bill) & tips["day"].isin(day_choice)
if time_choice != "All":
    # A specific meal was picked, so narrow the selection to it.
    row_mask = row_mask & (tips["time"] == time_choice)
df = tips[row_mask].copy()

# Outlier removal (for more stable KPIs and visualizations): keep only rows
# whose tip % lies within the 1st-99th percentile band. Skipped for small
# selections (10 rows or fewer).
if clip_outliers and len(df) > 10:
    low, high = np.percentile(df["tip_pct"], [1, 99])
    df = df[df["tip_pct"].between(low, high)]
# 5) KPIs (≥ 1)
col1, col2, col3 = st.columns(3)
placeholder = "—"  # shown whenever a metric cannot be computed
if len(df) > 0:
    avg_tip_pct = df["tip_pct"].mean()
    med_tip_pct = df["tip_pct"].median()
    corr = df["total_bill"].corr(df["tip_pct"])  # Pearson correlation
    col1.metric("Average Tip %", f"{avg_tip_pct:.1f}%")
    col2.metric("Median Tip %", f"{med_tip_pct:.1f}%")
    # The correlation is NaN when the selection has a single row or one of
    # the columns has no variance; show the placeholder instead of "+nan".
    corr_label = f"{corr:+.2f}" if np.isfinite(corr) else placeholder
    col3.metric("Corr( Bill , Tip % )", corr_label)
else:
    col1.metric("Average Tip %", placeholder)
    col2.metric("Median Tip %", placeholder)
    col3.metric("Corr( Bill , Tip % )", placeholder)
st.divider()
# 6) Visualization (≥ 1)
st.subheader("Tip Percentage vs. Bill Amount")
if len(df) == 0:
    st.info("No data under current filters. Try expanding the bill range or selecting more days.")
else:
    base = alt.Chart(df).properties(width=800, height=420)

    # One point per bill, with the row's details available on hover.
    scatter = (
        base.mark_circle(size=70, opacity=0.65, color="#4C78A8")
        .encode(
            x=alt.X("total_bill:Q", title="Total Bill ($)"),
            y=alt.Y("tip_pct:Q", title="Tip Percentage (%)"),
            tooltip=[
                alt.Tooltip("total_bill:Q", title="Bill ($)", format=".2f"),
                alt.Tooltip("tip_pct:Q", title="Tip %", format=".1f"),
                alt.Tooltip("day:N", title="Day"),
                alt.Tooltip("time:N", title="Meal"),
                alt.Tooltip("size:Q", title="Party Size"),
            ],
        )
    )

    # Trend line from Altair's built-in regression transform
    # (tip_pct regressed on total_bill).
    reg = (
        base.transform_regression("total_bill", "tip_pct")
        .mark_line(color="#E45756", size=3)
        .encode(x="total_bill:Q", y="tip_pct:Q")
    )

    # Layer both marks on the SAME y scale. Resolving y as "independent"
    # gave the regression line its own axis, so the line was stretched over
    # a different range than the points and no longer lined up with them.
    chart = scatter + reg
    st.altair_chart(chart, use_container_width=True)
| # 7) Dynamic insight text | |
def insight_text(n, r, avg):
    """Build the markdown insight sentence for the current selection.

    Args:
        n: Number of rows in the current selection.
        r: Pearson correlation between total bill and tip percentage.
            May be ``None`` or NaN when it cannot be computed (for example
            a single row, or a column with no variation).
        avg: Average tip percentage of the selection.

    Returns:
        A markdown string. For ``n == 0`` a fixed "no data" message;
        otherwise a sentence reporting the correlation (or stating that it
        cannot be computed) followed by the average tip percentage.
    """
    import math

    if n == 0:
        return "No data available under the current filters."

    avg_sentence = f"The average tip percentage in this selection is **{avg:.1f}%**."

    # NaN compares False against every threshold, so without this guard an
    # undefined correlation would be reported as "+nan ... little to no
    # clear association", which is misleading.
    if r is None or math.isnan(r):
        return (
            f"**Insight:** Based on the current selection (n = {n}), the correlation between "
            "total bill and tip percentage cannot be computed "
            "(too few data points or no variation). "
            f"{avg_sentence}"
        )

    # Turn the correlation (r) into a plain-English explanation:
    #   r <= -0.20 → bigger bills usually mean lower tip percentages
    #   r >= +0.20 → bigger bills usually mean higher tip percentages
    #   otherwise  → little to no linear relationship
    if r <= -0.20:
        trend = "a **negative** association — larger bills tend to have **lower** tip percentages."
    elif r >= 0.20:
        trend = "a **positive** association — larger bills tend to have **higher** tip percentages."
    else:
        trend = "**little to no clear** linear association between bill size and tip percentage."

    return (
        f"**Insight:** Based on the current selection (n = {n}), the correlation between "
        f"total bill and tip percentage is **{r:+.2f}**, suggesting {trend} "
        f"{avg_sentence}"
    )
# Render the insight sentence for the current selection. An empty selection
# passes neutral 0.0 values for the correlation and the average.
selection_size = len(df)
if selection_size == 0:
    selection_corr = 0.0
    selection_avg = 0.0
else:
    selection_corr = df["total_bill"].corr(df["tip_pct"])
    selection_avg = df["tip_pct"].mean()
st.markdown(insight_text(selection_size, selection_corr, selection_avg))
# 8) Footnote & performance hint
# The apostrophe in "Pearson's" had been mis-decoded into a stray character;
# it is restored here.
st.caption(
    "Notes: correlation is computed with Pearson’s r. "
    "Extreme tip % values (top/bottom 1%) can be optionally removed for stability."
)