# Question: Does a higher bill amount lead to a lower tip percentage?
import math

import altair as alt
import numpy as np
import seaborn as sns
import streamlit as st

# Step of the bill slider in dollars; the slider bounds are snapped outward to it.
BILL_STEP = 0.5
# Percentile clipping only runs when the selection has more rows than this.
MIN_ROWS_FOR_CLIPPING = 10
# |r| at or above this value is described as a directional association.
CORR_THRESHOLD = 0.20
# Shown in a KPI tile when the statistic cannot be computed.
PLACEHOLDER = "–"

st.set_page_config(
    page_title="Tips Explorer: Bill vs Tip %", page_icon="💸", layout="wide"
)


# 1) Data loading
@st.cache_data
def load_data():
    """Load the seaborn ``tips`` dataset and derive the tip percentage.

    Returns:
        A DataFrame restricted to the columns the app uses, with an added
        ``tip_pct`` column (``tip / total_bill * 100``) and rows containing
        missing values dropped.
    """
    frame = sns.load_dataset("tips").copy()
    frame["tip_pct"] = frame["tip"] / frame["total_bill"] * 100
    columns = ["total_bill", "tip", "tip_pct", "sex", "smoker", "day", "time", "size"]
    return frame[columns].dropna()


def _format_stat(value, spec, suffix=""):
    """Format ``value`` with format ``spec``; return the placeholder for NaN."""
    if math.isnan(value):
        return PLACEHOLDER
    return f"{value:{spec}}{suffix}"


def insight_text(n: int, r: float, avg: float) -> str:
    """Turn the selection's statistics into a plain-English Markdown sentence.

    Args:
        n: Number of rows in the current selection.
        r: Pearson correlation between total bill and tip percentage. May be
            NaN when the correlation is undefined (for example a single row).
        avg: Mean tip percentage of the selection.

    Returns:
        A Markdown string describing the direction of the association:
        negative when ``r <= -0.20``, positive when ``r >= 0.20``, otherwise
        "little to no clear" association. An explicit message is returned
        when there is no data or the correlation is undefined.
    """
    if n == 0:
        return "No data available under the current filters."

    if math.isnan(r):
        # NaN fails both threshold comparisons, so without this guard it would
        # be reported as "little to no clear association", which is misleading.
        return (
            f"**Insight:** Based on the current selection (n = {n}), the correlation "
            "between total bill and tip percentage is **undefined** (it needs at "
            "least two rows with varying bills and tip percentages). "
            f"The average tip percentage in this selection is **{avg:.1f}%**."
        )

    if r <= -CORR_THRESHOLD:
        trend = (
            "a **negative** association — larger bills tend to have "
            "**lower** tip percentages."
        )
    elif r >= CORR_THRESHOLD:
        trend = (
            "a **positive** association — larger bills tend to have "
            "**higher** tip percentages."
        )
    else:
        trend = "**little to no clear** linear association between bill size and tip percentage."

    return (
        f"**Insight:** Based on the current selection (n = {n}), the correlation between "
        f"total bill and tip percentage is **{r:+.2f}**, suggesting {trend} "
        f"The average tip percentage in this selection is **{avg:.1f}%**."
    )


tips = load_data()

# 2) Title & problem statement
st.title("💸 Do Bigger Bills Mean Smaller Tip % ?")
st.caption("Explore whether higher bills are associated with lower tipping percentages.")
st.markdown(
    "> **User question:** Does a higher bill amount lead to a lower tip percentage?"
)

# 3) Sidebar controls (≥ 2)
st.sidebar.header("Filters")

# (a) Bill range. Snap the bounds outward to the slider step: rounding to the
# nearest 0.1 could move a bound inside the data range, silently excluding the
# smallest or largest bill even when the full range is selected.
bill_min = math.floor(float(tips["total_bill"].min()) / BILL_STEP) * BILL_STEP
bill_max = math.ceil(float(tips["total_bill"].max()) / BILL_STEP) * BILL_STEP
bill_range = st.sidebar.slider(
    "Total bill range ($)",
    min_value=bill_min,
    max_value=bill_max,
    value=(bill_min, bill_max),
    step=BILL_STEP,
)

# (b) Days of the week
days = ["Thur", "Fri", "Sat", "Sun"]
day_choice = st.sidebar.multiselect("Day(s) of week", days, default=days)

# (c) Meal time
time_choice = st.sidebar.radio("Meal", options=["All", "Lunch", "Dinner"], index=0)

# (d) Outlier removal
clip_outliers = st.sidebar.checkbox("Remove extreme tip % (top/bottom 1%)", value=True)

# 4) Apply filters
df = tips[
    (tips["total_bill"] >= bill_range[0])
    & (tips["total_bill"] <= bill_range[1])
    & (tips["day"].isin(day_choice))
].copy()

if time_choice != "All":
    df = df[df["time"] == time_choice]

# Drop rows outside the 1st-99th percentile of tip % for more stable KPIs and
# visuals; skipped for very small selections.
if clip_outliers and len(df) > MIN_ROWS_FOR_CLIPPING:
    low, high = np.percentile(df["tip_pct"], [1, 99])
    df = df[(df["tip_pct"] >= low) & (df["tip_pct"] <= high)]

# 5) KPIs (≥ 1). Computed once and reused by the insight text below.
if df.empty:
    avg_tip_pct = med_tip_pct = corr = float("nan")
else:
    avg_tip_pct = df["tip_pct"].mean()
    med_tip_pct = df["tip_pct"].median()
    # Pearson correlation; NaN when it is undefined for the selection.
    corr = df["total_bill"].corr(df["tip_pct"])

col1, col2, col3 = st.columns(3)
col1.metric("Average Tip %", _format_stat(avg_tip_pct, ".1f", "%"))
col2.metric("Median Tip %", _format_stat(med_tip_pct, ".1f", "%"))
col3.metric("Corr( Bill , Tip % )", _format_stat(corr, "+.2f"))

st.divider()

# 6) Visualization (≥ 1)
st.subheader("Tip Percentage vs. Bill Amount")

if df.empty:
    st.info("No data under current filters. Try expanding the bill range or selecting more days.")
else:
    base = alt.Chart(df).properties(width=800, height=420)

    scatter = base.mark_circle(size=70, opacity=0.65, color="#4C78A8").encode(
        x=alt.X("total_bill:Q", title="Total Bill ($)"),
        y=alt.Y("tip_pct:Q", title="Tip Percentage (%)"),
        tooltip=[
            alt.Tooltip("total_bill:Q", title="Bill ($)", format=".2f"),
            alt.Tooltip("tip_pct:Q", title="Tip %", format=".1f"),
            alt.Tooltip("day:N", title="Day"),
            alt.Tooltip("time:N", title="Meal"),
            alt.Tooltip("size:Q", title="Party Size"),
        ],
    )

    # Linear trend line fitted by Altair's regression transform.
    reg = (
        base.transform_regression("total_bill", "tip_pct")
        .mark_line(color="#E45756", size=3)
        .encode(x="total_bill:Q", y="tip_pct:Q")
    )

    # Keep the default shared y scale: an independent scale would draw the
    # trend line against its own axis, so it would not line up with the points.
    chart = scatter + reg
    st.altair_chart(chart, use_container_width=True)

# 7) Dynamic insight text
st.markdown(insight_text(len(df), corr, avg_tip_pct))

# 8) Footnote & performance hint
st.caption(
    "Notes: correlation is computed with Pearson’s r. "
    "Extreme tip % values (top/bottom 1%) can be optionally removed for stability."
)