Spaces:

huohuobeixiaosile
/

666

Sleeping

App Files Files Community

huohuobeixiaosile committed on Sep 13

Commit

e3823e4

verified ·

1 Parent(s): 84e588e

Upload 2 files

Browse files

Files changed (2) hide show

W2_assignment_streamlit.py +162 -0
requirements.txt +6 -0

W2_assignment_streamlit.py ADDED Viewed

	@@ -0,0 +1,162 @@

+# Question: Does a higher bill amount lead to a lower tip percentage?
+import numpy as np
+import seaborn as sns
+import streamlit as st
+import altair as alt
+# Configure the page (browser-tab title and icon, wide layout); this is the first Streamlit call in the script.
+st.set_page_config(page_title="Tips Explorer: Bill vs Tip %", page_icon="💸", layout="wide")
+# 1) Data loading
+@st.cache_data
+def load_data():
+    """Load seaborn's "tips" dataset and derive the tip percentage.
+
+    Returns a DataFrame limited to the columns the app uses, with an added
+    ``tip_pct`` column (tip as a percentage of the total bill) and with any
+    rows containing missing values removed.
+    """
+    columns = ["total_bill", "tip", "tip_pct", "sex", "smoker", "day", "time", "size"]
+    # Work on a copy so the frame handed out by seaborn is never mutated.
+    raw = sns.load_dataset("tips").copy()
+    raw["tip_pct"] = raw["tip"] / raw["total_bill"] * 100
+    return raw[columns].dropna()
+# load_data is wrapped in st.cache_data, so the dataset is not rebuilt on every rerun.
+tips = load_data()
+# 2) Title & problem statement
+st.title("💸 Do Bigger Bills Mean Smaller Tip % ?")
+st.caption("Explore whether higher bills are associated with lower tipping percentages.")
+# Rendered as a Markdown block quote so the guiding question stands out from the caption.
+st.markdown("> **User question:** Does a higher bill amount lead to a lower tip percentage?")
+# 3) Sidebar controls (≥ 2)
+st.sidebar.header("Filters")
+# (a) bill
+# Round the bounds outward to whole dollars. Rounding to the nearest 0.1 could
+# move the lower bound above the smallest bill (or the upper bound below the
+# largest), which silently drops those rows even when the full range is
+# selected, and it left bounds that do not line up with the 0.5 step.
+bill_min = float(np.floor(tips["total_bill"].min()))
+bill_max = float(np.ceil(tips["total_bill"].max()))
+bill_range = st.sidebar.slider(
+    "Total bill range ($)",
+    min_value=bill_min,
+    max_value=bill_max,
+    value=(bill_min, bill_max),  # default: the whole range, i.e. no bill filtering
+    step=0.5,
+)
+# (b) weekdays
+days = ["Thur", "Fri", "Sat", "Sun"]
+day_choice = st.sidebar.multiselect("Day(s) of week", days, default=days)
+# (c) mealtime
+# "All" is a sentinel meaning "do not filter by meal"; it is not a value of the "time" column.
+time_choice = st.sidebar.radio("Meal", options=["All", "Lunch", "Dinner"], index=0)
+# (d) Outlier Removal
+clip_outliers = st.sidebar.checkbox("Remove extreme tip % (top/bottom 1%)", value=True)
+# 4) Apply filters
+# Both ends of the bill range are inclusive (Series.between defaults to inclusive="both").
+in_bill_range = tips["total_bill"].between(bill_range[0], bill_range[1])
+on_selected_day = tips["day"].isin(day_choice)
+df = tips[in_bill_range & on_selected_day].copy()
+# "All" means no meal filter; any other choice keeps only the matching rows.
+if time_choice != "All":
+    df = df[df["time"] == time_choice]
+# Outlier Removal (for More Stable KPIs and Visualizations)
+# Skipped for very small selections (10 rows or fewer).
+if clip_outliers and len(df) > 10:
+    low, high = np.percentile(df["tip_pct"], [1, 99])
+    df = df[df["tip_pct"].between(low, high)]
+# 5) KPIs (≥ 1)
+col1, col2, col3 = st.columns(3)
+if len(df) > 0:
+    avg_tip_pct = df["tip_pct"].mean()
+    med_tip_pct = df["tip_pct"].median()
+    corr = df["total_bill"].corr(df["tip_pct"])  # Pearson Correlation
+    # Pearson's r is NaN when the selection has a single row or one of the
+    # columns is constant; show the same dash as the empty case, not "+nan".
+    corr_label = f"{corr:+.2f}" if np.isfinite(corr) else "–"
+    col1.metric("Average Tip %", f"{avg_tip_pct:.1f}%")
+    col2.metric("Median Tip %", f"{med_tip_pct:.1f}%")
+    col3.metric("Corr( Bill , Tip % )", corr_label)
+else:
+    # Nothing selected: keep the three tiles in place with placeholder values.
+    col1.metric("Average Tip %", "–")
+    col2.metric("Median Tip %", "–")
+    col3.metric("Corr( Bill , Tip % )", "–")
+st.divider()
+# 6) Visualization (≥ 1)
+st.subheader("Tip Percentage vs. Bill Amount")
+if len(df) == 0:
+    st.info("No data under current filters. Try expanding the bill range or selecting more days.")
+else:
+    base = alt.Chart(df).properties(width=800, height=420)
+    # One point per table in the current selection.
+    scatter = (
+        base.mark_circle(size=70, opacity=0.65, color="#4C78A8")
+        .encode(
+            x=alt.X("total_bill:Q", title="Total Bill ($)"),
+            y=alt.Y("tip_pct:Q", title="Tip Percentage (%)"),
+            tooltip=[
+                alt.Tooltip("total_bill:Q", title="Bill ($)", format=".2f"),
+                alt.Tooltip("tip_pct:Q", title="Tip %", format=".1f"),
+                alt.Tooltip("day:N", title="Day"),
+                alt.Tooltip("time:N", title="Meal"),
+                alt.Tooltip("size:Q", title="Party Size"),
+            ],
+        )
+    )
+    # Trend line from Altair's built-in regression transform (tip_pct on total_bill).
+    reg = (
+        base.transform_regression("total_bill", "tip_pct")
+        .mark_line(color="#E45756", size=3)
+        .encode(x="total_bill:Q", y="tip_pct:Q")
+    )
+    # Layer with the default shared y scale. Resolving y as "independent" gave
+    # the regression line its own axis, so it was drawn on a different scale
+    # from the points it is meant to summarise.
+    chart = scatter + reg
+    st.altair_chart(chart, use_container_width=True)
+# 7) Dynamic insight text
+def insight_text(n, r, avg):
+    """Return a Markdown sentence describing the bill / tip-percentage relationship.
+
+    Args:
+        n: Number of rows in the current selection.
+        r: Pearson correlation between total bill and tip percentage; may be
+            NaN when it cannot be computed.
+        avg: Average tip percentage of the selection.
+
+    Returns:
+        A Markdown string. A fixed message when ``n`` is 0, a "cannot be
+        computed" message when ``r`` is NaN, otherwise a plain-English reading
+        of ``r`` followed by the average tip percentage.
+    """
+    if n == 0:
+        return "No data available under the current filters."
+    # NaN is the only value that is not equal to itself. Without this branch a
+    # NaN correlation fails both threshold tests below and is reported as
+    # "+nan ... little to no clear linear association".
+    if r != r:
+        return (
+            f"**Insight:** Based on the current selection (n = {n}), the correlation between "
+            "total bill and tip percentage cannot be computed (too few rows or no variation). "
+            f"The average tip percentage in this selection is **{avg:.1f}%**."
+        )
+    # Turn the correlation (r) into a plain-English explanation:
+    #    r <= -0.20        → bigger bills usually mean lower tip percentages
+    #    r >= +0.20        → bigger bills usually mean higher tip percentages
+    #    anything between  → little to no linear relationship
+    if r <= -0.20:
+        trend = "a **negative** association — larger bills tend to have **lower** tip percentages."
+    elif r >= 0.20:
+        trend = "a **positive** association — larger bills tend to have **higher** tip percentages."
+    else:
+        trend = "**little to no clear** linear association between bill size and tip percentage."
+    return (
+        f"**Insight:** Based on the current selection (n = {n}), the correlation between "
+        f"total bill and tip percentage is **{r:+.2f}**, suggesting {trend} "
+        f"The average tip percentage in this selection is **{avg:.1f}%**."
+    )
+# With an empty selection there is nothing to correlate or average, so pass
+# neutral placeholders; insight_text returns its "no data" message when n is 0.
+n_rows = len(df)
+if n_rows == 0:
+    r_value, mean_tip_pct = 0.0, 0.0
+else:
+    r_value = df["total_bill"].corr(df["tip_pct"])
+    mean_tip_pct = df["tip_pct"].mean()
+st.markdown(insight_text(n_rows, r_value, mean_tip_pct))
+# 8) Footnote & performance hint
+footnote = (
+    "Notes: correlation is computed with Pearson’s r. "
+    "Extreme tip % values (top/bottom 1%) can be optionally removed for stability."
+)
+st.caption(footnote)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit>=1.29
+pandas
+numpy
+seaborn>=0.13
+matplotlib
+altair