huohuobeixiaosile committed on
Commit
e3823e4
·
verified ·
1 Parent(s): 84e588e

Upload 2 files

Browse files
Files changed (2) hide show
  1. W2_assignment_streamlit.py +162 -0
  2. requirements.txt +6 -0
W2_assignment_streamlit.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Question: Does a higher bill amount lead to a lower tip percentage?
3
+
4
+ import numpy as np
5
+ import seaborn as sns
6
+ import streamlit as st
7
+ import altair as alt
8
+
9
# Page-level settings: browser-tab title and icon, full-width layout.
st.set_page_config(
    page_title="Tips Explorer: Bill vs Tip %",
    page_icon="💸",
    layout="wide",
)
10
+
11
+ # 1) Data loading
12
@st.cache_data
def load_data():
    """Load the seaborn "tips" dataset and derive the tip percentage.

    Returns:
        A DataFrame holding only the columns the app uses, including the
        derived ``tip_pct`` column (tip divided by total bill, times 100),
        with any rows containing missing values dropped.
    """
    columns = ["total_bill", "tip", "tip_pct", "sex", "smoker", "day", "time", "size"]
    raw = sns.load_dataset("tips").copy()
    enriched = raw.assign(tip_pct=raw["tip"] / raw["total_bill"] * 100)
    return enriched.loc[:, columns].dropna()
19
+
20
tips = load_data()


# 2) Title & problem statement
st.title("💸 Do Bigger Bills Mean Smaller Tip % ?")
st.caption("Explore whether higher bills are associated with lower tipping percentages.")

# The guiding question, rendered as a Markdown block quote.
user_question = "> **User question:** Does a higher bill amount lead to a lower tip percentage?"
st.markdown(user_question)
30
+
31
+
32
+ # 3) Sidebar controls (≥ 2)
33
st.sidebar.header("Filters")

# (a) bill
bill_min = float(tips["total_bill"].min())
bill_max = float(tips["total_bill"].max())

# Snap the slider bounds OUTWARD onto the step grid. Rounding to one decimal
# can move a bound inside the data range (a minimum of 3.07 would become 3.1),
# which silently drops the smallest/largest bills under the default selection.
bill_step = 0.5
slider_min = float(np.floor(bill_min / bill_step) * bill_step)
slider_max = float(np.ceil(bill_max / bill_step) * bill_step)
bill_range = st.sidebar.slider(
    "Total bill range ($)",
    min_value=slider_min,
    max_value=slider_max,
    value=(slider_min, slider_max),  # default: the full range, nothing excluded
    step=bill_step,
)

# (b) weekdays
days = ["Thur", "Fri", "Sat", "Sun"]
day_choice = st.sidebar.multiselect("Day(s) of week", days, default=days)

# (c) mealtime
time_choice = st.sidebar.radio("Meal", options=["All", "Lunch", "Dinner"], index=0)

# (d) Outlier Removal
clip_outliers = st.sidebar.checkbox("Remove extreme tip % (top/bottom 1%)", value=True)
55
+
56
+
57
+ # 4) Apply filters
58
# Keep rows whose bill lies inside the selected range (both ends inclusive)
# and whose day is one of the selected days.
selected_low, selected_high = bill_range[0], bill_range[1]
in_bill_range = tips["total_bill"].between(selected_low, selected_high)
on_selected_day = tips["day"].isin(day_choice)
df = tips[in_bill_range & on_selected_day].copy()

# "All" means no meal-time restriction.
if time_choice != "All":
    df = df[df["time"] == time_choice]

# Outlier Removal (for More Stable KPIs and Visualizations)
# Only applied when more than 10 rows remain after the filters above.
if clip_outliers and len(df) > 10:
    low, high = np.percentile(df["tip_pct"], [1, 99])
    df = df[df["tip_pct"].between(low, high)]
71
+
72
+
73
+ # 5) KPIs (≥ 1)
74
col1, col2, col3 = st.columns(3)
if len(df) > 0:
    avg_tip_pct = df["tip_pct"].mean()
    med_tip_pct = df["tip_pct"].median()
    corr = df["total_bill"].corr(df["tip_pct"])  # Pearson Correlation

    # Pearson's r is undefined (NaN) for a single row or when either column
    # is constant; show the placeholder instead of rendering "+nan".
    corr_label = f"{corr:+.2f}" if np.isfinite(corr) else "–"

    col1.metric("Average Tip %", f"{avg_tip_pct:.1f}%")
    col2.metric("Median Tip %", f"{med_tip_pct:.1f}%")
    col3.metric("Corr( Bill , Tip % )", corr_label)
else:
    # Nothing matches the filters: show placeholders for every KPI.
    col1.metric("Average Tip %", "–")
    col2.metric("Median Tip %", "–")
    col3.metric("Corr( Bill , Tip % )", "–")

st.divider()
89
+
90
+
91
+ # 6) Visualization (≥ 1)
92
st.subheader("Tip Percentage vs. Bill Amount")

if len(df) == 0:
    st.info("No data under current filters. Try expanding the bill range or selecting more days.")
else:
    base = alt.Chart(df).properties(width=800, height=420)

    # Both layers share these encodings so the layered chart keeps a single,
    # consistently titled axis per dimension.
    x_axis = alt.X("total_bill:Q", title="Total Bill ($)")
    y_axis = alt.Y("tip_pct:Q", title="Tip Percentage (%)")

    scatter = (
        base.mark_circle(size=70, opacity=0.65, color="#4C78A8")
        .encode(
            x=x_axis,
            y=y_axis,
            tooltip=[
                alt.Tooltip("total_bill:Q", title="Bill ($)", format=".2f"),
                alt.Tooltip("tip_pct:Q", title="Tip %", format=".1f"),
                alt.Tooltip("day:N", title="Day"),
                alt.Tooltip("time:N", title="Meal"),
                alt.Tooltip("size:Q", title="Party Size"),
            ],
        )
    )

    # Used Altair's built-in regression function, which automatically plots the trend line
    reg = (
        base.transform_regression("total_bill", "tip_pct")
        .mark_line(color="#E45756", size=3)
        .encode(x=x_axis, y=y_axis)
    )

    # Layer on the default SHARED y scale: an independent y scale would put
    # the regression line on its own axis, so it would no longer line up
    # with the scatter points it is meant to summarize.
    chart = scatter + reg

    st.altair_chart(chart, use_container_width=True)
124
+
125
+
126
+ # 7) Dynamic insight text
127
def insight_text(n, r, avg):
    """Build the Markdown insight sentence for the current selection.

    Args:
        n: Number of rows in the filtered selection.
        r: Pearson correlation between total bill and tip percentage. May be
            NaN when the correlation is undefined (for example a single row,
            or no variation in one of the columns).
        avg: Mean tip percentage of the selection.

    Returns:
        A Markdown string describing the direction of the association and
        the average tip percentage, or a short notice when there is no data
        or the correlation cannot be computed.
    """
    if n == 0:
        return "No data available under the current filters."

    # An undefined correlation must not be reported as a number ("+nan") nor
    # interpreted as "no association" — say explicitly that it is unavailable.
    if np.isnan(r):
        return (
            f"**Insight:** Based on the current selection (n = {n}), the correlation between "
            "total bill and tip percentage cannot be computed "
            "(too few data points or no variation). "
            f"The average tip percentage in this selection is **{avg:.1f}%**."
        )

    # Turn the correlation (r) into a plain-English explanation:
    #   r <= -0.20 → bigger bills usually mean lower tip percentages
    #   r >= +0.20 → bigger bills usually mean higher tip percentages
    #   otherwise  → little to no linear relationship
    if r <= -0.20:
        trend = "a **negative** association — larger bills tend to have **lower** tip percentages."
    elif r >= 0.20:
        trend = "a **positive** association — larger bills tend to have **higher** tip percentages."
    else:
        trend = "**little to no clear** linear association between bill size and tip percentage."

    return (
        f"**Insight:** Based on the current selection (n = {n}), the correlation between "
        f"total bill and tip percentage is **{r:+.2f}**, suggesting {trend} "
        f"The average tip percentage in this selection is **{avg:.1f}%**."
    )
148
+
149
# Compute the inputs for the insight sentence; an empty selection falls back
# to zeros so nothing is computed on an empty frame.
row_count = len(df)
if row_count == 0:
    selection_corr = 0.0
    selection_avg = 0.0
else:
    selection_corr = df["total_bill"].corr(df["tip_pct"])
    selection_avg = df["tip_pct"].mean()

st.markdown(insight_text(row_count, selection_corr, selection_avg))


# 8) Footnote & performance hint
footnote = (
    "Notes: correlation is computed with Pearson’s r. "
    "Extreme tip % values (top/bottom 1%) can be optionally removed for stability."
)
st.caption(footnote)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit>=1.29
2
+ pandas
3
+ numpy
4
+ seaborn>=0.13
5
+ matplotlib
6
+ altair