Spaces:

huohuobeixiaosile
/

666

Sleeping

App Files Files Community

666 / W2_assignment_streamlit.py

huohuobeixiaosile

Upload 2 files

e3823e4 verified 3 months ago

raw

history blame contribute delete

5.09 kB


	# Question: Does a higher bill amount lead to a lower tip percentage?

	import numpy as np
	import seaborn as sns
	import streamlit as st
	import altair as alt

	# Page-level settings: browser-tab title, tab icon, and a full-width layout.
	st.set_page_config(
	    page_title="Tips Explorer: Bill vs Tip %",
	    page_icon="💸",
	    layout="wide",
	)

	# 1) Data loading
	@st.cache_data
	def load_data():
	    """Load the seaborn "tips" dataset and add a tip-percentage column.

	    Returns a DataFrame limited to the columns the app uses, with
	    ``tip_pct`` (tip as a percentage of the total bill) added and any
	    rows containing missing values dropped. The result is cached by
	    Streamlit through the ``st.cache_data`` decorator.
	    """
	    columns = ["total_bill", "tip", "tip_pct", "sex", "smoker", "day", "time", "size"]
	    raw = sns.load_dataset("tips")
	    # assign() returns a new frame, so the loaded dataset is never mutated.
	    with_pct = raw.assign(tip_pct=raw["tip"] / raw["total_bill"] * 100)
	    return with_pct[columns].dropna()

	# Full dataset; every filter below works on a subset of this frame.
	tips = load_data()


	# 2) Title & problem statement
	st.title("💸 Do Bigger Bills Mean Smaller Tip % ?")
	st.caption("Explore whether higher bills are associated with lower tipping percentages.")

	# Restate the driving question as a markdown block quote.
	st.markdown("> User question: Does a higher bill amount lead to a lower tip percentage?")


	# 3) Sidebar controls (≥ 2)
	st.sidebar.header("Filters")

	# (a) bill
	# Widen the slider bounds outward to whole dollars. Rounding to one decimal
	# could push the lower bound above the smallest bill (or the upper bound below
	# the largest), silently dropping those rows from the default selection.
	# Whole-dollar bounds also line up with the 0.5 step.
	bill_min = float(np.floor(tips["total_bill"].min()))
	bill_max = float(np.ceil(tips["total_bill"].max()))
	bill_range = st.sidebar.slider(
	    "Total bill range ($)",
	    min_value=bill_min,
	    max_value=bill_max,
	    value=(bill_min, bill_max),
	    step=0.5,
	)

	# (b) weekdays
	days = ["Thur", "Fri", "Sat", "Sun"]
	day_choice = st.sidebar.multiselect("Day(s) of week", days, default=days)

	# (c) mealtime
	time_choice = st.sidebar.radio("Meal", options=["All", "Lunch", "Dinner"], index=0)

	# (d) Outlier Removal
	clip_outliers = st.sidebar.checkbox("Remove extreme tip % (top/bottom 1%)", value=True)


	# 4) Apply filters
	# Bill range is inclusive at both ends; days must be among the selected ones.
	in_bill_range = tips["total_bill"].between(bill_range[0], bill_range[1])
	on_selected_day = tips["day"].isin(day_choice)
	df = tips.loc[in_bill_range & on_selected_day].copy()

	# "All" means no meal-time restriction.
	if time_choice != "All":
	    df = df[df["time"] == time_choice]

	# Outlier Removal (for More Stable KPIs and Visualizations)
	# Skipped for selections of 10 rows or fewer.
	if clip_outliers and len(df) > 10:
	    low, high = np.percentile(df["tip_pct"], [1, 99])
	    df = df[df["tip_pct"].between(low, high)]


	# 5) KPIs (≥ 1)
	col1, col2, col3 = st.columns(3)
	if len(df) > 0:
	    avg_tip_pct = df["tip_pct"].mean()
	    med_tip_pct = df["tip_pct"].median()
	    corr = df["total_bill"].corr(df["tip_pct"])  # Pearson Correlation

	    # The correlation is NaN when only one row remains or a column is constant;
	    # show the same placeholder as the empty case instead of "+nan".
	    corr_label = f"{corr:+.2f}" if np.isfinite(corr) else "–"

	    col1.metric("Average Tip %", f"{avg_tip_pct:.1f}%")
	    col2.metric("Median Tip %", f"{med_tip_pct:.1f}%")
	    col3.metric("Corr( Bill , Tip % )", corr_label)
	else:
	    # Nothing selected: keep the three tiles but show placeholders.
	    col1.metric("Average Tip %", "–")
	    col2.metric("Median Tip %", "–")
	    col3.metric("Corr( Bill , Tip % )", "–")

	st.divider()


	# 6) Visualization (≥ 1)
	st.subheader("Tip Percentage vs. Bill Amount")

	if len(df) == 0:
	    st.info("No data under current filters. Try expanding the bill range or selecting more days.")
	else:
	    # Shared base so both layers read the same filtered data and size.
	    base = alt.Chart(df).properties(width=800, height=420)

	    # One point per bill, with the row's details on hover.
	    scatter = (
	        base.mark_circle(size=70, opacity=0.65, color="#4C78A8")
	        .encode(
	            x=alt.X("total_bill:Q", title="Total Bill ($)"),
	            y=alt.Y("tip_pct:Q", title="Tip Percentage (%)"),
	            tooltip=[
	                alt.Tooltip("total_bill:Q", title="Bill ($)", format=".2f"),
	                alt.Tooltip("tip_pct:Q", title="Tip %", format=".1f"),
	                alt.Tooltip("day:N", title="Day"),
	                alt.Tooltip("time:N", title="Meal"),
	                alt.Tooltip("size:Q", title="Party Size"),
	            ],
	        )
	    )

	    # Used Altair's built-in regression function, which automatically plots the trend line
	    reg = (
	        base.transform_regression("total_bill", "tip_pct")
	        .mark_line(color="#E45756", size=3)
	        .encode(x="total_bill:Q", y="tip_pct:Q")
	    )

	    # Layer on the default shared y scale. Resolving y as "independent" would
	    # give the trend line its own axis, so it would no longer be drawn on the
	    # same scale as the points it summarizes.
	    chart = scatter + reg

	    st.altair_chart(chart, use_container_width=True)


	# 7) Dynamic insight text
	def insight_text(n, r, avg):
	    """Build the plain-English insight sentence for the current selection.

	    Args:
	        n: Number of rows in the current selection.
	        r: Correlation between total bill and tip percentage. May be NaN
	            (or None) when the correlation is undefined.
	        avg: Average tip percentage of the selection.

	    Returns:
	        A sentence describing the direction of the association, or a
	        message saying no data is available or the correlation could not
	        be computed.
	    """
	    import math

	    if n == 0:
	        return "No data available under the current filters."

	    # An undefined correlation would fail both threshold comparisons below and
	    # be reported as "+nan ... little to no association"; say so explicitly.
	    if r is None or math.isnan(r):
	        return (
	            f"Insight: Based on the current selection (n = {n}), the correlation between "
	            "total bill and tip percentage could not be computed "
	            "(it needs at least two rows with varying values). "
	            f"The average tip percentage in this selection is {avg:.1f}%."
	        )

	    # Turn the correlation (r) into a plain-English explanation:
	    #   r >= +0.20 -> bigger bills usually mean higher tip percentages
	    #   r <= -0.20 -> bigger bills usually mean lower tip percentages
	    #   otherwise  -> little to no relationship
	    if r <= -0.20:
	        trend = "a negative association — larger bills tend to have lower tip percentages."
	    elif r >= 0.20:
	        trend = "a positive association — larger bills tend to have higher tip percentages."
	    else:
	        trend = "little to no clear linear association between bill size and tip percentage."

	    return (
	        f"Insight: Based on the current selection (n = {n}), the correlation between "
	        f"total bill and tip percentage is {r:+.2f}, suggesting {trend} "
	        f"The average tip percentage in this selection is {avg:.1f}%."
	    )

	# Gather the inputs first, using 0.0 placeholders when nothing is selected.
	row_count = len(df)
	if row_count == 0:
	    bill_tip_corr, mean_tip_pct = 0.0, 0.0
	else:
	    bill_tip_corr = df["total_bill"].corr(df["tip_pct"])
	    mean_tip_pct = df["tip_pct"].mean()

	st.markdown(insight_text(row_count, bill_tip_corr, mean_tip_pct))


	# 8) Footnote & performance hint
	# Explains how the correlation is computed and what the outlier toggle does.
	footnote = (
	    "Notes: correlation is computed with Pearson’s r. "
	    "Extreme tip % values (top/bottom 1%) can be optionally removed for stability."
	)
	st.caption(footnote)