WissMah committed on
Commit
fe34a8b
·
verified ·
1 Parent(s): 8b3f65a

Upload 5 files

Browse files
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, jsonify
2
+ import joblib
3
+ import numpy as np
4
+ import os
5
+
6
# Port the built-in server listens on; overridable through the PORT env var.
APP_PORT = int(os.getenv("PORT", "8080"))

app = Flask(__name__)

# Location of the serialized model pipeline; overridable through MODEL_PATH.
MODEL_PATH = os.getenv("MODEL_PATH", "model/stroke_pipeline.joblib")

# Load the model pipeline once, at import time, so a missing or unreadable
# artifact aborts startup instead of surfacing on the first request.
# NOTE(review): joblib artifacts are pickle-based and can run arbitrary code
# when loaded -- only point MODEL_PATH at files from a trusted source.
try:
    pipeline = joblib.load(MODEL_PATH)
except Exception as e:
    # Chain explicitly so the traceback reports the original failure as the
    # direct cause of the startup error.
    raise RuntimeError(f"Failed to load model at {MODEL_PATH}: {e}") from e
17
+
18
# Order in which payload fields are laid out when building the model input row.
FEATURE_ORDER = [
    "gender", "age", "hypertension", "heart_disease", "ever_married",
    "work_type", "Residence_type", "avg_glucose_level", "bmi",
    "smoking_status",
]
30
+
31
@app.route("/health", methods=["GET"])
def health():
    """Healthcheck endpoint: always answers HTTP 200 with ``{"status": "ok"}``."""
    body = jsonify(status="ok")
    return body, 200
35
+
36
@app.route("/", methods=["GET"])
def index():
    """Render the prediction form pre-filled with sample patient values."""
    # Sample values so the form can be submitted straight away while testing.
    sample_patient = dict(
        gender="Female",
        age=45,
        hypertension=0,
        heart_disease=0,
        ever_married="Yes",
        work_type="Private",
        Residence_type="Urban",
        avg_glucose_level=95.0,
        bmi=28.0,
        smoking_status="never smoked",
    )
    return render_template("index.html", defaults=sample_patient)
52
+
53
def _coerce_payload(payload):
    """Return a copy of *payload* with the numeric model inputs cast to numbers.

    ``age``, ``avg_glucose_level`` and ``bmi`` are cast to ``float``;
    ``hypertension`` and ``heart_disease`` are cast to ``int``. Fields that
    are absent or explicitly ``None`` are left untouched so they reach the
    model as missing values.

    Raises:
        ValueError: if *payload* is not a mapping (e.g. a JSON array), or a
            present value cannot be parsed as a number.
        TypeError: if a present value has a type ``float``/``int`` rejects.
    """
    if not isinstance(payload, dict):
        raise ValueError("Request body must be a JSON object or form data")

    casts = {
        "age": float,
        "avg_glucose_level": float,
        "bmi": float,
        "hypertension": int,
        "heart_disease": int,
    }
    coerced = dict(payload)
    for field, cast in casts.items():
        if coerced.get(field) is not None:
            coerced[field] = cast(coerced[field])
    return coerced


def _payload_to_frame(payload):
    """Build the single-row model input with columns in FEATURE_ORDER.

    The training script in this repository selects columns by *name* in its
    ColumnTransformer, and scikit-learn only supports name-based selection
    for DataFrame input -- a plain list of lists is rejected at transform
    time. Missing fields become ``None``.
    NOTE(review): assumes the loaded artifact is the pipeline produced by
    model/train_and_save.py -- confirm if MODEL_PATH points elsewhere.
    """
    # Imported here so the module-level import block stays untouched.
    import pandas as pd

    row = [payload.get(name) for name in FEATURE_ORDER]
    return pd.DataFrame([row], columns=FEATURE_ORDER)


@app.route("/predict", methods=["POST"])
def predict():
    """Predict stroke probability from a JSON body (API) or a form post (UI).

    JSON requests get a JSON response; form posts get the rendered page.
    Any failure while reading, converting or scoring the input is reported
    with HTTP 400 in the same format as the request.
    """
    wants_json = request.is_json
    try:
        # Read input either from JSON (API) or form (UI)
        raw = request.get_json() if wants_json else request.form.to_dict()
        payload = _coerce_payload(raw)

        # Predict proba (stroke = 1)
        frame = _payload_to_frame(payload)
        prob = float(pipeline.predict_proba(frame)[0][1])
        pred = int(prob >= 0.5)

        result = {"stroke_probability": prob, "predicted_label": pred}
        if wants_json:
            return jsonify(result)
        return render_template("index.html", result=result, defaults=payload)
    except Exception as e:
        # Request boundary: turn every failure into a 400 for the caller
        # rather than letting it escape as an unhandled server error.
        if wants_json:
            return jsonify({"error": str(e)}), 400
        return render_template("index.html", error=str(e), defaults=request.form), 400
93
if __name__ == "__main__":
    # Start Flask's built-in server on every network interface, with the
    # debugger switched off, on the port taken from APP_PORT.
    app.run(debug=False, port=APP_PORT, host="0.0.0.0")
model/stroke_pipeline.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:187a196587db135daceeb725e5ac58d9cc64403e6a88627a19dda8d1b998b857
3
+ size 6903
model/train_and_save.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Train & save a full sklearn Pipeline for stroke prediction.
3
+
4
+ - If ./data/healthcare-dataset-stroke-data.csv exists, trains on it (matching the notebook structure).
5
+ - Otherwise, trains on a synthetic dataset with the same schema.
6
+ Saves: model/stroke_pipeline.joblib
7
+ """
8
+ from pathlib import Path
9
+ import pandas as pd
10
+ import numpy as np
11
+ import joblib
12
+
13
+ from sklearn.compose import ColumnTransformer
14
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
15
+ from sklearn.impute import SimpleImputer
16
+ from sklearn.linear_model import LogisticRegression
17
+ from sklearn.pipeline import Pipeline
18
+ from sklearn.model_selection import train_test_split
19
+ from sklearn.metrics import classification_report, roc_auc_score
20
+
21
# Resolve project locations relative to this script (which lives in model/)
# instead of hard-coding one machine's absolute Windows path. Writing that
# path as a normal string literal is also a SyntaxError, because "\U" starts
# a unicode escape.
_MODEL_DIR = Path(__file__).resolve().parent

# Optional real dataset: <project root>/data/healthcare-dataset-stroke-data.csv
DATA_PATH = _MODEL_DIR.parent / "data" / "healthcare-dataset-stroke-data.csv"
# Trained artifact: <project root>/model/stroke_pipeline.joblib
OUT_PATH = _MODEL_DIR / "stroke_pipeline.joblib"
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)

# Feature groups; each group gets its own preprocessing branch.
CATEGORICAL = ["gender", "ever_married", "work_type", "Residence_type", "smoking_status"]
NUMERIC = ["age", "avg_glucose_level", "bmi"]
BINARY_INT = ["hypertension", "heart_disease"]  # keep as numeric ints
28
+
29
def load_real_or_synthetic():
    """Return the training frame: the CSV at DATA_PATH if present, else synthetic data.

    Real data is checked for the expected columns and trimmed to exactly
    those columns (which drops extras such as an id). When the file does not
    exist, 2000 rows with the same schema are generated from a fixed seed.

    Raises:
        ValueError: if the CSV exists but lacks an expected column.
    """
    # expected columns from the Kaggle stroke dataset
    expected = ["gender", "age", "hypertension", "heart_disease", "ever_married",
                "work_type", "Residence_type", "avg_glucose_level", "bmi",
                "smoking_status", "stroke"]

    if not DATA_PATH.exists():
        # Synthetic data with the right columns; seeded so runs are repeatable.
        rng = np.random.RandomState(42)
        n_rows = 2000
        columns = {
            "gender": rng.choice(["Male", "Female", "Other"], size=n_rows, p=[0.49, 0.50, 0.01]),
            "age": rng.randint(1, 90, size=n_rows),
            "hypertension": rng.binomial(1, 0.15, size=n_rows),
            "heart_disease": rng.binomial(1, 0.08, size=n_rows),
            "ever_married": rng.choice(["Yes", "No"], size=n_rows, p=[0.7, 0.3]),
            "work_type": rng.choice(
                ["Private", "Self-employed", "Govt_job", "children", "Never_worked"],
                size=n_rows,
                p=[0.6, 0.2, 0.18, 0.01, 0.01],
            ),
            "Residence_type": rng.choice(["Urban", "Rural"], size=n_rows, p=[0.55, 0.45]),
            "avg_glucose_level": rng.normal(100, 30, size=n_rows).clip(50, 300),
            "bmi": rng.normal(28, 6, size=n_rows).clip(10, 60),
            "smoking_status": rng.choice(
                ["formerly smoked", "never smoked", "smokes", "Unknown"],
                size=n_rows,
                p=[0.2, 0.6, 0.15, 0.05],
            ),
        }
        synthetic = pd.DataFrame(columns)

        # Fabricate a signal for stroke: a linear score over a few features.
        score = (
            0.03 * synthetic["age"]
            + 0.02 * (synthetic["avg_glucose_level"] - 100)
            + 0.05 * (synthetic["bmi"] - 28)
            + 0.8 * synthetic["hypertension"]
            + 0.9 * synthetic["heart_disease"]
            + 0.3 * (synthetic["ever_married"] == "Yes").astype(int)
        )
        # Shift the score by 4.0 before the sigmoid to keep prevalence low.
        stroke_prob = 1 / (1 + np.exp(-(score - 4.0)))
        synthetic["stroke"] = (rng.rand(len(synthetic)) < stroke_prob).astype(int)
        return synthetic

    frame = pd.read_csv(DATA_PATH)
    absent = set(expected) - set(frame.columns)
    if absent:
        raise ValueError(f"Dataset is missing columns: {absent}")
    # Keep only the expected columns, in the file's own order.
    keep = [col for col in frame.columns if col in expected]
    return frame[keep]
70
+
71
def build_pipeline():
    """Assemble the unfitted preprocessing + logistic-regression pipeline.

    Categorical columns are mode-imputed and one-hot encoded (unknown
    categories ignored), numeric columns are median-imputed and standardized,
    and the binary flags are mode-imputed and scaled without centering.
    """
    categorical_steps = [
        ("impute", SimpleImputer(strategy="most_frequent")),
        ("ohe", OneHotEncoder(handle_unknown="ignore")),
    ]
    numeric_steps = [
        ("impute", SimpleImputer(strategy="median")),
        ("scale", StandardScaler()),
    ]
    # Binary ints are treated as numeric; scaling is optional here but harmless.
    binary_steps = [
        ("impute", SimpleImputer(strategy="most_frequent")),
        ("scale", StandardScaler(with_mean=False)),  # keep sparse-friendly path
    ]

    preprocessor = ColumnTransformer(
        transformers=[
            ("cat", Pipeline(steps=categorical_steps), CATEGORICAL),
            ("num", Pipeline(steps=numeric_steps), NUMERIC),
            ("bin", Pipeline(steps=binary_steps), BINARY_INT),
        ]
    )
    classifier = LogisticRegression(max_iter=1000, n_jobs=None)

    return Pipeline(steps=[("pre", preprocessor), ("clf", classifier)])
95
+
96
def main():
    """Train on a stratified 80/20 split, print hold-out metrics, save the pipeline.

    The pipeline written to OUT_PATH is the one fitted on the training split.
    """
    data = load_real_or_synthetic()

    target = data["stroke"].astype(int)
    features = data.drop(columns=["stroke"])

    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=0.2,
        random_state=42,
        stratify=target,
    )

    model = build_pipeline()
    model.fit(X_train, y_train)

    # Column 1 of predict_proba is the probability of the positive class.
    stroke_prob = model.predict_proba(X_test)[:, 1]
    hard_labels = (stroke_prob >= 0.5).astype(int)

    print("AUC:", roc_auc_score(y_test, stroke_prob))
    print("Report:\n", classification_report(y_test, hard_labels))

    joblib.dump(model, OUT_PATH)
    print(f"Saved pipeline to {OUT_PATH.resolve()}")


if __name__ == "__main__":
    main()
static/style.css ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Global reset and page chrome (dark theme). */
* {
  box-sizing: border-box;
}

body {
  font-family: system-ui,-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif;
  background: #0b1220;
  color: #e8eef9;
  margin: 0;
  padding: 2rem;
}

/* Centered content column. */
.container {
  max-width: 760px;
  margin: 0 auto;
}

h1 {
  margin-top: 0;
}

/* Panel used for the form, the result and the API notes. */
.card {
  background: #111a2b;
  border: 1px solid #1e2a44;
  border-radius: 14px;
  padding: 1rem;
  margin: 1rem 0;
}

/* One label + control pair per row. */
.row {
  display: flex;
  gap: 1rem;
  margin: .6rem 0;
  align-items: center;
}

.row label {
  width: 200px;
}

/* Form controls share the dark surface styling. */
input,
select,
button {
  padding: .5rem;
  border-radius: 8px;
  border: 1px solid #2a3a5e;
  background: #0e1626;
  color: #e8eef9;
}

button {
  cursor: pointer;
}

/* Error banner shown above the form. */
.error {
  background: #3b0d0d;
  border: 1px solid #7c1919;
  color: #ffd6d6;
  border-radius: 10px;
  padding: .75rem;
  margin-bottom: 1rem;
}

.result p {
  margin: .3rem 0;
}

/* Code samples in the API card. */
.api code,
.api pre {
  display: block;
  background: #0e1626;
  border: 1px solid #2a3a5e;
  padding: 8px;
  border-radius: 10px;
  overflow-x: auto;
}
templates/index.html ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
  <title>Stroke Risk Predictor</title>
  {# url_for keeps this link valid even if the app is served under a URL prefix. #}
  <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}"/>
</head>
<body>
  <div class="container">
    <h1>💓 Stroke Risk Predictor</h1>
    <p>Enter patient details and get a predicted stroke probability.</p>

    {% if error %}
    <div class="error">{{ error }}</div>
    {% endif %}

    {#
      `defaults` holds the values to pre-select. After a failed submission it
      is the raw form data, where every value is a string, so each option is
      compared as a string; otherwise numeric choices such as 0/1 would never
      match and the user's selection would be lost.
    #}
    <form method="POST" action="{{ url_for('predict') }}" class="card">
      <div class="row">
        <label for="gender">Gender</label>
        <select id="gender" name="gender">
          {% for g in ["Male","Female","Other"] %}
          <option value="{{g}}" {% if defaults.gender|string == g|string %}selected{% endif %}>{{g}}</option>
          {% endfor %}
        </select>
      </div>

      <div class="row">
        <label for="age">Age</label>
        <input id="age" type="number" name="age" step="1" min="0" max="120" value="{{defaults.age}}"/>
      </div>

      <div class="row">
        <label for="hypertension">Hypertension</label>
        <select id="hypertension" name="hypertension">
          {% for v in [0,1] %}
          <option value="{{v}}" {% if defaults.hypertension|string == v|string %}selected{% endif %}>{{v}}</option>
          {% endfor %}
        </select>
      </div>

      <div class="row">
        <label for="heart_disease">Heart Disease</label>
        <select id="heart_disease" name="heart_disease">
          {% for v in [0,1] %}
          <option value="{{v}}" {% if defaults.heart_disease|string == v|string %}selected{% endif %}>{{v}}</option>
          {% endfor %}
        </select>
      </div>

      <div class="row">
        <label for="ever_married">Ever Married</label>
        <select id="ever_married" name="ever_married">
          {% for v in ["Yes","No"] %}
          <option value="{{v}}" {% if defaults.ever_married|string == v|string %}selected{% endif %}>{{v}}</option>
          {% endfor %}
        </select>
      </div>

      <div class="row">
        <label for="work_type">Work Type</label>
        <select id="work_type" name="work_type">
          {% for v in ["Private","Self-employed","Govt_job","children","Never_worked"] %}
          <option value="{{v}}" {% if defaults.work_type|string == v|string %}selected{% endif %}>{{v}}</option>
          {% endfor %}
        </select>
      </div>

      <div class="row">
        <label for="Residence_type">Residence Type</label>
        <select id="Residence_type" name="Residence_type">
          {% for v in ["Urban","Rural"] %}
          <option value="{{v}}" {% if defaults.Residence_type|string == v|string %}selected{% endif %}>{{v}}</option>
          {% endfor %}
        </select>
      </div>

      <div class="row">
        <label for="avg_glucose_level">Avg. Glucose Level</label>
        <input id="avg_glucose_level" type="number" name="avg_glucose_level" step="0.01" value="{{defaults.avg_glucose_level}}"/>
      </div>

      <div class="row">
        <label for="bmi">BMI</label>
        <input id="bmi" type="number" name="bmi" step="0.1" value="{{defaults.bmi}}"/>
      </div>

      <div class="row">
        <label for="smoking_status">Smoking Status</label>
        <select id="smoking_status" name="smoking_status">
          {% for v in ["formerly smoked","never smoked","smokes","Unknown"] %}
          <option value="{{v}}" {% if defaults.smoking_status|string == v|string %}selected{% endif %}>{{v}}</option>
          {% endfor %}
        </select>
      </div>

      <button type="submit">Predict</button>
    </form>

    {# Only present after a successful prediction. #}
    {% if result %}
    <div class="result card">
      <h2>Result</h2>
      <p><strong>Predicted Stroke Probability:</strong> {{ '%.3f'|format(result.stroke_probability) }}</p>
      <p><strong>Predicted Label (1 = Stroke):</strong> {{ result.predicted_label }}</p>
    </div>
    {% endif %}

    <div class="api card">
      <h3>API</h3>
      <code>POST /predict</code> with JSON:
      <pre>
{
  "gender":"Female",
  "age":45,
  "hypertension":0,
  "heart_disease":0,
  "ever_married":"Yes",
  "work_type":"Private",
  "Residence_type":"Urban",
  "avg_glucose_level":95.0,
  "bmi":28.0,
  "smoking_status":"never smoked"
}
      </pre>
    </div>
  </div>
</body>
</html>