Would You Survive? Predicting Stroke with Logistic Regression

# Optionally install the third-party packages the later cells import.
import subprocess
import sys

yn = input("Would you like to install package dependencies? (y/n)")

# Accept "y"/"Y" and ignore surrounding whitespace in the answer.
if yn.strip().lower() == "y":
    print("Installing dependencies")
    # Run pip through the interpreter executing this code so the packages are
    # installed into the same environment the imports below will use. A bare
    # `!pip3` is IPython-only syntax and may belong to a different Python.
    pip_result = subprocess.run(
        [sys.executable, "-m", "pip", "install", "pandas", "numpy", "scikit-learn"]
    )
    if pip_result.returncode != 0:
        # Keep going (the packages may already be present) but make the
        # failure visible instead of silently ignoring it.
        print(f"pip exited with status {pip_result.returncode}; "
              "dependencies may be missing")
else:
    print("Moving on")

Installing dependencies
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: pandas in /home/dudeamabobby/.local/lib/python3.10/site-packages (2.1.0)
Requirement already satisfied: numpy>=1.22.4 in /home/dudeamabobby/.local/lib/python3.10/site-packages (from pandas) (1.25.2)
Requirement already satisfied: python-dateutil>=2.8.2 in /home/dudeamabobby/.local/lib/python3.10/site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas) (2022.1)
Requirement already satisfied: tzdata>=2022.1 in /home/dudeamabobby/.local/lib/python3.10/site-packages (from pandas) (2023.3)
Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: numpy in /home/dudeamabobby/.local/lib/python3.10/site-packages (1.25.2)
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: scikit-learn in /home/dudeamabobby/.local/lib/python3.10/site-packages (1.3.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /home/dudeamabobby/.local/lib/python3.10/site-packages (from scikit-learn) (3.2.0)
Requirement already satisfied: joblib>=1.1.1 in /home/dudeamabobby/.local/lib/python3.10/site-packages (from scikit-learn) (1.3.2)
Requirement already satisfied: scipy>=1.5.0 in /usr/lib/python3/dist-packages (from scikit-learn) (1.8.0)
Requirement already satisfied: numpy>=1.17.3 in /home/dudeamabobby/.local/lib/python3.10/site-packages (from scikit-learn) (1.25.2)

# Announce the stage in the cell output before the (slow) imports run.
print("Importing packages")
# Data handling libraries.
# NOTE(review): `np` and `StandardScaler` are not referenced in the cells
# visible here; confirm whether they are still needed.
import pandas as pd
import numpy as np
# scikit-learn: the classifier, the train/test splitter, the accuracy metric
# and a feature scaler.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
# Standard-library interactive debugger.
import pdb
import pickle  # Import pickle for model saving

Importing packages


/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.25.2
  warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"

# Report the current stage, then read the stroke training set from the
# working directory into a DataFrame.
print("Loading, pre-processing, and splitting data")

data_train = pd.read_csv(filepath_or_buffer='./trainStroke.csv')

Loading, pre-processing, and splitting data

# Display the first five rows as a quick sanity check of the loaded data.
data_train.head(n=5)

	id	gender	age	hypertension	heart_disease	ever_married	work_type	Residence_type	avg_glucose_level	bmi	smoking_status	stroke
0	9046	1.0	67.0	0	1	1	Private	Urban	228.69	36.600000	formerly smoked	1
1	51676	0.0	61.0	0	0	1	Self-employed	Rural	202.21	28.893237	never smoked	1
2	31112	1.0	80.0	0	1	1	Private	Rural	105.92	32.500000	never smoked	1
3	60182	0.0	49.0	0	0	1	Private	Urban	171.23	34.400000	smokes	1
4	1665	0.0	79.0	1	0	1	Self-employed	Rural	174.12	24.000000	never smoked	1

# Impute missing numeric values with the column mean.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on a column selection: that chained in-place form is
# deprecated and leaves the DataFrame unchanged under pandas Copy-on-Write.
for numeric_column in ('age', 'bmi'):
    data_train[numeric_column] = data_train[numeric_column].fillna(
        data_train[numeric_column].mean()
    )

# Encode the binary categorical columns as 0/1.
data_train['gender'] = data_train['gender'].map({'Male': 1, 'Female': 0})
data_train['ever_married'] = data_train['ever_married'].map({'Yes': 1, 'No': 0})

# Series.map turns every value that is not a key of the mapping into NaN.
# Fill such gender gaps with 1 here, before splitting, so that neither the
# training nor the test rows carry NaN into the model (LogisticRegression
# rejects NaN input).
data_train['gender'] = data_train['gender'].fillna(1)

# Report remaining missing values only after imputation and encoding, so
# NaN introduced by the encoding step would be visible too.
print(data_train.isnull().sum())

# Select features and target variable
X = data_train[['gender', 'age', 'hypertension', 'heart_disease', 'ever_married', 'avg_glucose_level','bmi']]
y = data_train['stroke']


# Split the data into training and testing sets (80% / 20%, fixed seed for
# reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

id                   0
gender               0
age                  0
hypertension         0
heart_disease        0
ever_married         0
work_type            0
Residence_type       0
avg_glucose_level    0
bmi                  0
smoking_status       0
stroke               0
dtype: int64

print("Training model")

# Initialize and train the logistic regression model
log_reg = LogisticRegression(solver='liblinear')
log_reg.fit(X_train, y_train)

# The debugger breakpoint that used to follow training was removed: it
# stopped every run at an interactive prompt and blocked unattended execution.

print("Training completed")

save_path = "logistic_regression_model_stroke.pkl"
print("Saving model to: " + save_path)
# Save the model to a file using pickle
with open(save_path, 'wb') as model_file:
    pickle.dump(log_reg, model_file)

# Load the model back from the file to confirm the saved file is readable.
# NOTE: pickle.load can execute arbitrary code; only load files this
# notebook (or another trusted source) produced.
with open(save_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

print("Finished saving trained model")

# loaded_model now holds the copy of the model read back from disk.

Training model
--Return--
None
> [0;32m/tmp/ipykernel_1603/4206476194.py[0m(7)[0;36m<module>[0;34m()[0m
[0;32m      5 [0;31m[0mlog_reg[0m[0;34m.[0m[0mfit[0m[0;34m([0m[0mX_train[0m[0;34m,[0m [0my_train[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      6 [0;31m[0;34m[0m[0m
[0m[0;32m----> 7 [0;31m[0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      8 [0;31m[0;34m[0m[0m
[0m[0;32m      9 [0;31m[0mprint[0m[0;34m([0m[0;34m"Training completed"[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


Training completed
Saving model to: logistic_regression_model_stroke.pkl
Finished saving trained model

# Replace missing gender values in the test features with 1.
# DataFrame.fillna with a per-column dict returns a new frame, which avoids
# the chained `X_test['gender'].fillna(..., inplace=True)` form that warns on
# a frame derived from another one and has no effect under Copy-on-Write.
X_test = X_test.fillna({'gender': 1})

# Count the missing values left in each test-set feature column.
X_test.isnull().sum()

# Predict stroke labels for the held-out rows with the trained model.
y_pred = log_reg.predict(X_test)

# Fraction of test rows whose predicted label matches the true label.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(score)

0.9393346379647749