yn = input("Would you like to install package dependencies? (y/n)")

if yn == "y":
    print("Installing dependencies")
    !pip3 install pandas
    !pip3 install numpy
    !pip3 install scikit-learn
else:
    print("Moving on")
Moving on
# Progress message for the import stage of the notebook.
print("Importing packages")
# Data handling and numerics.
import pandas as pd
import numpy as np
# scikit-learn pieces: the classifier, the train/test splitter, the accuracy
# metric and a feature scaler.
# NOTE(review): StandardScaler and numpy are not used in the visible cells.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
# Standard library: interactive debugger and object serialization.
import pdb
import pickle  # Import pickle for model saving
Importing packages


/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.25.2
  warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
# Progress message for the data-preparation stage.
print("Loading, pre-processing, and splitting data")

# Read the loan training set from the working directory into a DataFrame.
data_train = pd.read_csv(filepath_or_buffer="./trainLoan.csv")
Loading, pre-processing, and splitting data
# Preview the first rows of the loaded frame.
data_train.head()
# Look at the raw ' education' labels; note the column name starts with a space.
data_train[' education'].values

array([' Graduate', ' Not Graduate', ' Graduate', ..., ' Not Graduate',
       ' Not Graduate', ' Graduate'], dtype=object)
# Report the number of missing values per column before encoding.
print(data_train.isnull().sum())

# Categorical columns and the integer code assigned to each raw label.
# Column names and labels keep the leading space they have in the CSV.
label_encodings = {
    ' education': {' Graduate': 1, ' Not Graduate': 0},
    ' self_employed': {' Yes': 1, ' No': 0},
    ' loan_status': {' Approved': 1, ' Rejected': 0},
}

for column, mapping in label_encodings.items():
    # Skip columns that are already numeric so re-running this cell does not
    # map the 0/1 codes to NaN.
    if pd.api.types.is_numeric_dtype(data_train[column]):
        continue
    encoded = data_train[column].map(mapping)
    # Series.map yields NaN for labels missing from the mapping; fail loudly
    # here instead of handing NaNs to the model later.
    if encoded.isnull().any():
        unexpected = data_train.loc[encoded.isnull(), column].unique().tolist()
        raise ValueError(
            f"Unmapped values in column {column!r}: {unexpected!r}"
        )
    data_train[column] = encoded

# Select features and target variable
feature_columns = [
    ' no_of_dependents',
    ' education',
    ' self_employed',
    ' income_annum',
    ' loan_amount',
    ' loan_term',
    ' cibil_score',
    ' residential_assets_value',
    ' commercial_assets_value',
    ' luxury_assets_value',
    ' bank_asset_value',
]
X = data_train[feature_columns]
y = data_train[' loan_status']


# Split the data into training and testing sets (80/20, fixed seed so the
# split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
loan_id                      0
 no_of_dependents            0
 education                   0
 self_employed               0
 income_annum                0
 loan_amount                 0
 loan_term                   0
 cibil_score                 0
 residential_assets_value    0
 commercial_assets_value     0
 luxury_assets_value         0
 bank_asset_value            0
 loan_status                 0
dtype: int64
print("Training model")

# Initialize and train the logistic regression model
log_reg = LogisticRegression(solver='liblinear')
log_reg.fit(X_train, y_train)

# The leftover pdb.set_trace() breakpoint was removed here: it stopped the
# cell in the debugger on every run.
print("Training completed")

save_path = "logistic_regression_model_loan.pkl"
print("Saving model to: " + save_path)
# Save the model to a file using pickle
with open(save_path, 'wb') as model_file:
    pickle.dump(log_reg, model_file)

# Load the model back from the file to confirm the pickle round-trips.
# NOTE: pickle.load executes arbitrary code from the file; only load model
# files produced by a trusted source (here, the file written just above).
with open(save_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

print("Finished saving trained model")


Training model
--Return--
None
> /tmp/ipykernel_4065/3141272518.py(7)<module>()
      5 log_reg.fit(X_train, y_train)
      6 
----> 7 pdb.set_trace()
      8 
      9 print("Training completed")



Training completed
Saving model to: logistic_regression_model_loan.pkl
Finished saving trained model
# Show missing-value counts for the held-out features. A bare expression that
# is not the last statement of a cell is discarded, so print it explicitly.
print(X_test.isna().sum())

# Predict on the held-out split and report the fraction of correct labels.
y_pred = log_reg.predict(X_test)
score = accuracy_score(y_test, y_pred)
print(score)
---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

~/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py in get_loc(self, key)
   3789         try:
-> 3790             return self._engine.get_loc(casted_key)
   3791         except KeyError as err:


index.pyx in pandas._libs.index.IndexEngine.get_loc()


index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


KeyError: 'gender'


The above exception was the direct cause of the following exception:


KeyError                                  Traceback (most recent call last)

/tmp/ipykernel_4065/582419892.py in <module>
----> 1 X_test['gender'].fillna(1, inplace=True)
      2 
      3 X_test.isna().sum()
      4 
      5 y_pred = log_reg.predict(X_test)


~/.local/lib/python3.10/site-packages/pandas/core/frame.py in __getitem__(self, key)
   3894             if self.columns.nlevels > 1:
   3895                 return self._getitem_multilevel(key)
-> 3896             indexer = self.columns.get_loc(key)
   3897             if is_integer(indexer):
   3898                 indexer = [indexer]


~/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py in get_loc(self, key)
   3795             ):
   3796                 raise InvalidIndexError(key)
-> 3797             raise KeyError(key) from err
   3798         except TypeError:
   3799             # If we have a listlike key, _check_indexing_error will raise


KeyError: 'gender'