Importing packages
/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.25.2
warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
Loading, pre-processing, and splitting data
array([' Graduate', ' Not Graduate', ' Graduate', ..., ' Not Graduate',
' Not Graduate', ' Graduate'], dtype=object)
# Report how many missing values each column holds (nothing is imputed here).
print(data_train.isnull().sum())

# Binary-encode the categorical columns. Column names and raw labels carry a
# leading space in this dataset, so the keys below must keep it.
# NOTE(review): assuming data_train is a pandas DataFrame, any label absent
# from a mapping becomes NaN after .map() -- confirm no other labels occur.
binary_encodings = {
    ' education': {' Graduate': 1, ' Not Graduate': 0},
    ' self_employed': {' Yes': 1, ' No': 0},
    ' loan_status': {' Approved': 1, ' Rejected': 0},
}
for column_name, label_codes in binary_encodings.items():
    data_train[column_name] = data_train[column_name].map(label_codes)

# Feature matrix (the loan_id column is not used) and the encoded target.
feature_columns = [
    ' no_of_dependents',
    ' education',
    ' self_employed',
    ' income_annum',
    ' loan_amount',
    ' loan_term',
    ' cibil_score',
    ' residential_assets_value',
    ' commercial_assets_value',
    ' luxury_assets_value',
    ' bank_asset_value',
]
X = data_train[feature_columns]
y = data_train[' loan_status']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
loan_id 0
no_of_dependents 0
education 0
self_employed 0
income_annum 0
loan_amount 0
loan_term 0
cibil_score 0
residential_assets_value 0
commercial_assets_value 0
luxury_assets_value 0
bank_asset_value 0
loan_status 0
dtype: int64
Training model
--Return--
None
> /tmp/ipykernel_4065/3141272518.py(7)<module>()
      5 log_reg.fit(X_train, y_train)
      6 
----> 7 pdb.set_trace()
      8 
      9 print("Training completed")

Training completed
Saving model to: logistic_regression_model_loan.pkl
Finished saving trained model
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py in get_loc(self, key)
3789 try:
-> 3790 return self._engine.get_loc(casted_key)
3791 except KeyError as err:
index.pyx in pandas._libs.index.IndexEngine.get_loc()
index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'gender'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
/tmp/ipykernel_4065/582419892.py in <module>
----> 1 X_test['gender'].fillna(1, inplace=True)
2
3 X_test.isna().sum()
4
5 y_pred = log_reg.predict(X_test)
~/.local/lib/python3.10/site-packages/pandas/core/frame.py in __getitem__(self, key)
3894 if self.columns.nlevels > 1:
3895 return self._getitem_multilevel(key)
-> 3896 indexer = self.columns.get_loc(key)
3897 if is_integer(indexer):
3898 indexer = [indexer]
~/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py in get_loc(self, key)
3795 ):
3796 raise InvalidIndexError(key)
-> 3797 raise KeyError(key) from err
3798 except TypeError:
3799 # If we have a listlike key, _check_indexing_error will raise
KeyError: 'gender'