2.2.1
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import pickle
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
# 加载数据
file_path = 'finance数据集.csv'
data = pd.__________(file_path)
---
data = pd.read_csv(file_path)
# 显示前五行的数据
print(data.__________())
---
print(data.head())
# 选择自变量和因变量
X = data.__________(['SeriousDlqin2yrs', 'Unnamed: 0'], axis=1)
y = data['__________']
---
X = data.drop(['SeriousDlqin2yrs', 'Unnamed: 0'], axis=1)
y = data['SeriousDlqin2yrs']
代码解释:
- ['SeriousDlqin2yrs', 'Unnamed: 0']:要删除的列名列表。
  - SeriousDlqin2yrs:目标变量(是否逾期),需从特征中移除。
  - Unnamed: 0:可能是导入数据时生成的索引列(无实际意义,需删除)。
- axis=1:指定删除列;若为 axis=0 则删除行(按索引)。
# 分割训练集和测试集
X_train, X_test, y_train, y_test = __________(X, y, test_size=0.2, random_state=42)
---
# Answer: hold out 20% of the rows as the test split (test_size=0.2);
# random_state=42 fixes the shuffle seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# 训练 Logistic 回归模型
model = __________(max_iter=1000)
model.fit(__________, y_train)
---
# Answer: build the classifier with the solver iteration cap set to 1000,
# then fit it on the training split only (features first, labels second).
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
代码解释:训练用 train(X_train、y_train),测试用 test(X_test、y_test);fit 训练模型时传入的是训练集。
# 保存模型
with open('2.2.1_model.pkl', 'wb') as file:
pickle.__________(model, file)
---
pickle.dump(model, file)
代码解释:pickle 库的 dump(对象, 文件) 用于把对象保存(序列化)到文件;文件需以二进制写模式 'wb' 打开,且该行要缩进写在 with 语句块内。
# 预测并保存结果
y_pred = model.__________(X_test)
pd.DataFrame(y_pred, columns=['预测结果']).to_csv('2.2.1_results.txt', index=False)
---
# Answer: predict labels for the held-out features, then save them.
# The second line is unchanged from the template; it writes the predictions
# as a one-column CSV (header '预测结果') without the index.
y_pred = model.predict(X_test)
pd.DataFrame(y_pred, columns=['预测结果']).to_csv('2.2.1_results.txt', index=False)
代码解释:predict 是模型的预测方法,用法为 model.predict(X_test);y_pred 是保存预测结果的变量名。空格在 "model." 后面时填方法名 predict;需要填预测结果变量时填 y_pred。
# 分析测试结果
accuracy = (y_test == __________).mean()
print(f"模型准确率: {__________:.2f}")
---
accuracy = (y_test == y_pred).mean()
print(f"模型准确率: {accuracy:.2f}")
代码解释:
(y_test == y_pred) 逐个比较真实标签与预测标签,得到一列 True/False;
对其取 .mean() 即预测正确的比例,也就是准确率 accuracy,print 时填写变量名 accuracy。
# 处理数据不平衡
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(__________, __________)
---
# Answer: oversample only the training split. Resampling the full X, y would
# put the X_test rows (and synthetic points derived from them) into the
# training data, so the later evaluation on X_test would be leaked.
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
代码解释:SMOTE 过采样只对训练集(X_train, y_train)进行均衡,测试集保持原始分布,否则测试数据会泄漏到训练中,评估结果偏乐观。
# 重新训练模型
model.fit(__________, __________)
---
model.fit(X_resampled, y_resampled)
# 重新预测
y_pred_resampled = model.__________(X_test)
---
y_pred_resampled = model.predict(X_test)
# 生成新的测试报告
report_resampled = classification_report(y_test, y_pred_resampled, zero_division=1)
with open('2.2.1_report_xg.txt', 'w') as file:
file.write(__________)
---
file.write(report_resampled)
PS:若提示缺少 imblearn 模块,请安装 imbalanced-learn(导入名为 imblearn):pip install imbalanced-learn -i https://pypi.tuna.tsinghua.edu.cn/simple