Python day32
@浙大疏锦行
# Train a random forest on the iris dataset, then draw a PDPbox target plot
# for one feature to see how the target varies across that feature's buckets.
import pandas as pd
import pdpbox
from pdpbox.info_plots import TargetPlot
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load iris into a DataFrame that holds both the feature columns and the label.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target

features = iris.feature_names
target = 'target'

# Split the dataset (80% train / 20% test, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    df[features], df[target], test_size=0.2, random_state=42
)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# The PDPbox API differs between releases, so print the installed version.
print(pdpbox.__version__)

# Feature to analyse (e.g. petal length (cm)).
feature = 'petal length (cm)'
feature_name = feature

# Build the TargetPlot object.
# NOTE(review): 'target' is a single column holding labels 0/1/2; confirm in
# the PDPbox docs how a non-binary single target column is interpreted.
target_plot = TargetPlot(
    df=df,                      # raw data; must contain feature and target columns
    feature=feature,            # column of the feature being analysed
    feature_name=feature_name,  # display name used for plot labels
    target='target',            # target column
    grid_type='percentile',     # bucket the feature by percentiles
    num_grid_points=10,         # number of grid points used to form the buckets
)

# plot() returns three values; unpack all of them to get at the figure.
fig, axes, summary_df = target_plot.plot(
    which_classes=None,    # no class filter
    show_percentile=True,  # show the percentile annotations
    engine='plotly',
    template='plotly_white',
)

# Set the figure size manually (in pixels) and centre the title.
fig.update_layout(
    width=800,
    height=500,
    title=dict(text=f'Target Plot: {feature_name}', x=0.5),
)
fig.show()
阅读文档时，要关注类的方法、参数和返回值。只有正确接收并使用返回值，才能得到正确的结果。