银行用户信誉等级
交叉熵任务:一批用户有若干种属性,先定义实际的评分规则,再根据这些属性值把每个用户评为 AAAA、AAA、AA、A、B、C、D、E 八个级别之一。使用 PyTorch 建立模型,并用交叉熵损失进行训练;之后再生成测试数据,用训练好的模型进行预测。
from typing import Optional, Tuple

import numpy as np
import torch
import torch.nn as nn

# Rating names indexed by class id: 0 is the best grade, 7 the worst.
RATING_LABELS = ['AAAA', 'AAA', 'AA', 'A', 'B', 'C', 'D', 'E']

# Exclusive lower score bounds for classes 0..6 (AAAA..D), best grade first.
# A score that clears none of them falls into class 7 (E).
_SCORE_THRESHOLDS = np.array([0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3])


def _scores_to_ratings(features: np.ndarray) -> np.ndarray:
    """Map an (n, 4) matrix of [age, income, activity, credit] to class ids."""
    scores = (0.3 * features[:, 0] / 70
              + 0.2 * features[:, 1] / 100
              + 0.2 * features[:, 2] / 100
              + 0.3 * features[:, 3] / 900)
    # The class id equals the number of thresholds the score fails to exceed:
    # score > 0.9 -> 0 (AAAA), 0.8 < score <= 0.9 -> 1 (AAA), ...,
    # score <= 0.3 -> 7 (E).
    return (scores[:, None] <= _SCORE_THRESHOLDS).sum(axis=1).astype(np.int64)


def generate_user_data(
    num_samples: int = 5000,
    seed: Optional[int] = 42,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Generate simulated user attributes and their rule-based rating labels.

    Attributes (one column each, in this order):
      - age:      integer in 18..70; older users score higher
      - income:   exponential(scale=20) clipped to 0..100; higher scores higher
      - activity: normal(60, 20) clipped to 0..100; higher scores higher
      - credit:   normal(600, 150) clipped to 300..900; higher scores higher

    The label is derived from the weighted score
    ``0.3*age/70 + 0.2*income/100 + 0.2*activity/100 + 0.3*credit/900``
    bucketed into 8 classes (0 = AAAA ... 7 = E).

    Args:
        num_samples: Number of users to generate; 0 yields empty tensors.
        seed: Seed for the private random generator. ``None`` draws a fresh
            seed from the OS.

    Returns:
        ``(features, labels)`` where ``features`` is a float32 tensor of shape
        ``(num_samples, 4)`` and ``labels`` is an int64 tensor of shape
        ``(num_samples,)``.
    """
    # A private legacy RandomState produces the same stream as
    # np.random.seed(seed) followed by the module-level np.random functions,
    # but leaves the caller's global NumPy random state untouched.
    rng = np.random.RandomState(seed)

    age = rng.randint(18, 71, size=num_samples)
    income = rng.exponential(scale=20, size=num_samples).clip(0, 100)
    activity = rng.normal(loc=60, scale=20, size=num_samples).clip(0, 100)
    credit = rng.normal(loc=600, scale=150, size=num_samples).clip(300, 900)

    features = np.column_stack([age, income, activity, credit])
    labels = _scores_to_ratings(features)

    # Explicit dtypes keep the result independent of platform integer width.
    return (torch.tensor(features, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.long))


class RatingModel(nn.Module):
    """Small MLP classifier: input_dim -> 32 -> 64 -> num_classes logits."""

    def __init__(self, input_dim=4, num_classes=8):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return raw (unnormalised) class logits for a batch of features."""
        return self.net(x)


def main(num_epochs=100, learning_rate=0.01, num_examples=40, seed=42):
    """Train the rating model with cross-entropy and print sample predictions.

    Args:
        num_epochs: Number of full-batch training epochs.
        learning_rate: Adam learning rate.
        num_examples: Maximum number of test users to print at the end.
        seed: Seed for weight initialisation and the train/test split;
            ``None`` leaves both unseeded.
    """
    # Imported here so the data generator and the model remain importable on
    # machines without scikit-learn; only this training script needs it.
    from sklearn.model_selection import train_test_split

    if seed is not None:
        torch.manual_seed(seed)

    raw_X, y = generate_user_data()

    # Split indices rather than tensors so the raw (unscaled) attributes of
    # the test users stay available for the final report.
    train_idx, test_idx = train_test_split(
        np.arange(len(raw_X)), test_size=0.2, random_state=seed)
    train_idx = torch.as_tensor(train_idx, dtype=torch.long)
    test_idx = torch.as_tensor(test_idx, dtype=torch.long)

    train_raw, test_raw = raw_X[train_idx], raw_X[test_idx]
    train_y, test_y = y[train_idx], y[test_idx]

    # Fit the standardisation statistics on the training split only so that
    # no information from the test users leaks into training.
    mean = train_raw.mean(dim=0)
    std = train_raw.std(dim=0).clamp_min(1e-8)  # guard against constant columns
    train_X = (train_raw - mean) / std
    test_X = (test_raw - mean) / std

    model = RatingModel()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        model.train()
        outputs = model(train_X)
        loss = criterion(outputs, train_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report held-out accuracy every 10 epochs.
        if epoch % 10 == 0:
            model.eval()
            with torch.no_grad():
                preds = torch.argmax(model(test_X), dim=1)
                acc = (preds == test_y).float().mean().item()
            print(f"Epoch {epoch}, Loss: {loss.item():.4f}, Acc: {acc:.2%}")

    # Final per-user report on the test split.
    model.eval()
    with torch.no_grad():
        test_probs = torch.softmax(model(test_X), dim=1)

    print("\n=== 测试样本预测示例 ===")
    for i in range(min(num_examples, len(test_X))):
        true_rating = RATING_LABELS[test_y[i].item()]
        pred_rating = RATING_LABELS[torch.argmax(test_probs[i]).item()]
        # Show the original attribute values, not their z-scores.
        age, income, activity, credit = test_raw[i].tolist()
        prob_by_rating = {
            label: round(prob, 3)
            for label, prob in zip(RATING_LABELS, test_probs[i].tolist())
        }
        print(f"用户{i + 1}: 真实评级={true_rating}, 预测评级={pred_rating}")
        print(f"属性值: 年龄={age:.1f}, 收入={income:.1f}, "
              f"活跃度={activity:.1f}, 信用分={credit:.1f}")
        print(f"预测概率分布: {prob_by_rating}")
        print("-" * 50)


if __name__ == "__main__":
    main()