分类问题-机器学习
-
分类:感知机
简单判断图片是纵向还是横向
训练数据:images1.csv
x1,x2,y 153,432,-1 220,262,-1 118,214,-1 474,384,1 485,411,1 233,430,-1 396,361,1 484,349,1 429,259,1 286,220,1 399,433,-1 403,340,1 252,34,1 497,472,1 379,416,-1 76,163,-1 263,112,1 26,193,-1 61,473,-1 420,253,1
import numpy as np
import matplotlib.pyplot as plt

# Load the training data, skipping the CSV header row.
train = np.loadtxt('images1.csv', delimiter=',', skiprows=1)
# The first two columns are the features (x1, x2).
train_x = train[:, 0:2]
# The third column is the label y.
train_y = train[:, 2]

# Optional preview of the raw data:
# plt.plot(train_x[train_y == 1, 0], train_x[train_y == 1, 1], 'o')
# plt.plot(train_x[train_y == -1, 0], train_x[train_y == -1, 1], 'x')
# plt.axis('scaled')
# plt.show()

# Weight initialization (random values in [0, 1)).
# The decision boundary is w·x = w1*x1 + w2*x2 = 0.
w = np.random.rand(2)


def f(x):
    """Discriminant function: return 1 when w·x >= 0, otherwise -1."""
    return 1 if np.dot(w, x) >= 0 else -1


# Number of passes over the training data.
epoch = 10
# Number of weight updates performed so far.
count = 0

# Learn the weights: only misclassified samples trigger an update.
for _ in range(epoch):
    for x, y in zip(train_x, train_y):
        if f(x) == y:
            continue
        w = w + y * x
        # Log every update.
        count += 1
        print('第{}次:w={}'.format(count, w))

# Draw the samples and the learned boundary.
# w·x = w1*x1 + w2*x2 = 0  =>  x2 = -w1/w2 * x1
x1 = np.arange(0, 500)
positive = train_y == 1
negative = train_y == -1
plt.plot(train_x[positive, 0], train_x[positive, 1], 'o')
plt.plot(train_x[negative, 0], train_x[negative, 1], 'x')
plt.plot(x1, -w[0] / w[1] * x1, linestyle='dashed')
plt.show()

# Prediction.
# 200x100: landscape
print(f([200, 100]))
# 100x200: portrait
print(f([100, 200]))
-
分类:逻辑回归
训练数据:images2.csv
x1,x2,y 153,432,0 220,262,0 118,214,0 474,384,1 485,411,1 233,430,0 396,361,1 484,349,1 429,259,1 286,220,1 399,433,0 403,340,1 252,34,1 497,472,1 379,416,0 76,163,0 263,112,1 26,193,0 61,473,0 420,253,1
import numpy as np
import matplotlib.pyplot as plt

# Load the training data, skipping the CSV header row.
train = np.loadtxt('images2.csv', delimiter=',', skiprows=1)
train_x = train[:, 0:2]
train_y = train[:, 2]

# Parameter initialization (theta0, theta1, theta2), random values in [0, 1).
theta = np.random.rand(3)

# Standardization statistics.
# axis=0 computes the mean and standard deviation of each column.
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)


def standardize(x):
    """Standardize x column-wise with the training mean and std."""
    return (x - mu) / sigma


train_z = standardize(train_x)


def to_matrix(x):
    """Return x with a leading column of ones (the x0 term) prepended."""
    # x0 has the same number of rows as x and a single column.
    x0 = np.ones([x.shape[0], 1])
    # Join x0 and the features into one matrix.
    return np.hstack([x0, x])


X = to_matrix(train_z)

# Optional preview of the standardized data:
'''
plt.plot(train_z[train_y==1,0],train_z[train_y==1,1],'o')
plt.plot(train_z[train_y==0,0],train_z[train_y==0,1],'x')
plt.show()
'''


def f(x):
    """Sigmoid function applied to the dot product of x and theta."""
    return 1 / (1 + np.exp(-np.dot(x, theta)))


# Learning rate.
ETA = 1e-3

# Number of iterations.
epoch = 5000

# Repeated learning: batch update of theta over all samples.
for _ in range(epoch):
    theta = theta - ETA * np.dot(f(X) - train_y, X)

# Decision boundary:
# theta^T x = theta0*x0 + theta1*x1 + theta2*x2 = 0
# x2 = -(theta0 + theta1*x1) / theta2
# NOTE: x0 here holds the x1-axis values for the boundary line.
x0 = np.linspace(-2, 2, 100)
# Scatter column 0 (x1) against column 1 (x2); using column 0 for both
# axes would collapse every sample onto the diagonal.
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
plt.plot(x0, -(theta[0] + theta[1] * x0) / theta[2], linestyle='dashed')
plt.show()


# Prediction.
def classify1(x):
    """Classify rows of x as 1 when f(x) >= 0.5, otherwise 0.

    astype(np.int_) converts the boolean mask to integers
    (True -> 1, False -> 0).
    """
    return (f(x) >= 0.5).astype(np.int_)


array = classify1(to_matrix(standardize([[200, 100], [100, 200]])))
print(array)
-
分类:线性不可分分类问题
训练数据:data3.csv
x1,x2,y 0.54508775,2.34541183,0 0.32769134,13.43066561,0 4.42748117,14.74150395,0 2.98189041,-1.81818172,1 4.02286274,8.90695686,1 2.26722613,-6.61287392,1 -2.66447221,5.05453871,1 -1.03482441,-1.95643469,1 4.06331548,1.70892541,1 2.89053966,6.07174283,0 2.26929206,10.59789814,0 4.68096051,13.01153161,1 1.27884366,-9.83826738,1 -0.1485496,12.99605136,0 -0.65113893,10.59417745,0 3.69145079,3.25209182,1 -0.63429623,11.6135625,0 0.17589959,5.84139826,0 0.98204409,-9.41271559,1 -0.11094911,6.27900499,0
import numpy as np
import matplotlib.pyplot as plt

# Load the training data, skipping the CSV header row.
train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
train_x = train[:, 0:2]
train_y = train[:, 2]

'''
plt.plot(train_x[train_y==1,0],train_x[train_y==1,1],'o')
plt.plot(train_x[train_y==0,0],train_x[train_y==0,1],'x')
plt.show()
'''

# Parameter initialization (four parameters, random values in [0, 1)).
theta = np.random.rand(4)

# History of the accuracy after each iteration.
accuracies = []

# Standardization statistics (per column).
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)


def standardize(x):
    """Standardize x column-wise with the training mean and std."""
    return (x - mu) / sigma


train_z = standardize(train_x)


def to_matrix(x):
    """Add x0 (a column of ones) and x3 (the first column squared) to x."""
    x0 = np.ones([x.shape[0], 1])
    x3 = x[:, 0, np.newaxis] ** 2
    return np.hstack([x0, x, x3])


X = to_matrix(train_z)


def f(x):
    """Sigmoid function applied to the dot product of x and theta."""
    z = np.dot(x, theta)
    return 1 / (1 + np.exp(-z))


# Learning rate.
ETA = 1e-3

# Number of iterations.
epoch = 5000


def classify1(x):
    """Classify rows of x as 1 when f(x) >= 0.5, otherwise 0."""
    return (f(x) >= 0.5).astype(np.int_)


# Repeated learning: batch update of theta over all samples.
for _ in range(epoch):
    theta = theta - ETA * np.dot(f(X) - train_y, X)
    # Compute the current accuracy: fraction of correctly classified samples.
    result = classify1(X) == train_y
    accuracy = np.count_nonzero(result) / len(result)
    accuracies.append(accuracy)

# Decision boundary:
# theta^T x = theta0*x0 + theta1*x1 + theta2*x2 + theta3*x3
#           = theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 = 0
# x2 = -(theta0 + theta1*x1 + theta3*x1^2) / theta2
x1 = np.linspace(-2, 2, 100)
x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
plt.plot(x1, x2, linestyle='dashed')
plt.show()

# Plot the accuracy curve:
# x = np.arange(len(accuracies))
# plt.plot(x,accuracies)
# plt.show()
因为训练数据过少(只有20个),精度值只能是0.05的整数倍,所以acc曲线有棱有角:
-
分类:线性不可分分类问题 随机梯度下降法的实现
训练数据:同上
import numpy as np
import matplotlib.pyplot as plt

# Load the training data, skipping the CSV header row.
train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
train_x = train[:, 0:2]
train_y = train[:, 2]

'''
plt.plot(train_x[train_y==1,0],train_x[train_y==1,1],'o')
plt.plot(train_x[train_y==0,0],train_x[train_y==0,1],'x')
plt.show()
'''

# Parameter initialization (four parameters, random values in [0, 1)).
theta = np.random.rand(4)

# History of accuracy values (not filled in by this script).
accuracies = []

# Standardization statistics (per column).
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)


def standardize(x):
    """Standardize x column-wise with the training mean and std."""
    return (x - mu) / sigma


train_z = standardize(train_x)


def to_matrix(x):
    """Add x0 (a column of ones) and x3 (the first column squared) to x."""
    x0 = np.ones([x.shape[0], 1])
    x3 = x[:, 0, np.newaxis] ** 2
    return np.hstack([x0, x, x3])


X = to_matrix(train_z)


def f(x):
    """Sigmoid function applied to the dot product of x and theta."""
    z = np.dot(x, theta)
    return 1 / (1 + np.exp(-z))


# Learning rate.
ETA = 1e-3

# Number of iterations.
epoch = 5000


def classify1(x):
    """Classify rows of x as 1 when f(x) >= 0.5, otherwise 0."""
    return (f(x) >= 0.5).astype(np.int_)


# Repeated learning.
for _ in range(epoch):
    # Stochastic gradient descent: visit the samples in a fresh random
    # order each epoch and update theta one sample at a time.
    p = np.random.permutation(X.shape[0])
    for x, y in zip(X[p, :], train_y[p]):
        theta = theta - ETA * (f(x) - y) * x

# Decision boundary:
# theta^T x = theta0*x0 + theta1*x1 + theta2*x2 + theta3*x3
#           = theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 = 0
# x2 = -(theta0 + theta1*x1 + theta3*x1^2) / theta2
x1 = np.linspace(-2, 2, 100)
x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
plt.plot(x1, x2, linestyle='dashed')
plt.show()