# coding=utf-8
"""Two-class logistic regression fitted with batch gradient ascent.

The script reads whitespace-separated samples from a text file, fits the
regression coefficients, prints them and draws the data together with the
resulting decision boundary.
"""
import numpy as np


def loadDataSet(filename='testData.txt'):
    """Read a whitespace-separated data file into feature rows and labels.

    Every non-blank line holds the feature values followed by an integer
    class label in the last column. A constant 1.0 is put in front of the
    features of every row so that the first coefficient acts as intercept.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: a list of ``[1.0, x1, x2, ...]``
        rows and the list of integer labels, in file order.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a number.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even when a row fails to parse.
    with open(filename) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Tolerate blank lines such as a trailing newline.
                continue
            dataMat.append([1.0] + [float(v) for v in lineArr[:-1]])
            labelMat.append(int(lineArr[-1]))
    return dataMat, labelMat


def sigmoid(inX):
    """Return the logistic function ``1 / (1 + exp(-inX))``.

    Works element-wise on NumPy arrays/matrices as well as on scalars.
    """
    return 1.0 / (1.0 + np.exp(-inX))


def gradAscent(dataMatIn, classLabels, alpha=0.0001, maxCycles=500):
    """Fit logistic-regression coefficients by batch gradient ascent.

    Each cycle moves the weights along the gradient of the log-likelihood,
    ``X^T (y - sigmoid(X w))``, scaled by ``alpha``. The weights start as a
    column of ones.

    Args:
        dataMatIn: 2-D table of samples, one row per sample and one column
            per feature (including the constant intercept column).
        classLabels: One 0/1 label per sample.
        alpha: Step size of every update.
        maxCycles: Number of update cycles to run.

    Returns:
        The coefficients as an ``(n, 1)`` ``numpy.matrix``, where ``n`` is
        the number of columns of ``dataMatIn``.

    Raises:
        ValueError: If ``dataMatIn`` is not 2-D or the number of labels
            differs from the number of samples.
    """
    features = np.asarray(dataMatIn, dtype=float)
    labels = np.asarray(classLabels, dtype=float).reshape(-1, 1)
    if features.ndim != 2:
        raise ValueError('dataMatIn must be a 2-D table of samples')
    # m: number of samples, n: number of features per sample.
    m, n = features.shape
    if labels.shape[0] != m:
        # Without this check a short label vector could broadcast silently.
        raise ValueError('expected %d labels, got %d' % (m, labels.shape[0]))

    weights = np.ones((n, 1))
    for _ in range(maxCycles):
        h = sigmoid(features.dot(weights))
        error = labels - h
        weights = weights + alpha * features.T.dot(error)
    # Callers historically received a numpy.matrix; keep that return type.
    return np.asmatrix(weights)


def GetResult():
    """Fit the default data set, print the coefficients and plot the fit."""
    dataMat, labelMat = loadDataSet()
    weights = gradAscent(dataMat, labelMat)
    print(weights)
    plotBestFit(weights)


def plotBestFit(weights):
    """Plot the default data set and the decision boundary of ``weights``.

    Samples labelled 1 are drawn as red squares, all others as green dots.
    The boundary is the line where ``w0 + w1 * x1 + w2 * x2`` equals zero.

    Args:
        weights: At least three coefficients (intercept, x1, x2), given as
            a column matrix, a 2-D array or a flat sequence.
    """
    # Imported here so the numeric helpers above stay usable on machines
    # without a plotting backend.
    import matplotlib.pyplot as plt

    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)
    isPositive = np.array(labelMat) == 1
    # Flatten first so matrix, 2-D and 1-D inputs are all read the same way.
    w0, w1, w2 = np.asarray(weights, dtype=float).ravel()[:3]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(dataArr[isPositive, 1], dataArr[isPositive, 2],
               s=30, c='red', marker='s')
    ax.scatter(dataArr[~isPositive, 1], dataArr[~isPositive, 2],
               s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    # Solve w0 + w1 * x1 + w2 * x2 = 0 for x2.
    y = (-w0 - w1 * x) / w2
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


if __name__ == '__main__':
    GetResult()
以上代码可以求出数据的回归系数。得到系数之后,如何对新数据进行实际预测?应该用哪个 API?
代码摘自: http://blog.csdn.net/buptgshengod/article/details/24715035
你学过 Python 吗?学到什么程度了?
没学过,工作要用,不过问题解决了
用 sklearn 的 sklearn.linear_model.LogisticRegression 就可以:先调用 fit 训练,再调用 predict 进行预测。