python逻辑回归

  1. 逻辑回归
#-*- coding:UTF-8 -*- #允许中文


########简单逻辑回归################
from collections import OrderedDict
import pandas as pd
import matplotlib.pyplot as plt   #散点图matplotlib
import numpy

#1. Data set: one feature column (LearningTime) and one 0/1 label column (Score)
examDict = {
    'LearningTime': [0.5, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
                     2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50],
    'Score': [0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
              1, 0, 1, 0, 1, 1, 1, 1, 1, 1],
}
# Keep the column order explicit before handing the data to pandas.
examOrderDict = OrderedDict(examDict.items())
examDf = pd.DataFrame(data=examOrderDict)
exam_X = examDf['LearningTime']  # feature Series
exam_y = examDf['Score']         # label Series

#作图看分布情况
#plt.scatter(exam_X,exam_y,color="b",label="exam data")#散点图matplotlib, b:blue, w:white等
#plt.xlabel("Learning Time")  #添加图标
#plt.ylabel("Score")          #添加图标
#plt.show()                   #显示图像


#2. Build the training and test sets
from sklearn.model_selection import train_test_split

# train_size=.8 keeps 80% of the rows for training; the rest go to the test set.
# No random_state is passed, so the split (and every number printed further
# down) can differ from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(exam_X, exam_y, train_size=.8)

# Report the shapes of the feature sets (.shape gives the array dimensions).
print('OriginalFeature: ',exam_X.shape,'TrainingFeature: ',X_train.shape,'TestFeature: ',X_test.shape)
# Report the shapes of the label sets; the original label set is exam_y, not exam_X.
print('OriginalLable: ',exam_y.shape,'TrainingLable: ',y_train.shape,'TestLable: ',y_test.shape)
print(type(X_train))

##相关系数矩阵
#rDf=examDf.corr() #.corr()矩阵相关性
#print(rDf)

#3. Turn each 1-D feature Series into a 2-D array with one column
#   (-1 lets numpy infer the number of rows).
X_train = numpy.reshape(X_train.values, (-1, 1))
# Same conversion for the test-set features.
X_test = numpy.reshape(X_test.values, (-1, 1))

#查看用于训练集和测试集数据
#print("The training set is \n" + str(X_train))
#print("The test set is \n" + str(X_test))

#4. Train the model -- logistic regression
from sklearn.linear_model import LogisticRegression
# fit() returns the fitted estimator itself, so creation and training are chained.
model = LogisticRegression().fit(X_train, y_train)

#LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
#penalty='l2', random_state=None, solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

##最佳拟合线
#a=model.coef_
#b=model.intercept_
#print("The coeffeciency of the model is " + str(a) + ". The fit_intercept of the model is " + str(b))  #z=ax+b,y=1/(1+exp(-z))

# Evaluate the model on the held-out test set.
# For a classifier such as LogisticRegression, score() returns the mean
# accuracy (fraction of test samples classified correctly) -- it is NOT the
# coefficient of determination R^2 that regression estimators report.
ModelScore=model.score(X_test,y_test)
print("The Score of the model is " + str(ModelScore))

# Apply the model by hand: evaluate the logistic function for one LearningTime value.
import numpy as np
x = 3                             # LearningTime value to evaluate
a = model.coef_                   # fitted coefficient (an array, not a scalar)
b = model.intercept_              # fitted intercept (an array, not a scalar)
z = a * x + b                     # linear term z = a*x + b
pred_Y = 1 / (np.exp(-z) + 1)     # logistic regression formula y = 1/(1+exp(-z))
# pred_Y keeps the array shape produced by broadcasting coef_ and intercept_.
print('The probability of passing the test is :',pred_Y)
Tags: