import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn import metrics
# ConvergenceWarning is imported from its public location; the private
# ``sklearn.linear_model.coordinate_descent`` module path is not available
# in current scikit-learn releases.
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import label_binarize

# Configure the plotting font so that Chinese text is not rendered as garbled
# characters, and keep the minus sign displayable with that font.
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False

# Silence convergence warnings raised while fitting models.
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)

# Load the data set: the file has no header row, so column names are supplied.
path = "datas/iris.data"
names = ['sepal length', 'sepal width', 'petal length', 'petal width', 'cla']
df = pd.read_csv(path, header=None, names=names)

# Exploratory inspection of the class distribution and the first rows; the
# return values are discarded when this file runs as a plain script.
df['cla'].value_counts()
df.head()
# Integer code assigned to each recognised class label in the 'cla' column.
_CLASS_CODES = {
    'Iris-setosa': 1,
    'Iris-versicolor': 2,
    'Iris-virginica': 3,
}


def parseRecord(record, columns=None):
    """Convert one raw record into a list of numeric values.

    Each value is paired positionally with a column name. The value paired
    with the ``'cla'`` column is mapped to an integer code (1, 2 or 3 for
    'Iris-setosa', 'Iris-versicolor' and 'Iris-virginica'); any other label
    becomes ``np.nan``. Every other value is converted with ``float``.

    Args:
        record: Iterable of raw values, ordered like ``columns``.
        columns: Column names to pair with ``record``. Defaults to the
            module-level ``names`` list, which preserves the original
            behaviour for existing callers.

    Returns:
        A list with one converted entry per (column, value) pair. Pairing
        stops at the shorter of ``columns`` and ``record``.

    Raises:
        ValueError, TypeError: If a non-class value cannot be converted by
            ``float``.
    """
    if columns is None:
        # Fall back to the file-level column list used when loading the data.
        columns = names

    result = []
    for name, value in zip(columns, record):
        if name == 'cla':
            # Unknown labels are marked as missing rather than raising.
            result.append(_CLASS_CODES.get(value, np.nan))
        else:
            result.append(float(value))
    return result