KNN决策树探究泰坦尼克号幸存者问题
- import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.metrics import classification_report
import graphviz #决策树可视化
复制代码- data = pd.read_csv(r"titanic_data.csv")
data.drop("PassengerId",axis = 1,inplace = True) #删除id这一列
复制代码
     Survived  Pclass  Sex     Age
0    0         3       male    22.0
1    1         1       female  38.0
2    1         3       female  26.0
3    1         1       female  35.0
4    0         3       male    35.0
...  ...       ...     ...     ...
886  0         2       male    27.0
887  1         1       female  19.0
888  0         3       female  NaN
889  1         1       male    26.0
890  0         3       male    32.0

891 rows × 4 columns
- data.loc[data["Sex"] == "male","Sex"] = 1
data.loc[data["Sex"] == "female","Sex"] = 0
复制代码
     Survived  Pclass  Sex  Age
0    0         3       1    22.0
1    1         1       0    38.0
2    1         3       0    26.0
3    1         1       0    35.0
4    0         3       1    35.0
...  ...       ...     ...  ...
886  0         2       1    27.0
887  1         1       0    19.0
888  0         3       0    NaN
889  1         1       1    26.0
890  0         3       1    32.0

891 rows × 4 columns
- data.fillna(data["Age"].mean(),inplace = True) #用均值来填充缺失值
复制代码
     Survived  Pclass  Sex  Age
0    0         3       1    22.000000
1    1         1       0    38.000000
2    1         3       0    26.000000
3    1         1       0    35.000000
4    0         3       1    35.000000
...  ...       ...     ...  ...
886  0         2       1    27.000000
887  1         1       0    19.000000
888  0         3       0    29.699118
889  1         1       1    26.000000
890  0         3       1    32.000000

891 rows × 4 columns
- Dtc = DecisionTreeClassifier(max_depth = 5,random_state =8) #构建决策树
Dtc.fit(data.iloc[:,1:],data["Survived"]) #模型训练
pre = Dtc.predict(data.iloc[:,1:]) #模型预测
复制代码- print(classification_report(pre,data["Survived"])) #混淆矩阵
复制代码-
              precision    recall  f1-score   support

           0       0.88      0.84      0.86       573
           1       0.73      0.79      0.76       318

    accuracy                           0.82       891
   macro avg       0.81      0.82      0.81       891
weighted avg       0.83      0.82      0.82       891
复制代码- pre == data["Survived"] #比较模型预测值与实际值是否一致
复制代码-
0       True
1       True
2       True
3       True
4       True
       ...
886     True
887     True
888    False
889    False
890     True
Name: Survived, Length: 891, dtype: bool
复制代码 可视化
- dot_data = export_graphviz(Dtc,feature_names = ["Pclass","Sex","Age"],class_names="Survive")
复制代码- graph = graphviz.Source(dot_data)
graph
复制代码
来源:https://blog.csdn.net/qq_45176548/article/details/112060492
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作! |