【数据挖掘 xgboost】特征的重要程度分析
代码片段import pandas as pdimport xgboost as xgbimport operator# # 从sklearn.cross_validation里选择导入train_test_split用于数据分割。# from sklearn.model_selection import train_test_split# # 从使用train_test_split,利用
·
代码片段
import pandas as pd
import xgboost as xgb
import operator
# # Import train_test_split from sklearn.model_selection (the replacement for the old sklearn.cross_validation module) to split the data.
# from sklearn.model_selection import train_test_split
# # Use train_test_split with the random seed random_state=33 to hold out 25% of the data as the test set.
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=33)
#
# # Import the data standardization module from sklearn.preprocessing.
# from sklearn.preprocessing import StandardScaler
# # Standardize the training and test feature data.
# ss = StandardScaler()
# X_train = ss.fit_transform(X_train)
# X_test = ss.transform(X_test)
# Train a gradient-boosted tree model and print its per-feature scores.
# Adjust the hyper-parameters below to suit your own data.
# NOTE(review): newer XGBoost releases appear to have replaced "silent" with
# "verbosity" -- confirm against the installed version before changing it.
xgb_params = {
    'booster': 'gbtree',
    'objective': 'binary:logistic',
    "eta": 0.01,
    "max_depth": 5,
    "silent": 0,
    "colsample_bytree": 0.7,
}
num_rounds = 1000

# X_train / y_train are not assigned by any active line in this snippet (the
# split that would create them is commented out above); define them first.
dtrain = xgb.DMatrix(X_train, label=y_train)
gbdt = xgb.train(xgb_params, dtrain, num_rounds)

# get_fscore() yields a mapping of feature -> score; sort its items in
# ascending order of score (second element of each pair).
importance = gbdt.get_fscore()
importance = sorted(importance.items(), key=operator.itemgetter(1))

# Call form of print: valid on Python 3, and prints the same list on Python 2,
# whereas the original `print importance` statement is a SyntaxError on 3.
print(importance)
更多推荐
所有评论(0)