10分钟
Booster-示例
import xgboost as xgt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# Class-name -> integer label encoding used for binary classification.
# The data set has been trimmed: the 'Iris-setosa' class was removed, so only
# two classes remain.
_label_map = {
    'Iris-versicolor': 0,
    'Iris-virginica': 1,
}
class BoosterTest:
    """Demonstrate the low-level ``xgboost.Booster`` API on two-class iris data.

    The constructor reads ``./data/iris.csv``, makes a stratified 70/30
    train/validation split and creates an (untrained) ``Booster``.  The
    ``test_*`` methods print the results of attribute access, model dumping,
    manual boosting rounds and feature-importance queries; ``test`` runs them
    all in sequence.
    """

    def __init__(self):
        """Load the data and build the DMatrix objects and the Booster.

        Class names in the ``Class`` column are converted to integer labels
        through the module-level ``_label_map``; a class name missing from
        that mapping raises ``KeyError``.
        """
        df = pd.read_csv('./data/iris.csv')
        _feature_names = ['Sepal Length', 'Sepal Width',
                          'Petal Length', 'Petal Width']
        x = df[_feature_names]
        # The lambda parameter is named differently from the feature frame
        # ``x`` above so that it does not shadow it.
        y = df['Class'].map(lambda class_name: _label_map[class_name])

        train_X, test_X, train_Y, test_Y = train_test_split(
            x, y, test_size=0.3, stratify=y, shuffle=True, random_state=1)

        # Both matrices get the same feature names and types.  (The training
        # matrix was previously given a misspelled ``eature_names`` keyword.)
        self._train_matrix = xgt.DMatrix(
            data=train_X, label=train_Y,
            feature_names=_feature_names,
            feature_types=['float', 'float', 'float', 'float'])
        self._validate_matrix = xgt.DMatrix(
            data=test_X, label=test_Y,
            feature_names=_feature_names,
            feature_types=['float', 'float', 'float', 'float'])

        # NOTE(review): newer xgboost releases appear to use 'verbosity' in
        # place of 'silent' -- confirm against the installed version.
        self._booster = xgt.Booster(
            params={
                'booster': 'gbtree',
                'silent': 0,  # print messages
                'eta': 0.1,  # learning rate
                'max_depth': 5,
                'tree_method': 'exact',
                'objective': 'binary:logistic',
                'eval_metric': 'auc',
                'seed': 321},
            cache=[self._train_matrix, self._validate_matrix])

    def test_attribute(self):
        """Set one attribute on the booster and print attribute lookups.

        ``key1`` is set to ``'1'``; ``key2`` is never set, so its lookup
        shows what the booster reports for a missing attribute.

        :return: None
        """
        self._booster.set_attr(key1='1')
        print('attr:key1 -> ', self._booster.attr('key1'))
        print('attr:key2 -> ', self._booster.attr('key2'))
        print('attributes -> ', self._booster.attributes())

    def test_dump_model(self):
        """Print a preview of the model dump and write the dump to disk.

        Prints the first 20 characters of the first dumped tree followed by
        ``'...'``, or ``[]`` when the dump is empty.  The text dump is then
        written to ``model/booster.model``.  Both calls use the feature map
        file ``model/booster.feature``.

        :return: None
        """
        dumped_trees = self._booster.get_dump(
            fmap='model/booster.feature',
            with_stats=True, dump_format='text')
        preview = dumped_trees[0][:20] + '...' if dumped_trees else []
        print('dump:', preview)
        self._booster.dump_model('model/booster.model',
                                 fmap='model/booster.feature',
                                 with_stats=True)

    def test_train(self):
        """Run 100 boosting rounds, printing the evaluation after each one.

        After every ``update`` call the booster is evaluated on the training
        matrix (named ``train``) and on the validation matrix (named
        ``eval``), and both results are printed.

        :return: None
        """
        for i in range(100):
            self._booster.update(self._train_matrix, iteration=i)
            print(self._booster.eval(
                self._train_matrix, name='train', iteration=i))
            print(self._booster.eval(
                self._validate_matrix, name='eval', iteration=i))

    def test_importance(self):
        """Print feature importance from ``get_fscore`` and ``get_score``.

        ``get_fscore`` is called with the feature map file
        ``model/booster.feature``; ``get_score`` is queried for the
        ``weight`` and ``gain`` importance types.

        :return: None
        """
        print('fscore:', self._booster.get_fscore('model/booster.feature'))
        print('score.weight:',
              self._booster.get_score(importance_type='weight'))
        print('score.gain:',
              self._booster.get_score(importance_type='gain'))

    def test(self):
        """Run every demo step in order.

        The model is dumped both before and after training.  The comments
        below show sample output recorded for each step.
        """
        self.test_attribute()
        # attr:key1 -> 1
        # attr:key2 -> None
        # attributes -> {'key1': '1'}
        self.test_dump_model()
        # dump: []
        self.test_train()
        # [0] train-auc:0.980816
        # [0] eval-auc:0.933333
        # ...
        # [99] train-auc:0.998367
        # [99] eval-auc:0.995556
        self.test_dump_model()
        # dump: 0:[f2<4.85] yes=1,no...
        self.test_importance()
        # fscore: {'f2': 80, 'f3': 72, 'f0': 6, 'f1': 5}
        # score.weight: {'Petal Length': 80, 'Petal Width': 72, 'Sepal Length': 6, 'Sepal Width': 5}
        # score.gain: {'Petal Length': 3.6525380337500004, 'Petal Width': 2.2072901486111114, 'Sepal Length': 0.06247816666666667, 'Sepal Width': 0.09243024}
if __name__ == '__main__':
    # Script entry point: build the demo object, then run all steps.
    booster_demo = BoosterTest()
    booster_demo.test()
学员评价