课程评价 (0)

请对课程作出评价:
0/300

学员评价

暂无精选评价
10分钟

Booster-示例

import xgboost as xgt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# Class name -> integer label. The data set is trimmed: the 'Iris-setosa'
# class has been removed from iris, so only two classes remain.
_label_map=dict(zip(('Iris-versicolor','Iris-virginica'),(0,1)))
class BoosterTest:
  '''
  Exercise the low-level xgboost ``Booster`` API on a two-class iris subset.

  The constructor loads the data, builds the train/validation ``DMatrix``
  objects and an untrained ``Booster``; the ``test_*`` methods print the
  results of the attribute, dump, training and feature-importance calls.
  '''
  # Feature-map file handed to the dump / fscore calls.
  _FEATURE_MAP_PATH='model/booster.feature'
  # Destination file of ``Booster.dump_model``.
  _DUMP_PATH='model/booster.model'

  def __init__(self,data_path='./data/iris.csv'):
    '''
    Load the data set and create the matrices and the booster.

    :param data_path: CSV file holding the four feature columns and a
      ``Class`` column.
    :raises KeyError: if ``Class`` contains a value that is not a key of the
      module-level ``_label_map``.
    '''
    df=pd.read_csv(data_path)
    _feature_names=['Sepal Length','Sepal Width','Petal Length','Petal Width']
    _feature_types=['float']*len(_feature_names)
    x=df[_feature_names]
    # Strict lookup on purpose: an unknown class name raises instead of
    # silently becoming NaN.
    y=df['Class'].map(lambda name:_label_map[name])

    # 70/30 stratified split with a fixed seed so runs are repeatable.
    train_X,test_X,train_Y,test_Y=train_test_split(x,y,
            test_size=0.3,stratify=y,shuffle=True,random_state=1)
    # Both matrices carry the same feature names and types.
    self._train_matrix=xgt.DMatrix(data=train_X,label=train_Y,
                        feature_names=_feature_names,
                        feature_types=_feature_types)
    self._validate_matrix=xgt.DMatrix(data=test_X,label=test_Y,
                        feature_names=_feature_names,
                        feature_types=_feature_types)
    # NOTE(review): later xgboost releases reportedly replace 'silent' with
    # 'verbosity' -- confirm against the installed version.
    self._booster=xgt.Booster(params={
      'booster':'gbtree',
      'silent':0, # print messages
      'eta':0.1, # learning rate
      'max_depth':5,
      'tree_method':'exact',
      'objective':'binary:logistic',
      'eval_metric':'auc',
      'seed':321},
      cache=[self._train_matrix,self._validate_matrix])

  def test_attribute(self):
    '''
    Set one booster attribute, then print it, a missing attribute and the
    full attribute dictionary.

    :return: None
    '''
    self._booster.set_attr(key1='1')
    print('attr:key1 -> ',self._booster.attr('key1'))
    # 'key2' is never set.
    print('attr:key2 -> ',self._booster.attr('key2'))
    print('attributes -> ',self._booster.attributes())

  def test_dump_model(self):
    '''
    Print the start of the first dumped tree (or ``[]`` when the dump is
    empty) and write the text dump to ``model/booster.model``.

    :return: None
    '''
    _dump_str=self._booster.get_dump(fmap=self._FEATURE_MAP_PATH,
                                     with_stats=True,dump_format='text')
    # Only the first 20 characters of the first tree are shown.
    print('dump:',_dump_str[0][:20]+'...' if _dump_str else [])
    self._booster.dump_model(self._DUMP_PATH,
                             fmap=self._FEATURE_MAP_PATH,with_stats=True)

  def test_train(self,num_rounds=100):
    '''
    Run boosting rounds one at a time, printing the evaluation string for
    the training and validation matrices after every round.

    :param num_rounds: number of ``Booster.update`` calls to perform.
    :return: None
    '''
    for i in range(num_rounds):
      self._booster.update(self._train_matrix,iteration=i)
      print(self._booster.eval(self._train_matrix,name='train',iteration=i))
      print(self._booster.eval(self._validate_matrix,name='eval',iteration=i))

  def test_importance(self):
    '''
    Print the feature importance reported by ``get_fscore`` and by
    ``get_score`` for the 'weight' and 'gain' importance types.

    :return: None
    '''
    print('fscore:',self._booster.get_fscore(self._FEATURE_MAP_PATH))
    print('score.weight:',self._booster.get_score(importance_type='weight'))
    print('score.gain:',self._booster.get_score(importance_type='gain'))

  def test(self):
    '''
    Run every check in order; the model is dumped both before and after
    training. The trailing comments record the output of a reference run.

    :return: None
    '''
    self.test_attribute()
    # attr:key1 ->  1
    # attr:key2 ->  None
    # attributes ->  {'key1': '1'}
    self.test_dump_model()
    # dump: []
    self.test_train()
    # [0]   train-auc:0.980816
    # [0]   eval-auc:0.933333
    # ...
    # [99]  train-auc:0.998367
    # [99]  eval-auc:0.995556
    self.test_dump_model()
    # dump: 0:[f2<4.85] yes=1,no...
    self.test_importance()
    # fscore: {'f2': 80, 'f3': 72, 'f0': 6, 'f1': 5}
    # score.weight: {'Petal Length': 80, 'Petal Width': 72, 'Sepal Length': 6, 'Sepal Width': 5}
    # score.gain: {'Petal Length': 3.6525380337500004, 'Petal Width': 2.2072901486111114, 'Sepal Length': 0.06247816666666667, 'Sepal Width': 0.09243024}
# Script entry point: build the tester (loads data, creates the booster) and
# run every check when the file is executed directly.
if __name__ == '__main__':
  _tester=BoosterTest()
  _tester.test()