课程评价 (0)

请对课程作出评价:
0/300

学员评价

暂无精选评价
10分钟

直接学习-示例

class TrainTest:
  """Demo of xgboost training/evaluation on the iris dataset.

  Builds a stratified 70/30 train/validation split and exposes two demos:
  `train_test` (single training run with early stopping) and `cv_test`
  (3-fold cross-validation).

  Relies on module-level names defined elsewhere in this file:
  `pd` (pandas), `xgt` (xgboost), `train_test_split` (sklearn), and
  `_label_map` (mapping from the 'Class' column values to numeric labels;
  presumably 0/1 given the binary objective below — TODO confirm).

  NOTE(fix): the original paste contained bare zero-width-space (U+200B)
  lines, which are invalid characters in Python source; they are replaced
  with real blank lines here.
  """

  def __init__(self):
    """Load ./data/iris.csv and build train/validation DMatrix objects."""
    df = pd.read_csv('./data/iris.csv')
    _feature_names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
    x = df[_feature_names]
    # `cls` instead of `x` so the lambda parameter does not shadow the
    # feature frame bound on the previous line.
    y = df['Class'].map(lambda cls: _label_map[cls])

    # Stratified split keeps class proportions equal in both folds;
    # the fixed random_state makes the demo reproducible.
    train_X, test_X, train_Y, test_Y = train_test_split(
        x, y, test_size=0.3,
        stratify=y, shuffle=True, random_state=1)
    self._train_matrix = xgt.DMatrix(
        data=train_X, label=train_Y,
        feature_names=_feature_names,
        feature_types=['float', 'float', 'float', 'float'])
    self._validate_matrix = xgt.DMatrix(
        data=test_X, label=test_Y,
        feature_names=_feature_names,
        feature_types=['float', 'float', 'float', 'float'])

  def train_test(self):
    """Train a gbtree booster with early stopping and print its results.

    Early stopping watches the LAST metric ('auc') on the LAST evals
    entry ('valid2'); training halts once valid2-auc fails to improve
    for 5 consecutive rounds.
    """
    params = {
      'booster': 'gbtree',
      'eta': 0.01,
      'max_depth': 5,
      'tree_method': 'exact',
      'objective': 'binary:logistic',
      'eval_metric': ['logloss', 'error', 'auc'],
    }
    eval_rst = {}  # filled in-place by xgt.train with per-round metrics
    booster = xgt.train(
        params, self._train_matrix, num_boost_round=20,
        evals=[(self._train_matrix, 'valid1'), (self._validate_matrix, 'valid2')],
        early_stopping_rounds=5, evals_result=eval_rst, verbose_eval=True)
    ## Training output
    # Multiple eval metrics have been passed: 'valid2-auc' will be used for early stopping.
    # Will train until valid2-auc hasn't improved in 5 rounds.
    # [0]   valid1-logloss:0.685684 valid1-error:0.042857   valid1-auc:0.980816 valid2-logloss:0.685749 valid2-error:0.066667   valid2-auc:0.933333
    # ...
    # Stopping. Best iteration:
    # [1]   valid1-logloss:0.678149 valid1-error:0.042857   valid1-auc:0.99551  valid2-logloss:0.677882 valid2-error:0.066667   valid2-auc:0.966667

    print('booster attributes:', booster.attributes())
    # booster attributes: {'best_iteration': '1', 'best_msg': '[1]\tvalid1-logloss:0.678149\tvalid1-error:0.042857\tvalid1-auc:0.99551\tvalid2-logloss:0.677882\tvalid2-error:0.066667\tvalid2-auc:0.966667', 'best_score': '0.966667'}

    print('fscore:', booster.get_fscore())
    # fscore: {'Petal Length': 8, 'Petal Width': 7}

    print('eval_rst:', eval_rst)
    # eval_rst: {'valid1': {'logloss': [...], 'error': [...], 'auc': [...]},
    #            'valid2': {'logloss': [...], 'error': [...], 'auc': [...]}}

  def cv_test(self):
    """Run stratified 3-fold cross-validation and print the metric history.

    `metrics=['error', 'auc']` overrides the 'eval_metric' entry in
    params for the CV run.
    """
    params = {
      'booster': 'gbtree',
      'eta': 0.01,
      'max_depth': 5,
      'tree_method': 'exact',
      'objective': 'binary:logistic',
      'eval_metric': ['logloss', 'error', 'auc'],
    }

    eval_history = xgt.cv(
        params, self._train_matrix, num_boost_round=20,
        nfold=3, stratified=True, metrics=['error', 'auc'],
        early_stopping_rounds=5, verbose_eval=True, shuffle=True)
    ## Training output
    # [0]   train-auc:0.974306+0.00309697   train-error:0.0428743+0.0177703 test-auc:0.887626+0.0695933 test-error:0.112374+0.0695933
    # ...
    print('eval_history:', eval_history)
    # eval_history:    test-auc-mean  test-auc-std  test-error-mean  test-error-std  \
    # 0       0.887626      0.069593         0.112374        0.069593
    # 1       0.925821      0.020752         0.112374        0.069593
    # 2       0.925821      0.020752         0.098485        0.050631

    # train-auc-mean  train-auc-std  train-error-mean  train-error-std
    # 0        0.974306       0.003097          0.042874          0.01777
    # 1        0.987893       0.012337          0.042874          0.01777
    # 2        0.986735       0.011871          0.042874          0.01777