这篇文章主要是前面整理的,就直接上图了。
实验:
# coding:UTF-8
#################
# OS_ELM
# author : zhiyong_will
# date : 2015.3.22
#################
from __future__ import division
from datetime import datetime
from csv import DictReader
from math import exp
import random
from numpy import *
import string
#####设置相关参数########
trainData = "C:\\Users\\dell\\Desktop\\OS-ELM\\segment_train.csv"
testData = "C:\Users\dell\Desktop\OS-ELM\\segment_test.csv"
#隐含神经元的个数
nHiddenNeurons = 180
#输入层的神经元个数
nInputNeurons = 19
#初始训练集的大小
NO = 280
#函数
def sig(tData, Iw, bias, num):
'''
tData:样本矩阵:样本数*特征数
Iw:输入层到第一个隐含层的权重:隐含层神经元数*特整数
bias:偏置1*隐含神经元个数
'''
v = tData * Iw.T #样本数*隐含神经元个数
bias_1 = ones((num, 1)) * bias
v = v + bias_1
H = 1./(1+exp(-v))
return H
##导入数据集
firstTrainData = []
firstTrainLable = []
# 处理训练样本
for t, row in enumerate(DictReader(open(trainData))):
Id = row['Id']
del row['Id']
del row['I0']
data = []
if int(Id) < NO:
# 处理是否被点击
if row['Label'] == '1.00000000':
y = 1
elif row['Label'] == '2.00000000':
y = 2
elif row['Label'] == '3.00000000':
y = 3
elif row['Label'] == '4.00000000':
y = 4
elif row['Label'] == '5.00000000':
y = 5
elif row['Label'] == '6.00000000':
y = 6
else:
y = 7
del row['Label']
firstTrainLable.append(y)
# 处理特征
for key in row:
value = string.atof(row[key])
#index = int(value + key[1:], 16) % D
data.append(value)
firstTrainData.append(data)
continue
elif int(Id) == NO:#开始训练
p0 = mat(firstTrainData)
T0 = zeros((NO, 7))
#处理样本标签
for i in xrange(0, NO):
a = firstTrainLable[i]
T0[i][a-1] = 1
T0 = T0 * 2 - 1
Iw = mat(random.rand(nHiddenNeurons, nInputNeurons) * 2 - 1)#随机生成区间-1,1之间的随机矩阵
bias = mat(random.rand(1, nHiddenNeurons))
H0 = sig(p0, Iw, bias, NO)#样本数*隐含神经元个数
M = (H0.T * H0).I
beta = M * H0.T * T0
else:#训练剩余的样本,每次训练一条样本
# 处理label
if row['Label'] == '1.00000000':
y = 1
elif row['Label'] == '2.00000000':
y = 2
elif row['Label'] == '3.00000000':
y = 3
elif row['Label'] == '4.00000000':
y = 4
elif row['Label'] == '5.00000000':
y = 5
elif row['Label'] == '6.00000000':
y = 6
else:
y = 7
del row['Label']
Tn = zeros((1, 7))
#处理样本标签
b = y
Tn[0][b-1] = 1
Tn = Tn * 2 - 1
# 处理特征
data = []
for key in row:
value = string.atof(row[key])
data.append(value)
pn = mat(data)
H = sig(pn, Iw, bias, 1)
M = M - M * H.T * (eye(1,1) + H * M * H.T).I * H * M
beta = beta + M * H.T * (Tn - H * beta)
# 计算训练误差
correct = 0
sum = 0
for t, row in enumerate(DictReader(open(trainData))):
del row['Id']
del row['I0']
# 处理是否被点击
if row['Label'] == '1.00000000':
y = 1
elif row['Label'] == '2.00000000':
y = 2
elif row['Label'] == '3.00000000':
y = 3
elif row['Label'] == '4.00000000':
y = 4
elif row['Label'] == '5.00000000':
y = 5
elif row['Label'] == '6.00000000':
y = 6
else:
y = 7
del row['Label']
# 处理特征
data = []
for key in row:
value = string.atof(row[key])
data.append(value)
p = mat(data)
HTrain = sig(p, Iw, bias, 1)
Y = HTrain * beta
# 判断
if argmax(Y) + 1 == y:
correct += 1
sum += 1
print("训练准确性为:%f" % (correct/sum))
# 计算测试误差
correctTest = 0
sumTest = 0
for t, row in enumerate(DictReader(open(testData))):
del row['Id']
del row['I0']
# 处理是否被点击
if row['Label'] == '1.00000000':
y = 1
elif row['Label'] == '2.00000000':
y = 2
elif row['Label'] == '3.00000000':
y = 3
elif row['Label'] == '4.00000000':
y = 4
elif row['Label'] == '5.00000000':
y = 5
elif row['Label'] == '6.00000000':
y = 6
else:
y = 7
del row['Label']
# 处理特征
data = []
for key in row:
value = string.atof(row[key])
data.append(value)
p = mat(data)
HTrain = sig(p, Iw, bias, 1)
Y = HTrain * beta
# 判断
if argmax(Y) + 1 == y:
correctTest += 1
sumTest += 1
print("测试准确性为:%f" % (correctTest/sumTest))
实验结果:
参考文章:
A Fast and Accurate Online Sequential Learning Algorithm for Feedforward Networks