我试着写这个程序。请告诉我它是否有效,如果是的话,它的准确性如何,我如何计算它?
import numpy as np
import tensorflow as tf
import pandas as pd
df=pd.read_csv(r'C:\Users\manas\Downloads\diabetes.csv')
actualY=df['Outcome']
actualX=df.drop(['Outcome'],axis=1)
actualX=np.array(np.reshape(actualX,newshape=[768,8]))
actualY=np.array(np.reshape(actualY,newshape=[768,1]))
#Y=[768,1]
#X=[768,8]
x=tf.placeholder(dtype=tf.float32,shape=[768,8])
W1=tf.Variable((tf.random_normal(shape=[8,1])))
B1=tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[1,1]))
y_prediction=(tf.nn.sigmoid(tf.add(tf.matmul(x,W1),B1)))
y_true=tf.placeholder(dtype=tf.float32,shape=[768,1])
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_prediction,labels=y_true))
optimizer=tf.train.GradientDescentOptimizer(0.1).minimize(loss)
sess=tf.InteractiveSession()
tf.global_variables_initializer().run()
for i in range(1000):
(sess.run(optimizer,feed_dict={x:actualX,y_true:actualY}))
发布于 2018-08-14 10:28:10
我已经修改了代码片段,以便每100年代输出一次准确性。你可以在上面运行Kaggle.
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
# Any results you write to the current directory are saved as output.
import numpy as np
import tensorflow as tf
import pandas as pd
# Clean up in case repeatedly running in jupyter notebook
tf.reset_default_graph()
# Get reproducable results by making the weight initialization always the same.
tf.set_random_seed(0)
df = pd.read_csv('../input/diabetes.csv')
actualY = df['Outcome'].values
actualX = df.drop(['Outcome'], axis=1).values
actualX = np.array(np.reshape(actualX,newshape=[768,8]))
actualY = np.array(np.reshape(actualY,newshape=[768,1]))
x = tf.placeholder(dtype=tf.float32,shape=[768,8])
W1 = tf.Variable((tf.random_normal(shape=[8,1])))
B1 = tf.Variable(dtype=tf.float32,initial_value=tf.zeros(shape=[1,1]))
y_prediction = tf.nn.sigmoid(tf.add(tf.matmul(x, W1), B1))
y_true = tf.placeholder(dtype=tf.float32, shape=[768,1])
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_prediction,labels=y_true))
optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
# Compute accuracy
accuracy_op, update_op = tf.metrics.accuracy(labels=y_true, predictions=y_prediction)
with tf.Session() as sess:
tf.global_variables_initializer().run()
# Needed to initialzie accuracy ops
tf.local_variables_initializer().run()
for i in range(1000):
# Compute accuracy every 100th epoch
if (i + 1) % 100 == 0:
_, _, accuracy = sess.run([optimizer, update_op, accuracy_op],feed_dict={x: actualX, y_true: actualY})
print('Epoch: {}, accuracy: {:.3}'.format(i + 1, accuracy))
else:
sess.run(optimizer,feed_dict={x: actualX, y_true: actualY})
这给了0.578的精确训练,这不是很好。如果你想改进模型和希望更好地理解应该如何评估结果,我建议你看看以下链接:
https://www.kaggle.com/uciml/pima-indians-diabetes-database/kernels
https://course.fast.ai/
https://stackoverflow.com/questions/-100002123
复制相似问题