TF 2.x:为了积累经验,我尝试用一个简单的实验数据集(iris)来说明这个问题:
import numpy as np
import tensorflow as tf
import keras
from tensorflow.keras.callbacks import LambdaCallback
import tensorflow_datasets as tfds
# Load the first 80% of the iris 'train' split together with its metadata.
# With as_supervised=True each dataset element is an (input, label) pair.
data, info = tfds.load('iris', split='train[:80%]',
as_supervised=True, with_info=True)
print(info)
# Iterate the dataset once and transpose the sequence of (input, label) pairs
# into two parallel tuples: one of inputs and one of labels.
features, labels = tuple(zip(*data))
# NB: the generator should yield a dictionary for the inputs, and the output as is.
def gen(x_train, y_train):
    """Generator meant to feed ``tf.data.Dataset.from_generator``.

    NOTE(review): this is the failing snippet from the question and is kept
    exactly as posted; the post reports
    ``TypeError: unhashable type: 'slice'`` on the ``yield`` line.

    Args:
        x_train: Feature container passed by the caller; it is overwritten
            on the second statement of the body.
        y_train: Label container passed by the caller; it is overwritten
            on the second statement of the body.

    Yields:
        The result of ``tf.transpose`` applied to a two-item list built from
        the first 32 entries of ``x_train`` and ``y_train``.
    """
    print('generator initiated')
    # Rebinds BOTH parameters to whatever tfds.load() returns.  No `split` is
    # given and with_info=True is set, so the two names presumably end up
    # bound to a dict of splits and a DatasetInfo object rather than to
    # feature/label arrays -- TODO confirm against the tfds.load docs.
    (x_train, y_train)= tfds.load('iris', shuffle_files=True, as_supervised=True, with_info=True)
    idx = 0
    while True:
        # Always takes the same [:32] slice, and tf.one_hot is called without
        # a `depth` argument.  This is the line named in the reported error.
        yield tf.transpose([x_train[:32], tf.one_hot(y_train[:32])])
        # Runs only when the consumer asks for the next item.
        print('generator yielded a batch %d' % idx)
        idx += 1
# Wrap gen() in a tf.data.Dataset.  Two components are declared here
# (float32 [32, 4] and int32 [32, 4]).
# NOTE(review): gen() yields a single tf.transpose(...) value per step, not a
# 2-tuple -- confirm this matches the declared structure.
train_ds = tf.data.Dataset.from_generator(gen, args=(features, labels),
output_types=(tf.float32, tf.int32),
output_shapes=(tf.TensorShape([32,4]), tf.TensorShape([32,4 ])),
)
# OR
#output_signature=(
# tf.TensorSpec(shape=(4,), dtype=tf.float32),
# tf.TensorSpec(shape=(), dtype=tf.int32)),
#)
# datasetGen = iter(train_ds)
# Two-layer dense classifier: 512 ReLU units followed by a 4-way softmax.
# input_shape=(32, 4) is the per-sample shape; Keras adds the batch axis.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(512, activation='relu', input_shape=(32,4,))) # 4 fields
model.add(tf.keras.layers.Dense(4, activation='softmax'))
# categorical_crossentropy expects one-hot encoded targets.
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy'])
# Groups 32 generator elements into one batch, on top of the [32, ...] shape
# already declared for each element, and prefetches up to 32 batches.
train_ds= train_ds.batch(32).prefetch(32)
# callbacks=[LambdaCallback(on_epoch_end=generator.on_epoch_end)],
# Train for 7 epochs directly from the dataset and print per-epoch accuracy.
history= model.fit(train_ds, epochs = 7, verbose = 1)
print(history.history['accuracy'])
然后我得到了如下错误:
在 tf.transpose([x_train[:32], tf.one_hot(y_train[:32])]) 处报错 —— TypeError: unhashable type: 'slice'
问题似乎出在这里——x_train[:32]?
问:应该如何修改代码(是修改生成器函数、output_signature、input_shape=,还是其他地方),才能在 model.fit() 方法中使用这个 Dataset?
(很抱歉这个示例比较刻意,但我想测试生成器函数在 model.fit() 中的用法)
发布于 2022-05-01 18:32:51
嗯,这确实只是一个演示生成器用法的玩具示例;而且与使用生成器相比,tf.data 在速度上总是更胜一筹。不过,下面的代码是可以运行的(代码仍需重构——例如针对大数据(BigData)场景组织数据管道)。
import tensorflow as tf
import numpy as np
import pandas as pd
# LOAD DATA
# Read only the first 64 rows of the iris CSV.  Every column except 'variety'
# is parsed as float32; 'variety' is kept as a string through its converter.
df= pd.read_csv('https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv', dtype = 'float32', converters = {'variety' : str},
                nrows=64, decimal='.')
# df.head()
# First four columns are the features, the last column is the class name.
_features = df.iloc[:, :4].copy()
_labels = df.iloc[:, -1:].copy()
# Map each distinct class name to an integer code (0, 1, ...).
_labels['variety1'] = pd.factorize(_labels['variety'])[0]
_target = _labels['variety1'].astype(np.int64).copy()
# Convert to a NumPy array BEFORE adding the trailing axis: multi-dimensional
# indexing on a Series (``series[:, np.newaxis]``) was deprecated in pandas 1.x
# and removed in pandas 2.0.
_targets = _target.to_numpy()[:, np.newaxis]
# Number of classes present in the loaded rows; used so that both splits get
# the same one-hot width even if one split happens to miss a class.
_num_classes = int(_targets.max()) + 1
#print(_features)
print(type(_targets))
# SPLIT for Train & Test
# https://www.kdnuggets.com/2020/07/getting-started-tensorflow2.html
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(_features, _targets, test_size=0.3)
# Typically, we normalize the data when we have a high amount of variance in it.
print(X_train.var())
print(X_test.var())
# Here we can see that both X_train and X_test have very low variance, so no need to normalize the data.
# PREPROCESSING
#
# One-hot encode the integer labels.  num_classes is passed explicitly because
# to_categorical otherwise infers the width from the largest label in *that*
# array, which can differ between the train and test splits.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=_num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=_num_classes)
print(y_train[:5, :])
# convert our data to numpy arrays
X_train = X_train.values
X_test = X_test.values
#################################################
#################################################
def gen(_features, _labels, batch_size=32, num_batches=64):
    """Yield fixed-size (features, labels) batches that cycle through the data.

    The original version yielded ``x_train[:32], y_train[:32]`` on every
    iteration, so rows past the first 32 were never produced.  This version
    advances a window of ``batch_size`` rows on each step and wraps around to
    the start when it runs past the end, so every batch still has exactly
    ``batch_size`` rows (which keeps a fixed ``output_shapes`` declaration
    valid) while all rows get used.

    Args:
        _features: Array-like of shape ``(n_rows, ...)`` with the inputs.
        _labels: Array-like with ``n_rows`` entries along its first axis.
        batch_size: Rows per yielded batch (default 32, as before).
        num_batches: Number of batches to yield before stopping
            (default 64, as before).

    Yields:
        ``(x_batch, y_batch)`` NumPy arrays, each with ``batch_size`` rows.

    Raises:
        ValueError: If the inputs are empty or their lengths differ
            (raised when the generator is first advanced).
    """
    x_train = np.asarray(_features)
    y_train = np.asarray(_labels)
    n_rows = len(x_train)
    if n_rows == 0:
        raise ValueError('gen() needs at least one row of data')
    if len(y_train) != n_rows:
        raise ValueError('features and labels must have the same length')
    for idx in range(num_batches):
        # Row indices for this batch; the modulo wraps past the last row.
        rows = np.arange(idx * batch_size, (idx + 1) * batch_size) % n_rows
        yield x_train[rows], y_train[rows]
        # Runs only when the consumer asks for the next batch.
        print('generator yielded a batch %d' % idx)
#################################################
# train_ds <<<<<<<<<<<<<<<<<<<<<<<
# Dataset built from gen(): every generated element is declared as a
# float32 block of shape (32, 4) paired with an int64 block of shape (32, 2).
train_ds = tf.data.Dataset.from_generator(
    gen,
    args=(X_train, y_train),
    output_types=(tf.float32, tf.int64),
    output_shapes=(tf.TensorShape([32, 4]), tf.TensorShape([32, 2])),
)
# The non-deprecated alternative is a single output_signature=(...) argument
# made of tf.TensorSpec entries instead of output_types/output_shapes.
# datasetGen = iter(train_ds)
# print('train_ds:\n',list(train_ds.as_numpy_iterator()))
#################################################
# Model: 512 ReLU units followed by a 2-way softmax over the 4 input fields.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(32, 4)),
    tf.keras.layers.Dense(2, activation='softmax'),
])
# Labels are one-hot encoded, hence categorical_crossentropy;
# sparse_categorical_crossentropy could be used with integer labels instead.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
# Group 32 generated elements per batch and keep up to 32 batches prefetched.
train_ds = train_ds.batch(32)
train_ds = train_ds.prefetch(32)
# callbacks=[LambdaCallback(on_epoch_end=generator.on_epoch_end)],
history = model.fit(train_ds, verbose=1, epochs=7)
validation_ds 来自源数据 X_test、y_test:若用 tf.data.Dataset.from_tensor_slices() 构建,其元素形状为 (4,),而不是模型的输入形状 (32, 4),因此存在形状不匹配的问题——不过我认为,这个任务从一开始就不适合用生成器来做。尽管如此,借助 train_ds,evaluate() 和 predict() 方法仍然可以运行(虽然在训练集上评估并不是机器学习的正确做法)。
##############################################
# Evaluate on the training dataset (a separate test_ds would be needed for a
# genuine test score).  batch_size is intentionally not passed: train_ds is a
# tf.data.Dataset that already produces batches, and Keras documents that
# batch_size must not be specified for dataset inputs.
score = model.evaluate(train_ds, verbose=1)  # test_ds needed
# score[1] is the 'accuracy' metric requested in model.compile().
print("Test Accuracy:", score[1])
y_pred = model.predict(train_ds)
print('PREDICTIONS:\n', y_pred)
##############################################
#https://medium.com/@nutanbhogendrasharma/tensorflow-deep-learning-model-with-iris-dataset-8ec344c49f91
# Print actual and predicted value
# Iterate the dataset once and split the (features, labels) batches into two
# tuples.  If you need the numpy array version, convert them using np.array():
# https://stackoverflow.com/a/65499385/15893581
features, labels = tuple(zip(*train_ds))
# The class index is the position of the largest value along the last axis.
actual = np.argmax(labels, axis=-1)
predicted = np.argmax(y_pred, axis=-1)
print(f"Actual: {actual}")
print(f"Predicted: {predicted}")
因此,传入的 test_ds 仍然需要适配形状(虽然我认为这里同样用生成器函数来处理会更好),但在 TF 2.x 中使用生成器的总体思路现在已经清楚了(只有在处理海量数据时才有必要使用)。
这里有一些改进模型的建议。
很抱歉问了这么初级的问题,我还是机器学习的新手,但为了积累经验,我需要以某种方式把生成器和训练连接起来。
发布于 2022-05-02 14:25:38
好的,我找到了初始数据集的工作案例:
import numpy as np
import tensorflow as tf
import keras
from tensorflow.keras.callbacks import LambdaCallback
import tensorflow_datasets as tfds
# Load the whole iris 'train' split in batches of 10, plus its metadata.
data, info = tfds.load(
    'iris',
    split='train[:100%]',
    batch_size=10,
    as_supervised=True,
    with_info=True,
)
print(info)
# Number of label classes, taken from the dataset metadata.
NUM_CLASSES = info.features["label"].num_classes


def _one_hot_labels(x, y):
    """Return the features unchanged and the labels one-hot encoded."""
    return x, tf.one_hot(y, depth=NUM_CLASSES)


data = data.map(_one_hot_labels)
# Split the (features, labels) batches into two parallel tuples.
features, labels = zip(*data)
for _part in (features, labels):
    print(_part)
# NB: the generator should yield a dictionary for the inputs, and the output as is.
def gen(x_train, y_train):
    """Endlessly yield the same ``(x_train, y_train)`` pair.

    When first advanced, prints a start-up message followed by the ``shape``
    of each argument.  After every yielded pair is consumed, prints a message
    carrying a running batch counter that starts at 0.

    Args:
        x_train: Object exposing ``.shape``; yielded unchanged as the inputs.
        y_train: Object exposing ``.shape``; yielded unchanged as the targets.

    Yields:
        The tuple ``(x_train, y_train)``, indefinitely.
    """
    print('generator initiated')
    for array in (x_train, y_train):
        print(array.shape)
    batch_no = 0
    while True:
        yield x_train, y_train
        # Runs only when the consumer asks for the next item.
        print('generator yielded a batch %d' % batch_no)
        batch_no = batch_no + 1
# Dataset built from gen(): elements are declared as float32 features of
# shape (None, 10, 4) and int32 labels of shape (None, 10, 3); the leading
# None leaves the number of stacked batches unspecified.
train_ds = tf.data.Dataset.from_generator(
    gen,
    args=(features, labels),
    output_types=(tf.float32, tf.int32),
    output_shapes=(
        tf.TensorShape([None, 10, 4]),
        tf.TensorShape([None, 10, 3]),
    ),
)
# Better, because output_types/output_shapes are deprecated: pass a single
# output_signature=(...) argument made of tf.TensorSpec entries.
#it = iter(train_ds)
#print(it.get_next())
# Peek at the first generated element to check its shapes.
for feature, label in train_ds:
    print("shape of ds_generated: ", feature.shape, label.shape)
    break
# len(train_ds) raises "TypeError: The dataset length is unknown." because a
# generator-backed dataset is a stream.
#num_val = len(train_ds)
#print(num_val)
# Model: 512 ReLU units followed by a 3-way softmax over the 4 input fields.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(None, 10, 4)),
    tf.keras.layers.Dense(3, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
# Group 32 generated elements per batch and keep up to 32 batches prefetched.
train_ds = train_ds.batch(32)
train_ds = train_ds.prefetch(32)
# callbacks=[LambdaCallback(on_epoch_end=generator.on_epoch_end)],
# The generator never terminates, so the epoch length is fixed explicitly.
history = model.fit(train_ds, verbose=1, epochs=2, steps_per_epoch=120 // 10)
print(history.history['accuracy'])
要点:用 None 定义可变的批大小,并设置 steps_per_epoch。
——如果使用 split='train[:50%]',则应设置 steps_per_epoch = 60 // 10,因为最后一个批次可能填不满。我最初代码的问题出在生成器函数的 output_shapes 上——这一点很明显,因为该生成器函数本来就只是为测试目的而写的。
在实际场景中,应让生成器输出有意义的数据,并使用恰当的形状。
P.S. 不过,在训练 5 个 epoch 时,我遇到了:
Graph execution error: >> ZMQError: Too many open files;AttributeError: '_thread._local' object has no attribute 'event_pipe'
——可能是内存不足,无法完成训练!减小 Dense(512, ...) 的输出单元数(以及减少 epoch 数)有所帮助。
发布于 2022-05-11 10:45:22
最后,我用一个生成器函数生成了类似 iris 的数据集(确实不是一个快速的操作)。需要注意 repeat 函数的用法,但代码的总体设计是可以运行的(这里用的是真正的随机数据)。
# Importing the tensorflow library
import tensorflow as tf
import numpy as np
import keras
#FeaturesDict({
# 'features': Tensor(shape=(4,), dtype=tf.float32),
# 'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=3),
#})
# Samples per training batch; gen() also yields this many samples per pass.
BATCH_SIZE= 12
# Number of training epochs passed to model.fit().
EPOCHS = 7
# Number of batches per epoch; used to derive steps_per_epoch.
QTY_BATCHES= 10 # to be generated
# The Dataset.from_generator constructor converts the python generator to a fully functional tf.data.Dataset.
def gen():
    """Yield ``BATCH_SIZE`` random (features, label) samples.

    Each sample pairs a ``(1, 4)`` array of uniform random floats drawn from
    ``[0, 1)`` with a one-element list holding a random integer in
    ``{0, 1, 2}``.

    Yields:
        ``(features_row, [class_id])`` for each generated sample.
    """
    for _ in range(BATCH_SIZE):
        # Draw the features first, then the label, so the random stream is
        # consumed in the same order on every pass.
        features_row = np.random.sample(4)[np.newaxis, :]
        class_id = np.random.randint(3)
        yield features_row, [class_id]
# Wrap gen() in a tf.data.Dataset: float32 features declared as (None, 4) and
# an int32 label declared as (1,).
train_ds = tf.data.Dataset.from_generator(gen,
                                          (tf.float32, tf.int32),
                                          (tf.TensorShape([None, 4]),
                                           tf.TensorShape([1])))
NUM_CLASSES = 3
# Dataset.repeat() with no arguments repeats the input indefinitely; here an
# explicit count is given instead.
# repeat() concatenates its repetitions without signalling where one ends and
# the next begins, so a batch() applied after repeat() may straddle those
# boundaries.
# prefetch() is kept as the LAST stage so that the one-hot map also overlaps
# with training instead of running after the prefetch buffer.
train_ds = (
    train_ds
    .repeat(count=EPOCHS * BATCH_SIZE * QTY_BATCHES)
    .batch(BATCH_SIZE, drop_remainder=True)
    .map(lambda x, y: (x, tf.one_hot(y, depth=NUM_CLASSES)))
    .prefetch(BATCH_SIZE)
)
# Dump every batch for inspection (iterates the whole repeated dataset).
for x, y in train_ds:
    print(x)
    print(y)
# Build a simple two-layer dense classifier (ReLU hidden layer + softmax).
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(None, 4)))  # variable leading dim, 4 fields
model.add(tf.keras.layers.Dense(3, activation='softmax'))
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# steps_per_epoch = int( np.ceil(x_train.shape[0] / batch_size) )
# steps_per_epoch is the number of batches consumed per epoch, i.e. QTY_BATCHES
# (the original (QTY_BATCHES*BATCH_SIZE)//BATCH_SIZE reduces to the same value).
# batch_size is intentionally NOT passed: train_ds already yields batches, and
# Keras documents that batch_size must not be specified for dataset inputs.
history = model.fit(train_ds, epochs=EPOCHS,
                    steps_per_epoch=QTY_BATCHES,
                    verbose=1)
print(history.history['accuracy'])
print(history.history['loss'])
# Keras - Plot training, validation and test set accuracy
# https://stackoverflow.com/questions/41908379/keras-plot-training-validation-and-test-set-accuracy
import keras
from matplotlib import pyplot as plt
# Draw one figure per recorded training metric: accuracy first, then loss.
# No validation curves are plotted because model.fit() was run without
# validation data, so only the 'train' series appears in each legend.
for _metric, _title in (('accuracy', 'model accuracy'), ('loss', 'model loss')):
    plt.plot(history.history[_metric])
    plt.title(_title)
    plt.ylabel(_metric)
    plt.xlabel('epoch')
    plt.legend(['train'], loc='upper left')
    plt.show()
https://stackoverflow.com/questions/72069335
复制相似问题