import random
import os
import tensorflow as tf
# Uses the Kaggle dogs-vs-cats dataset from the previous section
train_data_dir = "./dogs-vs-cats/train/"
test_data_dir = "./dogs-vs-cats/test/"
# List the training directory exactly once so that every path and its label
# come from the same listing (two separate listings are not guaranteed to
# return the entries in the same order, or even the same entries)
train_filenames = os.listdir(train_data_dir)
# Paths of the training files
file_dir = [os.path.join(train_data_dir, filename) for filename in train_filenames]
# Label 0 when the file name starts with 'c' (the cat images), 1 otherwise
labels = [0 if filename.startswith('c') else 1 for filename in train_filenames]
# Pair each path with its label and shuffle the pairs together
f_l = list(zip(file_dir, labels))
random.shuffle(f_l)
file_dir, labels = zip(*f_l)
# Split into training and validation sets
valid_ratio = 0.1
idx = int((1 - valid_ratio) * len(file_dir))
train_files, valid_files = file_dir[:idx], file_dir[idx:]
train_labels, valid_labels = labels[:idx], labels[idx:]
# Where the TFRecord-format data is stored
train_tfrecord_file = "./dogs-vs-cats/train.tfrecords"
valid_tfrecord_file = "./dogs-vs-cats/valid.tfrecords"
# -------------------see the code below-----------------------------
# Writing
# Create the writer up front; the context manager closes it after the loop
with tf.io.TFRecordWriter(path=train_tfrecord_file) as writer:
    # Walk the raw data
    for filename, label in zip(train_files, train_labels):
        # Read the image file; img is a bytes string. The `with` block closes
        # the handle right away instead of leaking one open file per image.
        with open(filename, 'rb') as image_file:
            img = image_file.read()
        # Build the feature dict, key -> value
        feature = {
            'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img])),
            'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
        }
        # Wrap the features in an Example
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        # Serialize the Example to a string and write it out
        writer.write(example.SerializeToString())
# -------------------see the code below-----------------------------
# Reading
# Format and data type of each feature stored in a serialized example
feature_description = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'label': tf.io.FixedLenFeature(shape=[], dtype=tf.int64),
}
# Load the TFRecord data as a tf.data.Dataset object
raw_train_dataset = tf.data.TFRecordDataset(filenames=train_tfrecord_file)
def _parse_example(example_string):
    """Decode one serialized example into an ``(image, label)`` pair.

    Args:
        example_string: A serialized example, parsed according to the
            module-level ``feature_description``.

    Returns:
        A tuple of the JPEG-decoded image and the label feature.
    """
    # Deserialize the example into its named features
    parsed = tf.io.parse_single_example(example_string, feature_description)
    # Turn the stored image bytes into an image tensor
    decoded_image = tf.io.decode_jpeg(parsed['image'])
    # Hand back the data as X, y
    return decoded_image, parsed['label']
# Apply the parser to every record of the raw dataset
train_dataset = raw_train_dataset.map(_parse_example)
import matplotlib.pyplot as plt
# Display each decoded image in turn, titled with its class name
for image, target in train_dataset:
    if target == 0:
        caption = 'cat'
    else:
        caption = 'dog'
    plt.title(caption)
    plt.imshow(image.numpy())
    plt.show()
tf.function 模块 + AutoGraph 机制:使用 @tf.function 修饰符,就可以将模型以图执行模式运行。
注意:@tf.function 修饰的函数内,尽量只用 tf 的内置函数,变量只用 tensor、numpy 数组。
get_concrete_function 方法:获得计算图
graph = F.get_concrete_function(X, y)
tf.TensorArray:支持计算图模式的动态数组
arr = tf.TensorArray(dtype=tf.int64, size=1, dynamic_size=True)
# Keep the value returned by write(); it is rebound to arr here
arr = arr.write(index=1, value=512)
# arr.write(index=0, value=512)  # result not assigned to anything, so it is lost
# Read back and print every element of the array
for position in range(arr.size()):
    element = arr.read(position)
    print(element)
list_physical_devices
print('---device----')
# Query the physical GPU devices first, then the physical CPU devices
gpus, cpus = (
    tf.config.list_physical_devices(device_type=kind) for kind in ('GPU', 'CPU')
)
print(gpus, "\n", cpus)
# 输出:单个 GPU 和单个 CPU
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
set_visible_devices
# Make only the first two entries of the GPU list visible to TensorFlow
tf.config.set_visible_devices(gpus[:2], 'GPU')
或者,设置环境变量 CUDA_VISIBLE_DEVICES(终端命令或 Python 代码):
export CUDA_VISIBLE_DEVICES=2,3
import os
# Restrict this process to graphics cards 2 and 3 via the environment
os.environ.update({'CUDA_VISIBLE_DEVICES': "2,3"})
指定程序只在显卡 2、3 上运行。
gpus = tf.config.list_physical_devices(device_type='GPU')
# Request GPU memory only as it is needed, for every listed GPU
for device in gpus:
    tf.config.experimental.set_memory_growth(device=device, enable=True)
gpus = tf.config.list_physical_devices(device_type='GPU')
# Fix an upper bound on GPU memory use for the first GPU; exceeding it is an error
memory_cap = tf.config.LogicalDeviceConfiguration(memory_limit=1024)
tf.config.set_logical_device_configuration(gpus[0], [memory_cap])
在只有单个 GPU 的电脑上,可以用虚拟 GPU 模拟多 GPU 环境,来编写多 GPU 代码:
gpus = tf.config.list_physical_devices('GPU')
# Configure the first physical GPU as two logical devices, each with memory_limit=2048
virtual_gpus = [
    tf.config.LogicalDeviceConfiguration(memory_limit=2048) for _ in range(2)
]
tf.config.set_logical_device_configuration(gpus[0], virtual_gpus)
# List the logical GPU devices and print them
gpus = tf.config.list_logical_devices(device_type='GPU')
print(gpus)
输出:2 个虚拟 GPU
[LogicalDevice(name='/device:GPU:0', device_type='GPU'),
LogicalDevice(name='/device:GPU:1', device_type='GPU')]