按照本教程操作时,我卡在了 .xml 到 .record 的转换这一步。
具体来说,当我运行以下命令时:
C:\XXXX\scripts\processing>python generate_tfrecord.py -x C:/XXXX/workspace/training_demo/images/train -l C:/XXXX/training_demo/annotations/label_map.pbtxt -o C:/XXXX/workspace/training_demo/annotations/train.record
它返回了如下错误:
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 107: invalid start byte
.xml 文件的内容如下:
<annotation>
<folder>train</folder>
<filename>XXXX.PNG</filename>
<path>C:\XXXX\workspace\training_demo\images\train\XXXX.PNG</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>93</width>
<height>66</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>XXXX</name>
<pose>Unspecified</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>1</xmin>
<ymin>1</ymin>
<xmax>93</xmax>
<ymax>66</ymax>
</bndbox>
</object>
</annotation>
代码与本教程中的代码完全相同:
""" Sample TensorFlow XML-to-TFRecord converter
usage: generate_tfrecord.py [-h] [-x XML_DIR] [-l LABELS_PATH] [-o OUTPUT_PATH] [-i IMAGE_DIR] [-c CSV_PATH]
optional arguments:
-h, --help show this help message and exit
-x XML_DIR, --xml_dir XML_DIR
Path to the folder where the input .xml files are stored.
-l LABELS_PATH, --labels_path LABELS_PATH
Path to the labels (.pbtxt) file.
-o OUTPUT_PATH, --output_path OUTPUT_PATH
Path of output TFRecord (.record) file.
-i IMAGE_DIR, --image_dir IMAGE_DIR
Path to the folder where the input image files are stored. Defaults to the same directory as XML_DIR.
-c CSV_PATH, --csv_path CSV_PATH
Path of output .csv file. If none provided, then no file will be written.
"""
import os
import glob
import pandas as pd
import io
import xml.etree.ElementTree as ET
import argparse
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TensorFlow logging (1)
import tensorflow.compat.v1 as tf
from PIL import Image
from object_detection.utils import dataset_util, label_map_util
from collections import namedtuple
# Command-line interface: every option is a plain string path.
parser = argparse.ArgumentParser(
    description="Sample TensorFlow XML-to-TFRecord converter")
parser.add_argument(
    "-x", "--xml_dir",
    type=str,
    help="Path to the folder where the input .xml files are stored.")
parser.add_argument(
    "-l", "--labels_path",
    type=str,
    help="Path to the labels (.pbtxt) file.")
parser.add_argument(
    "-o", "--output_path",
    type=str,
    help="Path of output TFRecord (.record) file.")
parser.add_argument(
    "-i", "--image_dir",
    type=str,
    default=None,
    help="Path to the folder where the input image files are stored. "
         "Defaults to the same directory as XML_DIR.")
parser.add_argument(
    "-c", "--csv_path",
    type=str,
    default=None,
    help="Path of output .csv file. If none provided, then no file will be "
         "written.")

args = parser.parse_args()

# Without an explicit -i, images are looked up in the annotation folder.
if args.image_dir is None:
    args.image_dir = args.xml_dir

# Parsed once at import time; class_text_to_int() indexes label_map_dict
# by class name.
label_map = label_map_util.load_labelmap(args.labels_path)
label_map_dict = label_map_util.get_label_map_dict(label_map)
def xml_to_csv(path):
    """Combine every object annotation found in a directory into one dataframe.

    Each ``*.xml`` file directly inside ``path`` is parsed and one row is
    produced per ``<object>`` element. Fields are looked up by tag name
    (``filename``, ``size/width``, ``size/height``, ``name``,
    ``bndbox/xmin`` ...) rather than by child position, so files whose
    elements appear in a different order, or that carry extra elements,
    are still read correctly.

    Parameters:
    ----------
    path : str
        The path containing the .xml files

    Returns
    -------
    Pandas DataFrame
        Columns ``filename, width, height, class, xmin, ymin, xmax, ymax``;
        empty (but with those columns) when no objects are found.
    """
    column_name = ['filename', 'width', 'height',
                   'class', 'xmin', 'ymin', 'xmax', 'ymax']
    box_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    xml_list = []
    # os.path.join avoids producing '/*.xml' for an empty path; sorting makes
    # the row order independent of the OS directory listing order.
    for xml_file in sorted(glob.glob(os.path.join(path, '*.xml'))):
        root = ET.parse(xml_file).getroot()
        members = root.findall('object')
        if not members:
            # Files without objects contribute no rows, so their header
            # fields are never read.
            continue
        filename = root.find('filename').text
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)
        for member in members:
            bndbox = member.find('bndbox')
            xml_list.append((
                filename,
                width,
                height,
                member.find('name').text,
                *(int(bndbox.find(tag).text) for tag in box_tags),
            ))
    return pd.DataFrame(xml_list, columns=column_name)
def class_text_to_int(row_label):
    """Return the entry stored for ``row_label`` in the label map.

    Indexes the module-level ``label_map_dict``; a label that is not in
    the map raises ``KeyError``.
    """
    class_id = label_map_dict[row_label]
    return class_id
def split(df, group):
    """Partition ``df`` into one record per distinct value of a column.

    Parameters:
    ----------
    df : Pandas DataFrame
        The rows to partition.
    group : str
        Column to group by (``main`` passes ``'filename'``).

    Returns
    -------
    list of ``data`` namedtuples
        Each has ``filename`` (the group key) and ``object`` (the
        sub-frame of rows sharing that key), in the order of
        ``groupby(...).groups``.
    """
    data = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(data(key, grouped.get_group(key)))
    return records
def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Parameters:
    ----------
    group : namedtuple
        A record produced by ``split``: ``group.filename`` is the image
        file name and ``group.object`` the dataframe rows (``class``,
        ``xmin``, ``ymin``, ``xmax``, ``ymax``) annotated on that image.
    path : str
        Directory that contains the image file.

    Returns
    -------
    tf.train.Example
        Holds the raw encoded image bytes, its size and format, and the box
        coordinates divided by the decoded image's width and height.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()

    # Decode only to learn the real size and container format; the bytes
    # stored in the record are the untouched file contents.
    image = Image.open(io.BytesIO(encoded_image))
    width, height = image.size

    # The format used to be hard-coded to b'jpg', which mislabels PNG (and
    # other) inputs. JPEG keeps the historical b'jpg' spelling so existing
    # records stay comparable.
    detected_format = (image.format or 'jpg').lower()
    if detected_format == 'jpeg':
        detected_format = 'jpg'
    image_format = detected_format.encode('utf8')

    filename = group.filename.encode('utf8')

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for _, row in group.object.iterrows():
        # Pixel coordinates from the annotation are scaled by the image size.
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def main(_):
    """Convert the annotated images into a TFRecord file (and optional CSV).

    Reads every annotation under ``args.xml_dir``, groups the rows per
    image, serialises one ``tf.train.Example`` per image into
    ``args.output_path`` and, when ``args.csv_path`` is set, also dumps the
    flat annotation table as CSV.

    The single positional parameter is whatever ``tf.app.run`` passes to
    the entry point; it is ignored.
    """
    # Parse the annotations before opening the output so a malformed XML
    # file does not leave an empty record file behind.
    examples = xml_to_csv(args.xml_dir)
    grouped = split(examples, 'filename')

    # The context manager closes the writer even if an image fails to load.
    with tf.python_io.TFRecordWriter(args.output_path) as writer:
        for group in grouped:
            tf_example = create_tf_example(group, args.image_dir)
            writer.write(tf_example.SerializeToString())
    print('Successfully created the TFRecord file: {}'.format(args.output_path))

    if args.csv_path is not None:
        # index=False states the intent (no row-index column) explicitly.
        examples.to_csv(args.csv_path, index=False)
        print('Successfully created the CSV file: {}'.format(args.csv_path))


if __name__ == '__main__':
    tf.app.run()
这是label_map.pbtxt文件
item {
id: 21
name: 'XXXX'
}
item {
id: 31
name: 'XXXX'
}
item {
id: 41
name: 'XXXX'
}
完整的控制台输出:
C:\Users\Dorian\anaconda3\envs\XXXX\lib\site-packages\numpy\_distributor_init.py:30: UserWarning: loaded more than 1 DLL from .libs:
C:\Users\Dorian\anaconda3\envs\XXXX\lib\site-packages\numpy\.libs\libopenblas.JPIJNSWNNAN3CE6LLI5FWSPHUT2VXMTH.gfortran-win_amd64.dll
C:\Users\Dorian\anaconda3\envs\XXXX\lib\site-packages\numpy\.libs\libopenblas.QVLO2T66WEPI7JZ63PS3HMOHFEY472BC.gfortran-win_amd64.dll
warnings.warn("loaded more than 1 DLL from .libs:"
Traceback (most recent call last):
File "generate_tfrecord.py", line 61, in <module>
label_map = label_map_util.load_labelmap(args.labels_path)
File "C:\Users\Dorian\anaconda3\envs\XXXX\lib\site-packages\object_detection-0.1-py3.8.egg\object_detection\utils\label_map_util.py", line 168, in load_labelmap
label_map_string = fid.read()
File "C:\Users\Dorian\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\lib\io\file_io.py", line 117, in read
self._preread_check()
File "C:\Users\Dorian\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\lib\io\file_io.py", line 79, in _preread_check
self._read_buf = _pywrap_file_io.BufferedInputStream(
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 107: invalid start byte
编辑:这是 label_map 的完整内容
item {
id: 21
name: '2Carreau'
}
item {
id: 31
name: '3Carreau'
}
item {
id: 41
name: '4Carreau'
}
item {
id: 51
name: '5Carreau'
}
item {
id: 61
name: '6Carreau'
}
item {
id: 71
name: '7Carreau'
}
item {
id: 81
name: '8Carreau'
}
item {
id: 91
name: '9Carreau'
}
item {
id: 101
name: '10Carreau'
}
item {
id: 111
name: '11Carreau'
}
item {
id: 121
name: '12Carreau'
}
item {
id: 131
name: '13Carreau'
}
item {
id: 141
name: '14Carreau'
}
item {
id: 22
name: '2Coeur'
}
item {
id: 32
name: '3Coeur'
}
item {
id: 42
name: '4Coeur'
}
item {
id: 52
name: '5Coeur'
}
item {
id: 62
name: '6Coeur'
}
item {
id: 72
name: '7Coeur'
}
item {
id: 82
name: '8Coeur'
}
item {
id: 92
name: '9Coeur'
}
item {
id: 102
name: '10Coeur'
}
item {
id: 112
name: '11Coeur'
}
item {
id: 122
name: '12Coeur'
}
item {
id: 132
name: '13Coeur'
}
item {
id: 142
name: '14Coeur'
}
item {
id: 23
name: '2Trefle'
}
item {
id: 33
name: '3Trefle'
}
item {
id: 43
name: '4Trefle'
}
item {
id: 53
name: '5Trefle'
}
item {
id: 63
name: '6Trefle'
}
item {
id: 73
name: '7Trefle'
}
item {
id: 83
name: '8Trefle'
}
item {
id: 93
name: '9Trefle'
}
item {
id: 103
name: '10Trefle'
}
item {
id: 113
name: '11Trefle'
}
item {
id: 123
name: '12Trefle'
}
item {
id: 133
name: '13Trefle'
}
item {
id: 143
name: '14Trefle'
}
item {
id: 24
name: '2Pic'
}
item {
id: 34
name: '3Pic'
}
item {
id: 44
name: '4Pic'
}
item {
id: 54
name: '5Pic'
}
item {
id: 64
name: '6Pic'
}
item {
id: 74
name: '7Pic'
}
item {
id: 84
name: '8Pic'
}
item {
id: 94
name: '9Pic'
}
item {
id: 104
name: '10Pic'
}
item {
id: 114
name: '11Pic'
}
item {
id: 124
name: '12Pic'
}
item {
id: 134
name: '13Pic'
}
item {
id: 144
name: '14Pic'
}
现在我运行以下命令:
C:\####\workspace\training_demo>python model_main_tf2.py --model_dir=models/my_ssd_resnet50_v1_fpn --pipeline_config_path=models/my_ssd_resnet50_v1_fpn/pipeline.config
开头运行正常,但最后抛出了同样的错误。我检查了 pipeline.config 和 model_main_tf2,但你的回答并没有解决这个问题。你知道原因吗?
2021-03-03 09:53:43.878440: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudart64_110.dll
2021-03-03 09:53:48.745301: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-03-03 09:53:48.749824: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library nvcuda.dll
2021-03-03 09:53:48.779768: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties:
pciBusID: 0000:1c:00.0 name: GeForce GTX 1070 Ti computeCapability: 6.1
coreClock: 1.683GHz coreCount: 19 deviceMemorySize: 8.00GiB deviceMemoryBandwidth: 238.66GiB/s
2021-03-03 09:53:48.786205: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudart64_110.dll
2021-03-03 09:53:48.800110: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2021-03-03 09:53:48.803731: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
2021-03-03 09:53:48.812755: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cufft64_10.dll
2021-03-03 09:53:48.822516: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library curand64_10.dll
2021-03-03 09:53:48.837930: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusolver64_10.dll
2021-03-03 09:53:48.851302: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusparse64_11.dll
2021-03-03 09:53:48.856177: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudnn64_8.dll
2021-03-03 09:53:48.860712: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0
2021-03-03 09:53:48.863378: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-03-03 09:53:48.873474: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties:
pciBusID: 0000:1c:00.0 name: GeForce GTX 1070 Ti computeCapability: 6.1
coreClock: 1.683GHz coreCount: 19 deviceMemorySize: 8.00GiB deviceMemoryBandwidth: 238.66GiB/s
2021-03-03 09:53:48.881298: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudart64_110.dll
2021-03-03 09:53:48.884006: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2021-03-03 09:53:48.887551: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
2021-03-03 09:53:48.891894: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cufft64_10.dll
2021-03-03 09:53:48.895372: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library curand64_10.dll
2021-03-03 09:53:48.898176: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusolver64_10.dll
2021-03-03 09:53:48.903001: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusparse64_11.dll
2021-03-03 09:53:48.906421: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudnn64_8.dll
2021-03-03 09:53:48.910388: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0
2021-03-03 09:53:49.506138: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1261] Device interconnect StreamExecutor with strength 1 edge matrix:
2021-03-03 09:53:49.509246: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1267] 0
2021-03-03 09:53:49.511875: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1280] 0: N
2021-03-03 09:53:49.513745: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1406] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6278 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1070 Ti, pci bus id: 0000:1c:00.0, compute capability: 6.1)
2021-03-03 09:53:49.521636: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
I0303 09:53:49.527721 12968 mirrored_strategy.py:350] Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Traceback (most recent call last):
File "model_main_tf2.py", line 113, in <module>
tf.compat.v1.app.run()
File "C:\Users\Dorian\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\platform\app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
File "C:\Users\Dorian\anaconda3\envs\####\lib\site-packages\absl\app.py", line 303, in run
_run_main(main, args)
File "C:\Users\Dorian\anaconda3\envs\####\lib\site-packages\absl\app.py", line 251, in _run_main
sys.exit(main(argv))
File "model_main_tf2.py", line 104, in main
model_lib_v2.train_loop(
File "C:\Users\Dorian\anaconda3\envs\####\lib\site-packages\object_detection-0.1-py3.8.egg\object_detection\model_lib_v2.py", line 474, in train_loop
configs = get_configs_from_pipeline_file(
File "C:\Users\Dorian\anaconda3\envs\####\lib\site-packages\object_detection-0.1-py3.8.egg\object_detection\utils\config_util.py", line 138, in get_configs_from_pipeline_file
proto_str = f.read()
File "C:\Users\Dorian\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\lib\io\file_io.py", line 117, in read
self._preread_check()
File "C:\Users\Dorian\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\lib\io\file_io.py", line 79, in _preread_check
self._read_buf = _pywrap_file_io.BufferedInputStream(
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 102: invalid start byte
发布于 2021-03-03 08:09:39
文件的内容看起来是正确的,但它包含撇号字符(' U+0027)。在 cp1252 编码中,0x92 是右单引号(’ U+2019)的编码。一些文字处理软件很“聪明”,认为在普通文本中左右弯引号比英文的撇号或直引号(")更美观,因此往往会自动把后者替换成前者。
不幸的是,在查看文本文件时很难用肉眼分辨它们。
因此,您应该仔细检查文本文件中是否包含这些字符。
https://stackoverflow.com/questions/66440549
复制相似问题