TypeError Traceback (most recent call last)
Input In [3], in <cell line: 1>()
----> 1 (train_data,test_data,info)=tfds.load("imdb_reviews/subwords8k",
2 split=(tfds.Split.TRAIN,tfds.Split.TEST),
3 with_info=True,
4 as_supervised=True)
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\logging\__init__.py:169, in _FunctionDecorator.__call__(self, function, instance, args, kwargs)
167 metadata = self._start_call()
168 try:
--> 169 return function(*args, **kwargs)
170 except Exception:
171 metadata.mark_error()
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\load.py:640, in load(name, split, data_dir, batch_size, shuffle_files, download, as_supervised, decoders, read_config, with_info, builder_kwargs, download_and_prepare_kwargs, as_dataset_kwargs, try_gcs)
521 """Loads the named dataset into a `tf.data.Dataset`.
522
523 `tfds.load` is a convenience method that:
(...)
632 Split-specific information is available in `ds_info.splits`.
633 """
634 dbuilder = _fetch_builder(
635 name,
636 data_dir,
637 builder_kwargs,
638 try_gcs,
639 )
--> 640 _download_and_prepare_builder(dbuilder, download, download_and_prepare_kwargs)
642 if as_dataset_kwargs is None:
643 as_dataset_kwargs = {}
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\load.py:499, in _download_and_prepare_builder(dbuilder, download, download_and_prepare_kwargs)
497 if download:
498 download_and_prepare_kwargs = download_and_prepare_kwargs or {}
--> 499 dbuilder.download_and_prepare(**download_and_prepare_kwargs)
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\logging\__init__.py:169, in _FunctionDecorator.__call__(self, function, instance, args, kwargs)
167 metadata = self._start_call()
168 try:
--> 169 return function(*args, **kwargs)
170 except Exception:
171 metadata.mark_error()
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\dataset_builder.py:646, in DatasetBuilder.download_and_prepare(self, download_dir, download_config, file_format)
644 self.info.read_from_directory(self._data_dir)
645 else:
--> 646 self._download_and_prepare(
647 dl_manager=dl_manager,
648 download_config=download_config,
649 )
651 # NOTE: If modifying the lines below to put additional information in
652 # DatasetInfo, you'll likely also want to update
653 # DatasetInfo.read_from_directory to possibly restore these attributes
654 # when reading from package data.
655 self.info.download_size = dl_manager.downloaded_size
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\dataset_builder.py:1498, in GeneratorBasedBuilder._download_and_prepare(self, dl_manager, download_config)
1496 else:
1497 optional_pipeline_kwargs = {}
-> 1498 split_generators = self._split_generators( # pylint: disable=unexpected-keyword-arg
1499 dl_manager, **optional_pipeline_kwargs
1500 )
1501 # TODO(tfds): Could be removed once all datasets are migrated.
1502 # https://github.com/tensorflow/datasets/issues/2537
1503 # Legacy mode (eventually convert list[SplitGeneratorLegacy] -> dict)
1504 split_generators = split_builder.normalize_legacy_split_generators(
1505 split_generators=split_generators,
1506 generator_fn=self._generate_examples,
1507 is_beam=isinstance(self, BeamBasedBuilder),
1508 )
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\datasets\imdb_reviews\imdb_reviews_dataset_builder.py:115, in Builder._split_generators(self, dl_manager)
112 archive = lambda: dl_manager.iter_archive(arch_path)
114 # Generate vocabulary from training data if SubwordTextEncoder configured
--> 115 self.info.features["text"].maybe_build_from_corpus(
116 self._vocab_text_gen(archive())
117 )
119 return [
120 tfds.core.SplitGenerator(
121 name=tfds.Split.TRAIN,
(...)
141 ),
142 ]
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\features\text_feature.py:169, in Text.maybe_build_from_corpus(self, corpus_generator, **kwargs)
166 return
168 vocab_size = self._encoder_config.vocab_size
--> 169 self.encoder = text_lib.SubwordTextEncoder.build_from_corpus(
170 corpus_generator=corpus_generator,
171 target_vocab_size=vocab_size,
172 **kwargs,
173 )
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\deprecated\text\subword_text_encoder.py:294, in SubwordTextEncoder.build_from_corpus(cls, corpus_generator, target_vocab_size, max_subword_length, max_corpus_chars, reserved_tokens)
288 reserved_tokens = reserved_tokens or []
289 _validate_build_arguments(
290 max_subword_length=max_subword_length,
291 reserved_tokens=reserved_tokens,
292 target_vocab_size=target_vocab_size,
293 )
--> 294 token_counts = _token_counts_from_generator(
295 generator=corpus_generator,
296 max_chars=max_corpus_chars,
297 reserved_tokens=reserved_tokens,
298 )
300 # Binary search on the minimum token count to build a vocabulary with
301 # approximately the right size
302 def _binary_search(min_token_count, max_token_count):
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\deprecated\text\subword_text_encoder.py:413, in _token_counts_from_generator(generator, max_chars, reserved_tokens)
411 num_chars = 0
412 token_counts = collections.defaultdict(int)
--> 413 for s in generator:
414 s = tf.compat.as_text(s)
415 if max_chars and (num_chars + len(s)) >= max_chars:
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\datasets\imdb_reviews\imdb_reviews_dataset_builder.py:105, in Builder._vocab_text_gen(self, archive)
104 def _vocab_text_gen(self, archive):
--> 105 for _, ex in self._generate_examples(
106 archive, os.path.join("aclImdb", "train")
107 ):
108 yield ex["text"]
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\datasets\imdb_reviews\imdb_reviews_dataset_builder.py:151, in Builder._generate_examples(self, archive, directory, labeled)
147 reg_path = "(?P<label>neg|pos)" if labeled else "unsup"
148 reg = re.compile(
149 os.path.join("^%s" % directory, reg_path, "").replace("\\", "\\\\")
150 )
--> 151 for path, imdb_f in archive:
152 res = reg.match(path)
153 if not res:
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\download\extractor.py:179, in iter_tar(arch_f, stream)
176 read_type = 'r' + ('|' if stream else ':') + '*'
178 with _open_or_pass(arch_f) as fobj:
--> 179 tar = tarfile.open(mode=read_type, fileobj=fobj)
180 for member in tar:
181 if stream and (member.islnk() or member.issym()):
182 # Links cannot be dereferenced in stream mode.
File D:\anaconda\envs\tensorflow_gpu\lib\tarfile.py:1599, in TarFile.open(cls, name, mode, fileobj, bufsize, **kwargs)
1597 func = getattr(cls, cls.OPEN_METH[comptype])
1598 if fileobj is not None:
-> 1599 saved_pos = fileobj.tell()
1600 try:
1601 return func(name, "r", fileobj, **kwargs)
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\lib\io\file_io.py:186, in FileIO.tell(self)
184 """Returns the current position in the file."""
185 if self._read_check_passed:
--> 186 self._preread_check()
187 return self._read_buf.tell()
188 else:
File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\lib\io\file_io.py:78, in FileIO._preread_check(self)
75 if not self._read_check_passed:
76 raise errors.PermissionDeniedError(None, None,
77 "File isn't open for reading")
---> 78 self._read_buf = _pywrap_file_io.BufferedInputStream(
79 self.__name, 1024 * 512)
TypeError: __init__(): incompatible constructor arguments. The following argument types are supported:
1. tensorflow.python._pywrap_file_io.BufferedInputStream(arg0: str, arg1: int)
Invoked with: WindowsGPath('C:\\Users\\123456\\tensorflow_datasets\\downloads\\ai.stanfor.edu_amaas_sentime_aclImdb_v1xA90oY07YfkP66HhdzDg046Ll8Bf3nAIlC6Rkj0WWP4.tar.gz'), 524288
Similar questions