我一直在使用下面的第一个脚本将 mp4 文件批量转换为 wav,再用第二个脚本将语音转写为文本。它已经成功运行了一段时间,转换了大约 2000 个音频文件,所有文件的长度都小于 60 秒。然后,speech_recognition 突然停止工作,对每个文件都报出以下错误。
文件读取错误:音频文件无法作为 PCM WAV、AIFF/AIFF-C 或原生 FLAC 读取;请检查文件是否已损坏或为其他格式
任何帮助都将不胜感激。
这是我将mp4转换为wav的代码:
#!/usr/bin/env python3
#convert mp4 to wav
import os
import sys
import glob
from pydub import AudioSegment
# Batch-convert every .mp4 file under a user-supplied folder (recursively)
# to a .wav file written next to its source.
#
# Pass "a" as the first command-line argument to overwrite existing .wav
# files; without it, an .mp4 that already has a matching .wav is skipped.
folder_path = input("Enter the path for the folder/directory : ")
print("\n Processing...")

# Remove quotes from string. startswith/endswith are safe on an empty
# string, whereas indexing [0] / [-1] raises IndexError.
if folder_path.startswith("\""):
    folder_path = folder_path[1:]
if folder_path.endswith("\""):
    folder_path = folder_path[:-1]

if not os.path.isdir(folder_path):
    print("Not a folder: " + repr(folder_path))
    sys.exit(1)

# Work with an absolute path so every path built below stays valid
# regardless of the current working directory.
folder_path = os.path.abspath(folder_path)
os.chdir(folder_path)

# Count files. NOTE: this only counts the top-level folder, while the
# conversion loop below also descends into sub-folders.
# glob.escape keeps characters such as "[" in the folder name literal.
wavList = glob.glob(os.path.join(glob.escape(folder_path), "*.wav"))
mp4List = glob.glob(os.path.join(glob.escape(folder_path), "*.mp4"))
prompt = (
    str(len(mp4List)) + r" '.mp4' files & "
    + str(len(wavList)) + r" '.wav' files found. Continue (y/n) : "
)
if input(prompt) != "y":
    print('canceled by user')
    sys.exit()

# sys.argv[0] is the script name, so the option has to be read from argv[1].
overwrite = len(sys.argv) > 1 and sys.argv[1] == "a"

# Loop through files.
print(folder_path)
for subdir, dirs, files in os.walk(folder_path):
    for srcfile in files:
        print(srcfile)
        stem, ext = os.path.splitext(srcfile)
        # Case-insensitive so that "clip.MP4" is converted as well.
        if ext.lower() != ".mp4":
            continue
        wavfile = stem + ".wav"
        print(wavfile)

        # Build paths from the directory being walked, not from the root
        # folder; otherwise files in sub-folders resolve to the wrong place.
        infile = os.path.join(subdir, srcfile)
        wavpath = os.path.join(subdir, wavfile)

        if os.path.isfile(wavpath):
            if not overwrite:
                continue
            os.remove(wavpath)

        print(infile)
        print(wavpath)
        audio = AudioSegment.from_file(infile, format="mp4")
        audio.export(wavpath, format="WAV")
这是我将音频转换为文字的函数。我对它做了删减,因为其中有很多语音识别引擎的选项,但程序根本没有执行到那一步。
#!/usr/bin/env python3
import speech_recognition as sr
import os
import json
import atexit
text_count = 0
fail_count = 0
skip_count = 0
def get_audio_text(audio_file, TRANSLATE_OPTION):
    """Transcribe ``audio_file`` with the engine chosen by ``TRANSLATE_OPTION``.

    ``TRANSLATE_OPTION`` may be "s"/"sphinx" for Sphinx or "g"/"google" for
    Google Speech Recognition. Problems are reported through the returned
    string instead of being raised: a read failure yields
    "File Read Error: ...", and each engine has its own failure messages.
    When the option matches neither engine, ``audio_file`` itself is
    returned unchanged.
    """
    recognizer = sr.Recognizer()
    result = audio_file

    # Use the audio file as the audio source.
    try:
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
    except Exception as exc:
        message = f"File Read Error: {exc}"
        print(message)
        return message

    if TRANSLATE_OPTION in ("s", "sphinx"):
        # Recognize speech using Sphinx.
        try:
            result = recognizer.recognize_sphinx(audio)
        except sr.UnknownValueError:
            result = "Sphinx could not understand audio"
        except sr.RequestError as exc:
            result = f"Sphinx error; {exc}"
    elif TRANSLATE_OPTION in ("g", "google"):
        # Recognize speech using Google Speech Recognition.
        # For testing purposes this uses the default API key; to use another
        # key, call
        # `recognizer.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`.
        try:
            result = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            result = "Google Speech Recognition could not understand audio"
        except sr.RequestError as exc:
            result = f"Could not request results from Google Speech Recognition service; {exc}"

    return result
运行环境为 Windows 10,已尝试过 Python 3.10 和 Python 3.9。
发布于 2022-02-27 22:34:16
我找到了问题所在。问题根本不在这个函数里:我传给该函数的是 .mp4 文件,而不是 .wav 文件。
这是一个典型的例子:我想当然地认为调用代码没有问题,结果问题恰恰出在那里。(是的,我本应该把全部代码都贴出来。)
正确的调用代码:
# Corrected caller: only .wav files are handed to get_audio_text.
# full_path and trans_optn are not defined in this excerpt; they come from
# the surrounding (unshown) calling code.
for subdir, dirs, files in os.walk(folder_path):
    os.chdir(subdir)
    for file in files:
        if file.endswith(".wav"):
            txt = get_audio_text(full_path, trans_optn)
不正确的调用代码:
# Faulty caller, kept as-is to illustrate the bug: it selects .mp4 files,
# which sr.AudioFile cannot read, so every call fails with a file read error.
# full_path and trans_optn are not defined in this excerpt; they come from
# the surrounding (unshown) calling code.
for subdir, dirs, files in os.walk(folder_path):
    os.chdir(subdir)
    for file in files:
        if file.endswith(".mp4"):
            txt = get_audio_text(full_path, trans_optn)
我真的不知道我是怎么搞砸的。
https://stackoverflow.com/questions/71260191
复制相似问题