First, register an account on Baidu's AI Open Platform: https://ai.baidu.com/?track=cp:aipinzhuan|pf:pc|pp:AIpingtai|pu:title|ci:|kw:10005792 Then create an Android app in the console. All we need from that app are its API Key and Secret Key.
When creating the app, be sure to select Android as the platform (this matters).
Then open Unity and create a Scripts folder for the code. Inside it, create a Base script:
using System;
using System.Collections;
using UnityEngine;
using UnityEngine.Networking;

namespace Wit.BaiduAip.Speech
{
    /// <summary>
    /// JSON structure of the token response
    /// </summary>
    [Serializable]
    class TokenResponse
    {
        public string access_token = null;
    }

    public class Base
    {
        protected enum TokenFetchStatus
        {
            NotFetched,
            Fetching,
            Success,
            Failed
        }

        public string SecretKey { get; private set; }
        public string APIKey { get; private set; }
        public string Token { get; private set; }

        protected TokenFetchStatus tokenFetchStatus = TokenFetchStatus.NotFetched;

        public Base(string apiKey, string secretKey)
        {
            APIKey = apiKey;
            SecretKey = secretKey;
        }

        public IEnumerator GetAccessToken()
        {
            Debug.Log("[WitBaiduAip]Start fetching token...");
            tokenFetchStatus = TokenFetchStatus.Fetching;
            var uri = string.Format(
                "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id={0}&client_secret={1}",
                APIKey, SecretKey);
            var www = UnityWebRequest.Get(uri);
            yield return www.SendWebRequest();

            if (string.IsNullOrEmpty(www.error))
            {
                Debug.Log("[WitBaiduAip]" + www.downloadHandler.text);
                var result = JsonUtility.FromJson<TokenResponse>(www.downloadHandler.text);
                Token = result.access_token;
                Debug.Log("[WitBaiduAip]Token has been fetched successfully");
                tokenFetchStatus = TokenFetchStatus.Success;
            }
            else
            {
                Debug.LogError("[WitBaiduAip]" + www.error);
                Debug.LogError("[WitBaiduAip]Token fetch failed. Please check your APIKey and SecretKey");
                tokenFetchStatus = TokenFetchStatus.Failed;
            }
        }

        protected IEnumerator PreAction()
        {
            if (tokenFetchStatus == TokenFetchStatus.NotFetched)
            {
                Debug.Log("[WitBaiduAip]Token has not been fetched, now fetching...");
                yield return GetAccessToken();
            }

            if (tokenFetchStatus == TokenFetchStatus.Fetching)
            {
                Debug.Log("[WitBaiduAip]Token is still being fetched, waiting...");
            }

            while (tokenFetchStatus == TokenFetchStatus.Fetching)
            {
                yield return null;
            }
        }
    }
}
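Before wiring up anything else, you can sanity-check your keys with a throwaway MonoBehaviour that only fetches a token. This is just an illustrative sketch of mine (the class name and placeholder keys are not part of the plugin):

using System.Collections;
using UnityEngine;
using Wit.BaiduAip.Speech;

// Hypothetical smoke test: attach to any GameObject, fill in real keys,
// then watch the Console for the "[WitBaiduAip]" token logs.
public class TokenSmokeTest : MonoBehaviour
{
    IEnumerator Start()
    {
        var api = new Base("YOUR_API_KEY", "YOUR_SECRET_KEY"); // placeholders
        yield return api.GetAccessToken();
        Debug.Log("Fetched token: " + api.Token);
    }
}

If the Console shows the "Token has been fetched successfully" log, the keys are good and you can move on to the ASR and TTS scripts.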
This is the parent class shared by speech recognition and speech synthesis. Baidu's speech recognition (dictation) and speech synthesis services are free to use. Next, create an Asr script:
using System;
using System.Collections;
using UnityEngine;
using UnityEngine.Networking;

namespace Wit.BaiduAip.Speech
{
    [Serializable]
    public class AsrResponse
    {
        public int err_no;
        public string err_msg;
        public string sn;
        public string[] result;
    }

    public class Asr : Base
    {
        private const string UrlAsr = @"https://vop.baidu.com/server_api";

        public Asr(string apiKey, string secretKey) : base(apiKey, secretKey)
        {
        }

        public IEnumerator Recognize(byte[] data, Action<AsrResponse> callback)
        {
            yield return PreAction();
            if (tokenFetchStatus == Base.TokenFetchStatus.Failed)
            {
                Debug.LogError("Token fetch failed, please check your APIKey and SecretKey");
                yield break;
            }

            var uri = string.Format("{0}?lan=zh&cuid={1}&token={2}", UrlAsr, SystemInfo.deviceUniqueIdentifier, Token);
            var form = new WWWForm();
            form.AddBinaryData("audio", data);
            var www = UnityWebRequest.Post(uri, form);
            www.SetRequestHeader("Content-Type", "audio/pcm;rate=16000");
            yield return www.SendWebRequest();

            if (string.IsNullOrEmpty(www.error))
            {
                Debug.Log("[WitBaiduAip]" + www.downloadHandler.text);
                callback(JsonUtility.FromJson<AsrResponse>(www.downloadHandler.text));
            }
            else
                Debug.LogError(www.error);
        }

        /// <summary>
        /// Converts Unity AudioClip data to 16-bit PCM
        /// </summary>
        /// <param name="clip"></param>
        /// <returns></returns>
        public static byte[] ConvertAudioClipToPCM16(AudioClip clip)
        {
            var samples = new float[clip.samples * clip.channels];
            clip.GetData(samples, 0);
            var samples_int16 = new short[samples.Length];
            for (var index = 0; index < samples.Length; index++)
            {
                var f = samples[index];
                samples_int16[index] = (short) (f * short.MaxValue);
            }

            var byteArray = new byte[samples_int16.Length * 2];
            Buffer.BlockCopy(samples_int16, 0, byteArray, 0, byteArray.Length);
            return byteArray;
        }
    }
}
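The demo later in this article only reads result[0]; if you also want to surface API errors, you can check err_no and err_msg in the callback. A minimal sketch of such a callback (this fragment lives inside a MonoBehaviour; asr and pcmData stand for an initialized Asr instance and the output of ConvertAudioClipToPCM16):

// Illustrative callback: err_no == 0 means success according to the response fields above.
StartCoroutine(asr.Recognize(pcmData, response =>
{
    if (response.err_no == 0 && response.result != null && response.result.Length > 0)
        Debug.Log("Recognized: " + string.Join("", response.result));
    else
        Debug.LogError("ASR failed, err_no=" + response.err_no + " err_msg=" + response.err_msg);
}));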
Then create a Tts script:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;

namespace Wit.BaiduAip.Speech
{
    /// <summary>
    /// Speech synthesis result
    /// </summary>
    [Serializable]
    public class TtsResponse
    {
        public int err_no;
        public string err_msg;
        public string sn;
        public int idx;

        public bool Success
        {
            get { return err_no == 0; }
        }

        public AudioClip clip;
    }

    public class Tts : Base
    {
        public enum Pronouncer
        {
            Female = 0,    // standard female voice
            Male = 1,      // standard male voice
            Duxiaoyao = 3, // emotional synthesis voice "Du Xiaoyao"
            Duyaya = 4     // emotional synthesis voice "Du Yaya"
        }

        private const string UrlTts = "http://tsn.baidu.com/text2audio";

        public Tts(string apiKey, string secretKey) : base(apiKey, secretKey)
        {
        }

        public IEnumerator Synthesis(string text, Action<TtsResponse> callback, int speed = 5, int pit = 5,
            int vol = 5, Pronouncer per = Pronouncer.Female)
        {
            yield return PreAction();
            if (tokenFetchStatus == Base.TokenFetchStatus.Failed)
            {
                Debug.LogError("Token fetch failed. Please check your APIKey and SecretKey");
                callback(new TtsResponse()
                {
                    err_no = -1,
                    err_msg = "Token fetch failed. Please check your APIKey and SecretKey"
                });
                yield break;
            }

            var param = new Dictionary<string, string>();
            param.Add("tex", text);
            param.Add("tok", Token);
            param.Add("cuid", SystemInfo.deviceUniqueIdentifier);
            param.Add("ctp", "1");
            param.Add("lan", "zh");
            param.Add("spd", Mathf.Clamp(speed, 0, 9).ToString());
            param.Add("pit", Mathf.Clamp(pit, 0, 9).ToString());
            param.Add("vol", Mathf.Clamp(vol, 0, 15).ToString());
            param.Add("per", ((int) per).ToString());
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
            param.Add("aue", "6"); // request wav; the default is mp3
#endif

            string url = UrlTts;
            int i = 0;
            foreach (var p in param)
            {
                url += i != 0 ? "&" : "?";
                url += p.Key + "=" + p.Value;
                i++;
            }

#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
            var www = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.WAV);
#else
            var www = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.MPEG);
#endif
            Debug.Log("[WitBaiduAip]" + www.url);
            yield return www.SendWebRequest();

            if (string.IsNullOrEmpty(www.error))
            {
                var type = www.GetResponseHeader("Content-Type");
                Debug.Log("[WitBaiduAip]response type: " + type);

                if (type.Contains("audio"))
                {
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
                    var clip = DownloadHandlerAudioClip.GetContent(www);
                    var response = new TtsResponse {clip = clip};
#else
                    var response = new TtsResponse {clip = DownloadHandlerAudioClip.GetContent(www)};
#endif
                    callback(response);
                }
                else
                {
                    var textBytes = www.downloadHandler.data;
                    var errorText = Encoding.UTF8.GetString(textBytes);
                    Debug.LogError("[WitBaiduAip]" + errorText);
                    callback(JsonUtility.FromJson<TtsResponse>(errorText));
                }
            }
            else
            {
                Debug.LogError(www.error);
            }
        }
    }
}
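One thing to watch: the tex value is concatenated into the query string as-is. Baidu's documentation expects the text to be URL-encoded, and characters such as & or + in the input would otherwise corrupt the query. If synthesis fails for such input, a small tweak (my assumption, not part of the original script) is to escape the text when building the parameter list:

// Assumed fix: escape the synthesized text before it goes into the query string.
// Encoding comes from System.Text, which Tts.cs already imports.
param.Add("tex", UnityWebRequest.EscapeURL(text, Encoding.UTF8));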
That completes the groundwork; now let's use it in a scene. Start with speech recognition: create a new scene with two Buttons and a Text, then add the following script:
using UnityEngine;
using UnityEngine.UI;
using Wit.BaiduAip.Speech;

public class AsrDemo : MonoBehaviour
{
    public string APIKey = "";
    public string SecretKey = "";
    public Button StartButton;
    public Button StopButton;
    public Text DescriptionText;

    private AudioClip _clipRecord;
    private Asr _asr;

    // Microphone is not supported in WebGL
#if !UNITY_WEBGL
    void Start()
    {
        _asr = new Asr(APIKey, SecretKey);
        StartCoroutine(_asr.GetAccessToken());

        StartButton.gameObject.SetActive(true);
        StopButton.gameObject.SetActive(false);
        DescriptionText.text = "";

        StartButton.onClick.AddListener(OnClickStartButton);
        StopButton.onClick.AddListener(OnClickStopButton);
    }

    private void OnClickStartButton()
    {
        StartButton.gameObject.SetActive(false);
        StopButton.gameObject.SetActive(true);
        DescriptionText.text = "Listening...";
        _clipRecord = Microphone.Start(null, false, 30, 16000);
    }

    private void OnClickStopButton()
    {
        StartButton.gameObject.SetActive(false);
        StopButton.gameObject.SetActive(false);
        DescriptionText.text = "Recognizing...";
        Microphone.End(null);
        Debug.Log("[WitBaiduAip demo]end record");

        var data = Asr.ConvertAudioClipToPCM16(_clipRecord);
        StartCoroutine(_asr.Recognize(data, s =>
        {
            DescriptionText.text = s.result != null && s.result.Length > 0 ? s.result[0] : "未识别到声音";
            StartButton.gameObject.SetActive(true);
        }));
    }
#endif
}
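One caveat with this demo: Microphone.Start allocates a full 30-second clip, so ConvertAudioClipToPCM16 also uploads the trailing silence. If you want to send only what was actually spoken, you can read Microphone.GetPosition before ending the recording and trim the clip. A sketch, with TrimClip as a hypothetical helper that is not part of the plugin:

// Hypothetical helper: copy only the first `recordedSamples` sample frames into a new clip.
private static AudioClip TrimClip(AudioClip clip, int recordedSamples)
{
    var samples = new float[recordedSamples * clip.channels];
    clip.GetData(samples, 0);
    var trimmed = AudioClip.Create(clip.name + "_trimmed", recordedSamples, clip.channels, clip.frequency, false);
    trimmed.SetData(samples, 0);
    return trimmed;
}

// In OnClickStopButton, before Microphone.End(null):
//     int recorded = Microphone.GetPosition(null);
//     Microphone.End(null);
//     var data = Asr.ConvertAudioClipToPCM16(TrimClip(_clipRecord, recorded));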
Drag the script onto the camera and assign its references in the Inspector: the two Buttons, the Text, and your APIKey and SecretKey.
Now run the scene and test it; it should recognize what you say. Next up is speech synthesis. Create another scene and a new script:
using UnityEngine;
using UnityEngine.UI;
using Wit.BaiduAip.Speech;

public class TtsDemo : MonoBehaviour
{
    public string APIKey = "";
    public string SecretKey = "";
    public Button SynthesisButton;
    public InputField Input;
    public Text DescriptionText;

    private Tts _tts;
    private AudioSource _audioSource;
    private bool _startPlaying;

    void Start()
    {
        _tts = new Tts(APIKey, SecretKey);
        StartCoroutine(_tts.GetAccessToken());

        _audioSource = gameObject.AddComponent<AudioSource>();
        DescriptionText.text = "";
        SynthesisButton.onClick.AddListener(OnClickSynthesisButton);
    }

    private void OnClickSynthesisButton()
    {
        SynthesisButton.gameObject.SetActive(false);
        DescriptionText.text = "合成中...";
        StartCoroutine(_tts.Synthesis(Input.text, s =>
        {
            if (s.Success)
            {
                DescriptionText.text = "合成成功,正在播放";
                _audioSource.clip = s.clip;
                _audioSource.Play();
                _startPlaying = true;
            }
            else
            {
                DescriptionText.text = s.err_msg;
                SynthesisButton.gameObject.SetActive(true);
            }
        }));
    }

    void Update()
    {
        if (_startPlaying)
        {
            if (!_audioSource.isPlaying)
            {
                _startPlaying = false;
                DescriptionText.text = "播放完毕,可以修改文本继续测试";
                SynthesisButton.gameObject.SetActive(true);
            }
        }
    }
}
Add an InputField and a Button to the scene, drag the script onto the camera, and wire up the references in the Inspector (SynthesisButton, Input, DescriptionText) along with your APIKey and SecretKey.
When you run it, type the text you want synthesized into the input field and click the button; you should hear the synthesized speech.