
Jtro's Tech Share: Integrating the Baidu Speech Recognition SDK into Unity

First, register an account on Baidu's AI Open Platform: https://ai.baidu.com/?track=cp:aipinzhuan|pf:pc|pp:AIpingtai|pu:title|ci:|kw:10005792 Then create an Android application in the console; all the code below needs from that app are its API Key and Secret Key.

When creating the application, make sure you select Android (this is important).
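The API Key and Secret Key you copy from the app's detail page are the only credentials the code in this post consumes; they are passed straight into the wrapper classes written below. A minimal sketch of where they end up (the key strings are placeholders and the script name CredentialCheck is purely illustrative; the Asr class is defined later in this post):

using UnityEngine;
using Wit.BaiduAip.Speech;

// Illustrative only: shows where the console credentials are consumed.
public class CredentialCheck : MonoBehaviour
{
    void Start()
    {
        // Placeholders: paste the API Key and Secret Key from the Baidu console here.
        var asr = new Asr("<your API Key>", "<your Secret Key>");

        // Requests the OAuth access token once; an error is logged if the keys are wrong.
        StartCoroutine(asr.GetAccessToken());
    }
}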

Then open Unity and create a script folder named Scripts. Inside it, create a Base script:

using System;
using System.Collections;
using UnityEngine;
using UnityEngine.Networking;

namespace Wit.BaiduAip.Speech
{
    /// <summary>
    /// JSON structure used to parse the access token response
    /// </summary>
    [Serializable]
    class TokenResponse
    {
        public string access_token = null;
    }

    public class Base
    {
        protected enum TokenFetchStatus
        {
            NotFetched,
            Fetching,
            Success,
            Failed
        }

        public string SecretKey { get; private set; }

        public string APIKey { get; private set; }

        public string Token { get; private set; }

        protected TokenFetchStatus tokenFetchStatus = TokenFetchStatus.NotFetched;

        public Base(string apiKey, string secretKey)
        {
            APIKey = apiKey;
            SecretKey = secretKey;
        }

        public IEnumerator GetAccessToken()
        {
            Debug.Log("[WitBaiduAip]Start fetching token...");
            tokenFetchStatus = TokenFetchStatus.Fetching;

            var uri =
                string.Format(
                    "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id={0}&client_secret={1}",
                    APIKey, SecretKey);
            var www = UnityWebRequest.Get(uri);
            yield return www.SendWebRequest();

            if (string.IsNullOrEmpty(www.error))
            {
                Debug.Log("[WitBaiduAip]" + www.downloadHandler.text);
                var result = JsonUtility.FromJson<TokenResponse>(www.downloadHandler.text);
                Token = result.access_token;
                Debug.Log("[WitBaiduAip]Token has been fetched successfully");
                tokenFetchStatus = TokenFetchStatus.Success;
            }
            else
            {
                Debug.LogError("[WitBaiduAip]"+www.error);
                Debug.LogError("[WitBaiduAip]Token was fetched failed. Please check your APIKey and SecretKey");
                tokenFetchStatus = TokenFetchStatus.Failed;
            }
        }

        protected IEnumerator PreAction()
        {
            if (tokenFetchStatus == TokenFetchStatus.NotFetched)
            {
                Debug.Log("[WitBaiduAip]Token has not been fetched, now fetching...");
                yield return GetAccessToken();
            }

            if (tokenFetchStatus == TokenFetchStatus.Fetching)
            {
                Debug.Log("[WitBaiduAip]Token is still being fetched, waiting...");
            }

            while (tokenFetchStatus == TokenFetchStatus.Fetching)
            {
                yield return null;
            }
        }
    }
}

This is the base class shared by the speech recognition and speech synthesis wrappers; it is responsible for fetching the access token. On Baidu's platform, speech dictation and speech synthesis are free. Next, create an Asr script:

using System;
using System.Collections;
using UnityEngine;
using UnityEngine.Networking;

namespace Wit.BaiduAip.Speech
{
    [Serializable]
    public class AsrResponse
    {
        public int err_no;
        public string err_msg;
        public string sn;
        public string[] result;
    }

    public class Asr : Base
    {
        private const string UrlAsr = @"https://vop.baidu.com/server_api";

        public Asr(string apiKey, string secretKey) : base(apiKey, secretKey)
        {
        }

        public IEnumerator Recognize(byte[] data, Action<AsrResponse> callback)
        {
            yield return PreAction ();

            if (tokenFetchStatus == Base.TokenFetchStatus.Failed) {
                Debug.LogError("Token fetched failed, please check your APIKey and SecretKey");
                yield break;
            }

            var uri = string.Format("{0}?lan=zh&cuid={1}&token={2}", UrlAsr, SystemInfo.deviceUniqueIdentifier, Token);

            var form = new WWWForm();
            form.AddBinaryData("audio", data);
            var www = UnityWebRequest.Post(uri, form);
            www.SetRequestHeader("Content-Type", "audio/pcm;rate=16000");
            yield return www.SendWebRequest();

            if (string.IsNullOrEmpty(www.error))
            {
                Debug.Log("[WitBaiduAip]"+www.downloadHandler.text);
                callback(JsonUtility.FromJson<AsrResponse>(www.downloadHandler.text));
            }
            else
                Debug.LogError(www.error);
        }

        /// <summary>
        /// Converts Unity AudioClip data into 16-bit PCM data
        /// </summary>
        /// <param name="clip"></param>
        /// <returns></returns>
        public static byte[] ConvertAudioClipToPCM16(AudioClip clip)
        {
            var samples = new float[clip.samples * clip.channels];
            clip.GetData(samples, 0);
            var samples_int16 = new short[samples.Length];

            for (var index = 0; index < samples.Length; index++)
            {
                var f = samples[index];
                samples_int16[index] = (short) (f * short.MaxValue);
            }

            var byteArray = new byte[samples_int16.Length * 2];
            Buffer.BlockCopy(samples_int16, 0, byteArray, 0, byteArray.Length);

            return byteArray;
        }
    }
}

Next, create a Tts script:

using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;

namespace Wit.BaiduAip.Speech
{
    /// <summary>
    ///     Speech synthesis result
    /// </summary>
    [Serializable]
    public class TtsResponse
    {
        public int err_no;
        public string err_msg;
        public string sn;
        public int idx;

        public bool Success
        {
            get { return err_no == 0; }
        }

        public AudioClip clip;
    }

    public class Tts : Base
    {
        public enum Pronouncer
        {
            Female = 0, // 0: standard female voice
            Male = 1, // 1: standard male voice
            Duxiaoyao = 3, // 3: emotional synthesis - Du Xiaoyao
            Duyaya = 4 // 4: emotional synthesis - Du Yaya
        }

        private const string UrlTts = "http://tsn.baidu.com/text2audio";

        public Tts(string apiKey, string secretKey) : base(apiKey, secretKey)
        {
        }

        public IEnumerator Synthesis(string text, Action<TtsResponse> callback, int speed = 5, int pit = 5, int vol = 5,
            Pronouncer per = Pronouncer.Female)
        {
            yield return PreAction();

            if (tokenFetchStatus == Base.TokenFetchStatus.Failed)
            {
                Debug.LogError("Token was fetched failed. Please check your APIKey and SecretKey");
                callback(new TtsResponse()
                {
                    err_no = -1,
                    err_msg = "Token was fetched failed. Please check your APIKey and SecretKey"
                });
                yield break;
            }

            var param = new Dictionary<string, string>();
            param.Add("tex", text);
            param.Add("tok", Token);
            param.Add("cuid", SystemInfo.deviceUniqueIdentifier);
            param.Add("ctp", "1");
            param.Add("lan", "zh");
            param.Add("spd", Mathf.Clamp(speed, 0, 9).ToString());
            param.Add("pit", Mathf.Clamp(pit, 0, 9).ToString());
            param.Add("vol", Mathf.Clamp(vol, 0, 15).ToString());
            param.Add("per", ((int)per).ToString());
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
            param.Add("aue", "6"); // set to wav, default is mp3
#endif

            string url = UrlTts;
            int i = 0;
            foreach (var p in param)
            {
                url += i != 0 ? "&" : "?";
                url += p.Key + "=" + p.Value;
                i++;
            }

#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
            var www = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.WAV);
#else
            var www = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.MPEG);
#endif
            Debug.Log("[WitBaiduAip]" + www.url);
            yield return www.SendWebRequest();


            if (string.IsNullOrEmpty(www.error))
            {
                var type = www.GetResponseHeader("Content-Type");
                Debug.Log("[WitBaiduAip]response type: " + type);

                if (type.Contains("audio"))
                {
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
                    var clip = DownloadHandlerAudioClip.GetContent(www);
                    var response = new TtsResponse { clip = clip };
#else
                    var response = new TtsResponse {clip = DownloadHandlerAudioClip.GetContent(www) };
#endif
                    callback(response);
                }
                else
                {
                    var textBytes = www.downloadHandler.data;
                    var errorText = Encoding.UTF8.GetString(textBytes);
                    Debug.LogError("[WitBaiduAip]" + errorText);
                    callback(JsonUtility.FromJson<TtsResponse>(errorText));
                }
            }
            else
            {
                Debug.LogError(www.error);
            }
        }
    }
}

That completes the groundwork; what remains is using it in a scene. First, create a speech recognition scene containing two buttons and one Text element:

using UnityEngine;
using UnityEngine.UI;
using Wit.BaiduAip.Speech;

public class AsrDemo : MonoBehaviour
{
    public string APIKey = "";
    public string SecretKey = "";
    public Button StartButton;
    public Button StopButton;
    public Text DescriptionText;

    private AudioClip _clipRecord;
    private Asr _asr;

    // Microphone is not supported in WebGL
#if !UNITY_WEBGL

    void Start()
    {
        _asr = new Asr(APIKey, SecretKey);
        StartCoroutine(_asr.GetAccessToken());

        StartButton.gameObject.SetActive(true);
        StopButton.gameObject.SetActive(false);
        DescriptionText.text = "";

        StartButton.onClick.AddListener(OnClickStartButton);
        StopButton.onClick.AddListener(OnClickStopButton);
    }

    private void OnClickStartButton()
    {
        StartButton.gameObject.SetActive(false);
        StopButton.gameObject.SetActive(true);
        DescriptionText.text = "Listening...";

        _clipRecord = Microphone.Start(null, false, 30, 16000);
    }

    private void OnClickStopButton()
    {
        StartButton.gameObject.SetActive(false);
        StopButton.gameObject.SetActive(false);
        DescriptionText.text = "Recognizing...";
        Microphone.End(null);
        Debug.Log("[WitBaiduAip demo]end record");
        var data = Asr.ConvertAudioClipToPCM16(_clipRecord);
        StartCoroutine(_asr.Recognize(data, s =>
        {
            DescriptionText.text = s.result != null && s.result.Length > 0 ? s.result[0] : "未识别到声音";

            StartButton.gameObject.SetActive(true);
        }));
    }
#endif
}

Then drag the script onto the camera and assign its references in the Inspector (the two buttons, the Text, and your API Key and Secret Key).

Now you can test your scene; it will recognize what you say. Next up is speech synthesis: create a new scene and a new script file:

using UnityEngine;
using UnityEngine.UI;
using Wit.BaiduAip.Speech;

public class TtsDemo : MonoBehaviour
{
    public string APIKey = "";
    public string SecretKey = "";
    public Button SynthesisButton;
    public InputField Input;
    public Text DescriptionText;

    private Tts _tts;
    private AudioSource _audioSource;
    private bool _startPlaying;

    void Start()
    {
        _tts = new Tts(APIKey, SecretKey);
        StartCoroutine(_tts.GetAccessToken());

        _audioSource = gameObject.AddComponent<AudioSource>();

        DescriptionText.text = "";

        SynthesisButton.onClick.AddListener(OnClickSynthesisButton);
    }

    private void OnClickSynthesisButton()
    {
        SynthesisButton.gameObject.SetActive(false);
        DescriptionText.text = "合成中...";

        StartCoroutine(_tts.Synthesis(Input.text, s =>
        {
            if (s.Success)
            {
                DescriptionText.text = "合成成功,正在播放";
                _audioSource.clip = s.clip;
                _audioSource.Play();

                _startPlaying = true;
            }
            else
            {
                DescriptionText.text = s.err_msg;
                SynthesisButton.gameObject.SetActive(true);
            }
        }));
    }

    void Update()
    {
        if (_startPlaying)
        {
            if (!_audioSource.isPlaying)
            {
                _startPlaying = false;
                DescriptionText.text = "播放完毕,可以修改文本继续测试";
                SynthesisButton.gameObject.SetActive(true);
            }
        }
    }
}

Add an InputField and a Button to the scene, drag the script onto the camera, and assign the script's references in the Inspector.

When running the scene, type the text you want to synthesize into the input field; after clicking the button you will hear the synthesized speech.
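As an optional wrap-up, the two wrappers can also be chained: recognize a microphone recording with Asr, then read the recognized text back out with Tts. The following is a minimal sketch under the assumption that the Asr and Tts classes above are in the project; the key fields are placeholders, the script name is arbitrary, and it is not part of the two demo scenes above.

using UnityEngine;
using Wit.BaiduAip.Speech;

// Illustrative combined demo: speech in, recognized text back out as speech.
public class AsrTtsChainDemo : MonoBehaviour
{
    public string APIKey = "";
    public string SecretKey = "";

    private Asr _asr;
    private Tts _tts;
    private AudioSource _audioSource;
    private AudioClip _clipRecord;

    void Start()
    {
        // Recognize/Synthesis fetch the access token on demand via PreAction,
        // so no explicit GetAccessToken call is needed here.
        _asr = new Asr(APIKey, SecretKey);
        _tts = new Tts(APIKey, SecretKey);
        _audioSource = gameObject.AddComponent<AudioSource>();

        // Record up to 10 seconds at 16 kHz, the sample rate Baidu ASR expects.
        _clipRecord = Microphone.Start(null, false, 10, 16000);
        Invoke(nameof(StopAndRecognize), 10f);
    }

    private void StopAndRecognize()
    {
        Microphone.End(null);
        var data = Asr.ConvertAudioClipToPCM16(_clipRecord);

        StartCoroutine(_asr.Recognize(data, asrResult =>
        {
            if (asrResult.result == null || asrResult.result.Length == 0)
                return;

            // Feed the recognized text straight back into speech synthesis.
            StartCoroutine(_tts.Synthesis(asrResult.result[0], ttsResult =>
            {
                if (ttsResult.Success)
                {
                    _audioSource.clip = ttsResult.clip;
                    _audioSource.Play();
                }
            }));
        }));
    }
}

Treat this purely as a sketch of how the two classes compose; the two separate demo scenes above remain the reference setup.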
