实现一个前后端结构的语音识别小程序服务

原创

张世强

修改于 2020-06-04 13:36:41

3.1K0

文章被收录于专栏：zhangzhang

一、实现方式：通过录音管理器 RecorderManager 调用手机的录音功能在线获取音频，并将获取到的音频上传到服务端；服务端调用腾讯云“一句话识别”API 提取音频中的文字，再将识别结果返回给小程序端。

二、实现流程

第一步：配置服务器域名

第二步：实现小程序的Demo

在小程序公共配置文件app.json中，添加页面生成参数

"pages/voice/voice",

点击"编译"生成页面目录及页面

voice.wxml

<!--pages/voice/voice.wxml-->
<!--
  Recorder panel. `status` drives which controls are enabled:
  0 = idle, 1 = recording, 2 = paused, 3 = finished.
  `actionStatus` is 1 while the recording is being played back, otherwise 0.
-->
<view class="REC">
  <view class="time">{{status==0?'录音时长':(status==3?'录音结束':'录音中')}}：{{time}} 秒 ({{duration/1000}}秒)</view>
  <!-- Playback / re-record controls, enabled only once a recording is finished. -->
  <view class="rin">
    <view class="{{status==3 && actionStatus==0?'show':'hide'}}" bindtap="play" hover-class="skip">{{actionStatus==1?'播放中':'播放录音'}}</view>
    <view class="{{status==3?'show':'hide'}}" bindtap="again" hover-class="skip">再次录制</view>
  </view>
  <!-- Recorder controls: start, pause, resume, stop, recognise. -->
  <view class="anniu">
    <view class="{{status==0?'highlight':'gray'}}" bindtap="start" hover-class="skip">开始</view>
    <view class="{{status==1?'highlight':'gray'}}" bindtap="stop" hover-class="skip">暂停</view>
    <view class="{{status==2?'highlight':'gray'}}" bindtap="continue" hover-class="skip">继续</view>
    <view class="{{(status==1 || status==2)?'highlight':'gray'}}" bindtap="shutoff" hover-class="skip">停止</view>
    <view class="{{status==3?'highlight':'gray'}}" bindtap="recognition" hover-class="skip">识别</view>
  </view>
  <!-- Elapsed time as a percentage of the maximum duration (duration is in ms). -->
  <view class="progress">
    <!-- stroke-width was declared twice (10 and 4); only one declaration is kept. -->
    <progress percent="{{time*(100/(duration/1000))}}" backgroundColor="#fff" border-radius="15" stroke-width="4" color="#7FFF00" active />
  </view>
</view>
<!-- Recognition result. The class name must not carry a leading dot, otherwise the .REC rule never matches. -->
<view class="REC">
  <textarea placeholder="录音完成后点击识别可将音频转文字" auto-focus value="{{ Words }}" />
</view>

使用的组件：

使用的视图容器：

使用的XML语法：

使用的视图层：

voice.wxss

/* pages/voice/voice.wxss */
/* Rounded, light-green card used as the outer container of each panel. */
.REC {
  border-radius: 25rpx;
  background-color: rgb( 199,237,204 );
  padding: 6rpx 0rpx;
  margin: 15rpx 35rpx;
}

/* Row holding the "play" and "record again" buttons. */
.rin {
  justify-content: space-between;
  align-items: center;
  margin: 0rpx 120rpx;
  display: flex;
}

/* Enabled state of a button inside the .rin row. */
.rin .show {
  background-color: rgb(178, 228, 228);
  padding: 15rpx;
  width: 210rpx;
  border: 5rpx solid rgb(127, 204, 214);
  border-radius: 20rpx;
  font-size: 28rpx;
  display: flex;
  justify-content: center;
  align-items: center;
}

/* Disabled state of a button inside the .rin row: still drawn, but
   pointer-events: none makes it ignore taps. */
.rin .hide {
  padding: 15rpx;
  align-items: center;
  border-radius: 20rpx;
  display: flex;
  width: 215rpx;
  font-size: 28rpx;
  justify-content: center;
  border: 5rpx solid #eee;
  pointer-events: none;
  background-color: rgba(137, 190, 178, 0.445);
}

/* Status / elapsed-time line at the top of the recorder card. */
.time {
  text-align: center;
  line-height: 75rpx;
  font-size: 28rpx; 
}

/* Wrapper around the <progress> bar. */
.progress {
  margin: 25rpx;
}

/* NOTE(review): .play is not referenced by the voice.wxml shown in this article. */
.play {
  margin: 0rpx 25rpx;
}

/* NOTE(review): .content is not referenced by the voice.wxml shown in this article. */
.content {
  line-height: 60rpx;
  font-size: 28rpx;
  display: flex;
  justify-content: center;
}

/* Row of the five round recorder buttons. */
.anniu {
  display: flex;
  margin: 10rpx 50rpx;
  justify-content: space-between;
}

/* Enabled round button. */
.highlight {
  display: flex;
  font-size: 28rpx;
  width: 80rpx;
  height: 80rpx;
  justify-content: center;
  border-radius: 50%;
  align-items: center;
  background-color: rgb(107, 194, 53);
  border: 5rpx solid rgb(127, 204, 214);
}

/* hover-class applied while a button is pressed: shrink it slightly. */
.skip {
  transform: scale(0.9);
}



/* Disabled round button: same geometry as .highlight, greyed out and
   unresponsive to taps. */
.anniu .gray {
  pointer-events: none;
  background-color: rgba(137, 190, 178, 0.445);
  display: flex;
  width: 80rpx;
  height: 80rpx;
  font-size: 28rpx;
  justify-content: center;
  align-items: center;
  border-radius: 50%;
  border: 5rpx solid rgb(241, 244, 245); 
}

WXSS样式学习

voice.json

{
  "navigationBarTitleText": "一句话识别在线测试",
  "backgroundColor": "#eeeeee"
}

全局配置

voice.js

// pages/voice/voice.js
// Recorder manager used by the start / pause / resume / stop handlers below.
const recorderManager = wx.getRecorderManager()
// Audio context used to play back the finished recording.
const innerAudioContext = wx.createInnerAudioContext()
// Handle of the one-second interval created and cleared by timeCounter().
var init

Page({

  /**
   * 页面的初始数据
   */
  data: {
    time: 0, 
    duration: 60000, 
    localFilePath: "", 
    status: 0, 
    actionStatus: 0, 
  },

  /**
   * Lifecycle hook: called when the page is loaded. Currently a no-op.
   */
  onLoad: function(options) {

  },

  /**
   * Lifecycle hook: called when the first render has completed. Currently a no-op.
   */
  onReady: function() {

  },

  /**
   * Lifecycle hook: called when the page is shown. Currently a no-op.
   */
  onShow: function() {

  },

  /**
   * Lifecycle hook: called when the page is hidden. Currently a no-op.
   */
  onHide: function() {

  },

  /**
   * Lifecycle hook: called when the page is unloaded. Currently a no-op.
   */
  onUnload: function() {

  },

  /**
   * Page event handler: the user pulled down to refresh. Currently a no-op.
   */
  onPullDownRefresh: function() {

  },

  /**
   * Page event handler: the page was scrolled to the bottom. Currently a no-op.
   */
  onReachBottom: function() {

  },

  /**
   * Called when the user taps "share" in the top-right menu. Currently a no-op.
   */
  onShareAppMessage: function() {

  },


  /**开始录音 */
  start: function() {
    clearInterval(init) 
    recorderManager.onStart((res) => {
      console.log('开始录音')
      this.setData({
        status: 1
      })
    })

    recorderManager.onStop((res) => {
      console.log('停止录音', res)
      this.setData({
        tempFilePath: res.tempFilePath,
        status: 3
      })
      this.timeCounter(this.data.time)
    })

    const options = {
      duration: this.data.duration,   
      format: 'mp3', 
    }
    this.timeCounter()
    recorderManager.start(options)
  },

  /**
   * 录音暂停
   */
  stop: function() {
    recorderManager.onPause(() => {
      console.log('recorder pause')
      this.setData({
        status: 2
      })
    })
    this.timeCounter(this.data.time)
    recorderManager.pause()
  },

  /**
   * 录音继续
   */
  continue: function() {
    this.setData({
      status: 1
    })
    this.timeCounter()
    recorderManager.resume()
  },

  /**
   * 录音停止
   */
  shutoff: function() {
    recorderManager.onStop((res) => {
      console.log('recorder stop', res)
      this.setData({
        tempFilePath: res.tempFilePath,
        status: 3
      })
    })
    this.timeCounter(this.data.time)
    recorderManager.stop()

  },
  /**
   * 录音识别
   */
  recognition: function() {
    var that=this;
    wx.request({
      url: 'https://tencentcloud.cdhwdl.com:3000', //仅为示例，并非真实的接口地址
      method:'post',
      data: {
        x: "data:audio/mp3;base64," + wx.getFileSystemManager().readFileSync(this.data.tempFilePath, 'base64')
      },
      header: {
        'content-type': 'application/json' // 默认值
      },
      success (res) {
        that.setData({
          Words: res.data.Result.Result
        })
        console.log(res.data)

      }
    })

  },

  /**
   * 录音播放
   */
  play: function() {
    innerAudioContext.src = this.data.tempFilePath
    innerAudioContext.obeyMuteSwitch = false

    
    if (this.data.actionStatus == 0) {
      this.setData({
        actionStatus: 1
      })
      innerAudioContext.play()
    }
  
    innerAudioContext.onEnded(() => {
      innerAudioContext.stop()
      this.setData({
        actionStatus: 0
      })
    })
  },

  
  timeCounter: function(time) {
    var that = this
    if (time == undefined) {
   
      init = setInterval(function() {
        var time = that.data.time + 1;
        that.setData({
          time: time
        })
      }, 1000);
    } else {
      clearInterval(init)
      console.log("暂停计时")
    }
  },

  /**
   * 重新录制
   */
  again: function() {
    var that = this
    wx.showModal({
      title: "重新录音",
      content: "是否重新录制?",
      success(res) {
        if (res.confirm) {
          that.setData({
            time: 0, 
            tempFilePath: "", 
            status: 0,
            actionStatus: 0
          })
          innerAudioContext.stop()
        }
      }
    })
  }
})

使用到的知识点： Page 构造器

录音管理器

HTTPS 网络请求

文件管理器FileSystemManager读取指定编码的文件内容

数据传递setData

注意：如果自定义函数中嵌套了 wx 等对象的回调函数（例如 wx.request 的 success 回调），且回调使用普通 function 书写，那么回调内部的 this 不再指向页面实例。因此应该先在外层声明 "var that=this"，再在回调中使用 "that.setData" 来传递数据；如果回调写成箭头函数，则可以直接使用 this。

后端数据结构

第三步：搭建nodejs服务端

任意安装一款Linux发行版系统(安装过程略)

[root@zhang .nvm]# cat /etc/redhat-release 
CentOS release 6.9 (Final)

安装2.0版本以上的git客户端，如果你的系统是Centos发行版的，可以参考下面的安装演示；如果是其他发行版，可以参考git官网指引，通过简单的命令即可安装

非Centos发行版系统安装方式参考Git官方文档下载指引

Centos发行版系统（这里是Centos6.9）安装流程如下：

安装Git依赖包：

检查是否安装"Development Tools"软件组，若未安装则执行安装命令

[root@zhang tmp]# yum grouplist | grep "Development Tools"
[root@zhang tmp]# 
[root@zhang yum.repos.d]# yum  groupinstall "Development Tools" -y
[root@zhang yum.repos.d]# yum grouplist | grep "Development tools"
   Development tools

安装其他软件包（如果已安装了会提示已安装）

yum install zlib-devel -y
yum install perl-ExtUtils-MakeMaker -y
yum install asciidoc -y 
yum install xmlto -y
yum install openssl-devel -y 
yum install gcc -y 
yum install curl-devel -y 
yum install expat-devel -y 
yum install gettext-devel -y

卸载现有Git

[root@zhang git-2.0.5]# yum remove git -y

下载2.0版本的Git客户端，如果下载慢，可以用网速较好的机器下载后再上传到服务器中，下载后解压

[root@zhang tmp]# wget https://www.kernel.org/pub/software/scm/git/git-2.0.5.tar.gz

[root@zhang tmp]# ls -lh | grep git
drwxrwxr-x 19 root  root   12K Dec 19  2014 git-2.0.5
-rw-r--r--  1 root  root  4.7M Dec 19  2014 git-2.0.5.tar.gz

进入解压目录，三步编译安装法安装

软件配置与检查

[root@zhang git-2.0.5]# ./configure --prefix=/usr/local/git

编译成二进制文件

[root@zhang git-2.0.5]# make

安装编译后的文件到指定目录

[root@zhang git-2.0.5]# make install

将Git的运行程序路径配置到全局环境变量中（路径为"/usr/local/git/bin"）

[root@zhang git-2.0.5]# vi /etc/profile

[root@zhang git-2.0.5]# cat /etc/profile | grep "export PATH="
export PATH=/usr/local/nginx/sbin:/usr/local/php/bin:/usr/local/mysql/bin:$PATH:/usr/local/git/bin

使得修改生效

[root@zhang git-2.0.5]# source /etc/profile
[root@zhang git-2.0.5]#

查看git版本号

[root@zhang git-2.0.5]# git --version
git version 2.0.5

安装nvm

参考官方文档：https://github.com/nvm-sh/nvm/blob/master/README.md

[root@zhang git-2.0.5]# curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.35.3/install.sh | bash
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13527  100 13527    0     0   5385      0  0:00:02  0:00:02 --:--:-- 12131
=> Downloading nvm from git to '/root/.nvm'
=> Cloning into '/root/.nvm'...
remote: Enumerating objects: 290, done.
remote: Counting objects: 100% (290/290), done.
remote: Compressing objects: 100% (257/257), done.
remote: Total 290 (delta 35), reused 97 (delta 20), pack-reused 0
Receiving objects: 100% (290/290), 163.27 KiB | 8.00 KiB/s, done.
Resolving deltas: 100% (35/35), done.
Checking connectivity... done.
=> Compressing and cleaning up git repository

=> Appending nvm source string to /root/.bashrc
=> Appending bash_completion source string to /root/.bashrc
=> Close and reopen your terminal to start using nvm or run the following to use it now:

export NVM_DIR="$HOME/.nvm"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"  # This loads nvm
[ -s "$NVM_DIR/bash_completion" ] && \. "$NVM_DIR/bash_completion"  # This loads nvm bash_completion

在当前用户的环境变量配置文件"~/.bash_profile"或者全局环境变量配置文件"/etc/profile"中加入如下内容

export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" # This loads nvm

[root@zhang ~]# vi ~/.bash_profile
[root@zhang ~]# tail -2f ~/.bash_profile 
export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"

重载环境变量

source ~/.bash_profile

测试nvm是否安装成功

[root@zhang ~]# nvm --version
0.35.3
[root@zhang ~]#

安装Node.js 7.10.1 版本及以上

[root@zhang iai]# nvm install v10.6.0
Downloading and installing node v10.6.0...
Downloading https://nodejs.org/dist/v10.6.0/node-v10.6.0-linux-x64.tar.xz...
######################################################################## 100.0%
Computing checksum with sha256sum
Checksums matched!
Now using node v10.6.0 (npm v6.1.0)
[root@zhang iai]# node -v
v10.6.0

创建nodejs web项目仓库

[root@zhang data]# mkdir -p /data/nodejs

创建语音识别项目

[root@zhang ~]# mkdir /data/nodejs/asr
[root@zhang ~]# cd /data/nodejs/asr

安装tencentcloud-sdk-nodejs并创建一个项目入口文件app.js

[root@zhang asr]# npm install tencentcloud-sdk-nodejs --save
[root@zhang asr]# ls -lh
total 20K
drwxr-xr-x 51 root root 4.0K Apr 16 14:12 node_modules
-rw-r--r--  1 root root  13K Apr 16 14:12 package-lock.json
[root@zhang asr]# touch app.js
[root@zhang asr]# ls -lh
total 20K
-rw-r--r--  1 root root    0 Apr 16 14:13 app.js
drwxr-xr-x 51 root root 4.0K Apr 16 14:12 node_modules
-rw-r--r--  1 root root  13K Apr 16 14:12 package-lock.json

通过API 3.0 Explorer生成tencentcloud-sdk-nodejs下一句话识别API的调用Demo

https://console.cloud.tencent.com/api/explorer?Product=aai&Version=2018-05-22&Action=SentenceRecognition&SignVersion=

在实现Web功能之前，我们需要知道小程序的服务端只允许HTTPS协议的地址，所以我们应该通过nodejs的HTTPS模块来实现一个加密的Web服务，具体流程如下：

通过一个已经实名认证的腾讯云账号在控制台进入“SSL证书”控制台，点击【申请免费证书】为你的小程序服务端域名免费申请一个SSL加密证书

申请成功后下载证书文件压缩包

在Linux服务端nodejs的项目目录下创建certificate证书存放目录和voice语音存放目录，并将两个目录的权限配置为775

[root@zhang asr]# ls -lh
total 20K
-rw-r--r--  1 root root    0 Apr 16 14:13 app.js
drwxr-xr-x 51 root root 4.0K Apr 16 14:12 node_modules
-rw-r--r--  1 root root  13K Apr 16 14:12 package-lock.json
[root@zhang asr]# mkdir certificate
[root@zhang asr]# chmod 775 certificate/
[root@zhang asr]# mkdir voice
[root@zhang asr]# chmod 775 voice/

上传证书压缩包中Nginx目录下的两个证书文件到服务端的certificate目录下，并重命名为"server.key"、"server.crt"

[root@zhang asr]# cd certificate/
[root@zhang certificate]# ls -lh
total 8.0K
-rw-r--r-- 1 root root 3.7K Apr 15 10:48 1_tencentcloud.cdhwdl.com_bundle.crt
-rw-r--r-- 1 root root 1.7K Apr 15 10:48 2_tencentcloud.cdhwdl.com.key
[root@zhang certificate]# mv 1_tencentcloud.cdhwdl.com_bundle.crt server.crt
[root@zhang certificate]# mv 2_tencentcloud.cdhwdl.com.key server.key
[root@zhang certificate]# ls -lh
total 8.0K
-rwxr-xr-x 1 root root 3.7K Apr 15 10:48 server.crt
-rwxr-xr-x 1 root root 1.7K Apr 15 10:48 server.key

编辑项目入口文件app.js,通过nodejs的https模块创建一个web服务器并调用上述Demo

const https = require('https');
const fs = require('fs');
const path = require('path');
// Loaded once at start-up so a missing SDK fails fast instead of on the first request.
const tencentcloud = require('tencentcloud-sdk-nodejs');

const AaiClient = tencentcloud.aai.v20180522.Client;
const models = tencentcloud.aai.v20180522.Models;
const Credential = tencentcloud.common.Credential;
const ClientProfile = tencentcloud.common.ClientProfile;
const HttpProfile = tencentcloud.common.HttpProfile;

// Tencent Cloud API key pair. Taken from the environment when set; otherwise
// fill in the two empty strings below.
const SECRET_ID = process.env.TENCENTCLOUD_SECRET_ID || "";
const SECRET_KEY = process.env.TENCENTCLOUD_SECRET_KEY || "";

// Uploaded audio is stored next to this script, regardless of the working directory.
const VOICE_DIR = path.join(__dirname, 'voice');

const privateKey  = fs.readFileSync(path.join(__dirname, './certificate/server.key'), 'utf8');
const certificate = fs.readFileSync(path.join(__dirname, './certificate/server.crt'), 'utf8');
const credentials = {key: privateKey, cert: certificate};

/**
 * Build the SentenceRecognition API client (one instance shared by all requests).
 */
function createAaiClient() {
    const httpProfile = new HttpProfile();
    httpProfile.endpoint = "aai.tencentcloudapi.com";
    const clientProfile = new ClientProfile();
    clientProfile.httpProfile = httpProfile;
    return new AaiClient(new Credential(SECRET_ID, SECRET_KEY), "ap-guangzhou", clientProfile);
}

/**
 * Turn an error value into a string that survives JSON.stringify
 * (Error objects otherwise lose their message when serialised).
 */
function describeError(err) {
    return (err && err.message) ? String(err.message) : String(err);
}

const aaiClient = createAaiClient();

/**
 * Request handler.
 *
 * Expects a JSON body `{ "x": "data:audio/<fmt>;base64,<audio>" }`, stores the
 * decoded audio under ./voice, sends it to the SentenceRecognition API and
 * answers with `{ "Result": <API response object> }`. On failure `Result`
 * carries an error description string instead.
 */
const httpsServer = https.createServer(credentials, function(req, res) {
    const chunks = [];
    let responded = false;

    // Both the file write and the API call may want to answer; only the first
    // answer is sent, so the response is never ended twice.
    const reply = (statusCode, result) => {
        if (responded) {
            return;
        }
        responded = true;
        res.statusCode = statusCode;
        res.setHeader('Content-Type', 'application/json');
        res.end(JSON.stringify({ Result: result }));
    };

    req.on('data', (chunk) => {
        chunks.push(chunk);
    }).on('error', (err) => {
        reply(400, describeError(err));
    }).on('end', () => {
        // A malformed body must not take the whole server down.
        let payload;
        try {
            payload = JSON.parse(Buffer.concat(chunks).toString().trim());
        } catch (err) {
            reply(400, 'request body is not valid JSON');
            return;
        }
        if (!payload || typeof payload.x !== 'string') {
            reply(400, 'request body must contain a string field "x"');
            return;
        }

        // Strip the data-URL prefix; what remains is the base64 audio.
        const base64Data = payload.x.replace(/^data:audio\/\w+;base64,/, "");
        const dataBuffer = Buffer.from(base64Data, 'base64');

        // Keep a copy of the upload, named after the arrival time.
        const name = Date.now() + ".mp3";
        fs.writeFile(path.join(VOICE_DIR, name), dataBuffer, { 'flag': 'a' }, function(err) {
            if (err) {
                reply(500, describeError(err));
            }
        });

        try {
            const recognitionReq = new models.SentenceRecognitionRequest();
            const params = {
                ProjectId: 0,
                SubServiceType: 2,
                EngSerViceType: "16k_zh",
                SourceType: 1,
                VoiceFormat: "mp3",
                UsrAudioKey: "www",
                Data: base64Data
            };
            recognitionReq.from_json_string(JSON.stringify(params));

            aaiClient.SentenceRecognition(recognitionReq, function(errMsg, response) {
                if (errMsg) {
                    // Return here: `response` is not usable on the error path.
                    reply(500, describeError(errMsg));
                    return;
                }
                reply(200, JSON.parse(response.to_json_string()));
            });
        } catch (err) {
            reply(500, describeError(err));
        }
    });

});


const SSLPORT = 3000;
httpsServer.listen(SSLPORT, '0.0.0.0', () => {});

服务端后台启用这个web服务

[root@zhang asr]# nohup node app.js &
[1] 2599
nohup: ignoring input and appending output to `nohup.out'

我们先通过postman来在线调试下

测试OK，可以将web服务地址对接到一句话识别小程序页面中了

真实调试

原创声明：本文系作者授权腾讯云开发者社区发表，未经许可，不得转载。

如有侵权，请联系 cloudcommunity@tencent.com 删除。

node.js

原创声明：本文系作者授权腾讯云开发者社区发表，未经许可，不得转载。

如有侵权，请联系 cloudcommunity@tencent.com 删除。

登录后参与评论

0 条评论

热度

实现一个前后端结构的语音识别小程序服务

实现一个前后端结构的语音识别小程序服务

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐