voice.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. /**
  2. * Created by lycheng on 2019/8/1.
  3. *
  4. * 语音听写流式 WebAPI 接口调用示例 接口文档(必看):https://doc.xfyun.cn/rest_api/语音听写(流式版).html
  5. * webapi 听写服务参考帖子(必看):http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra=
  6. * 语音听写流式WebAPI 服务,热词使用方式:登陆开放平台https://www.xfyun.cn/后,找到控制台--我的应用---语音听写---个性化热词,上传热词
  7. * 注意:热词只能在识别的时候会增加热词的识别权重,需要注意的是增加相应词条的识别率,但并不是绝对的,具体效果以您测试为准。
  8. * 错误码链接:
  9. * https://www.xfyun.cn/doc/asr/voicedictation/API.html#%E9%94%99%E8%AF%AF%E7%A0%81
  10. * https://www.xfyun.cn/document/error-code (code返回错误码时必看)
  11. * 语音听写流式WebAPI 服务,方言或小语种试用方法:登陆开放平台https://www.xfyun.cn/后,在控制台--语音听写(流式)--方言/语种处添加
  12. * 添加后会显示该方言/语种的参数值
  13. *
  14. */
  15. // 1. websocket连接:判断浏览器是否兼容,获取websocket url并连接,这里为了方便本地生成websocket url
  16. // 2. 获取浏览器录音权限:判断浏览器是否兼容,获取浏览器录音权限,
  17. // 3. js获取浏览器录音数据
  18. // 4. 将录音数据处理为文档要求的数据格式:采样率16k或8K、位长16bit、单声道;该操作属于纯数据处理,使用webWork处理
  19. // 5. 根据要求(采用base64编码,每次发送音频间隔40ms,每次发送音频字节数1280B)将处理后的数据通过websocket传给服务器,
  20. // 6. 实时接收websocket返回的数据并进行处理
  21. // ps: 该示例用到了es6中的一些语法,建议在chrome下运行
import CryptoJS from "crypto-js";
// Worker that converts raw Float32 microphone samples into the 16 kHz /
// 16-bit mono PCM frames required by the dictation API (see transcode.worker.js).
let transWorker = new Worker(new URL('./transcode.worker.js', import.meta.url));
// APPID / APISecret / APIKey come from the iFlytek console:
// My Apps -> Speech Dictation (streaming).
// SECURITY NOTE(review): credentials are hard-coded here for demo purposes only.
// In production the signed WebSocket URL must be produced by a backend so these
// secrets are never shipped to the client.
const APPID = "8c1c8431";
const API_SECRET = "ZTVhNDAyZWIyYTNlODFmNTdmODI3MGM3";
const API_KEY = "ebc9b65a1f14b924b0e95730aa52f9ea";
  28. /**
  29. * 获取websocket url
  30. * 该接口需要后端提供,这里为了方便前端处理
  31. */
  32. function getWebSocketUrl() {
  33. return new Promise((resolve) => {
  34. // 请求地址根据语种不同变化
  35. var url = "wss://iat-api.xfyun.cn/v2/iat";
  36. var host = "iat-api.xfyun.cn";
  37. var apiKey = API_KEY;
  38. var apiSecret = API_SECRET;
  39. var date = new Date().toGMTString();
  40. var algorithm = "hmac-sha256";
  41. var headers = "host date request-line";
  42. var signatureOrigin = `host: ${host}\ndate: ${date}\nGET /v2/iat HTTP/1.1`;
  43. var signatureSha = CryptoJS.HmacSHA256(signatureOrigin, apiSecret);
  44. var signature = CryptoJS.enc.Base64.stringify(signatureSha);
  45. var authorizationOrigin = `api_key="${apiKey}", algorithm="${algorithm}", headers="${headers}", signature="${signature}"`;
  46. var authorization = btoa(authorizationOrigin);
  47. url = `${url}?authorization=${authorization}&date=${date}&host=${host}`;
  48. resolve(url);
  49. });
  50. }
  51. export class IatRecorder {
  52. constructor({ language, accent, appId } = {}) {
  53. let self = this;
  54. this.status = "null";
  55. this.language = language || "zh_cn";
  56. this.accent = accent || "mandarin";
  57. this.appId = appId || APPID;
  58. // 记录音频数据
  59. this.audioData = [];
  60. // 记录听写结果
  61. this.resultText = "";
  62. // wpgs下的听写结果需要中间状态辅助记录
  63. this.resultTextTemp = "";
  64. transWorker.onmessage = function (event) {
  65. self.audioData.push(...event.data);
  66. };
  67. }
  68. // 可用于操作业务组件内的逻辑 外部如果传this APP 就是 组件实例的this
  69. // setAPP(APP) {
  70. // this.APP = APP
  71. // }
  72. // 修改录音听写状态
  73. setStatus(status) {
  74. this.onWillStatusChange &&
  75. this.status !== status &&
  76. this.onWillStatusChange(this.status, status, this.audioData);
  77. this.status = status;
  78. }
  79. setResultText({ resultText, resultTextTemp } = {}) {
  80. this.onTextChange && this.onTextChange(resultTextTemp || resultText || "");
  81. resultText !== undefined && (this.resultText = resultText);
  82. resultTextTemp !== undefined && (this.resultTextTemp = resultTextTemp);
  83. }
  84. // 修改听写参数
  85. setParams({ language, accent } = {}) {
  86. language && (this.language = language);
  87. accent && (this.accent = accent);
  88. }
  89. // 连接websocket
  90. connectWebSocket() {
  91. return getWebSocketUrl().then((url) => {
  92. let iatWS;
  93. if ("WebSocket" in window) {
  94. iatWS = new WebSocket(url);
  95. } else if ("MozWebSocket" in window) {
  96. iatWS = new MozWebSocket(url);
  97. } else {
  98. alert("浏览器不支持WebSocket");
  99. return;
  100. }
  101. this.webSocket = iatWS;
  102. this.setStatus("init");
  103. iatWS.onopen = (e) => {
  104. // console.log('iatWSiatWSiatWS startstartstartstart')
  105. this.setStatus("ing");
  106. // 重新开始录音
  107. setTimeout(() => {
  108. this.webSocketSend();
  109. }, 20);
  110. };
  111. iatWS.onmessage = (e) => {
  112. // console.log('iatWS onmessage', JSON.stringify(e.data))
  113. this.result(e.data);
  114. };
  115. iatWS.onerror = () => {
  116. this.recorderStop();
  117. };
  118. iatWS.onclose = () => {
  119. this.recorderStop();
  120. };
  121. });
  122. }
  123. // 初始化浏览器录音
  124. recorderInit() {
  125. navigator.getUserMedia =
  126. navigator.getUserMedia ||
  127. navigator.webkitGetUserMedia ||
  128. navigator.mozGetUserMedia ||
  129. navigator.msGetUserMedia;
  130. // 创建音频环境
  131. try {
  132. this.audioContext = new (window.AudioContext ||
  133. window.webkitAudioContext)();
  134. this.audioContext.resume();
  135. if (!this.audioContext) {
  136. alert("浏览器不支持webAudioApi相关接口");
  137. return;
  138. }
  139. } catch (e) {
  140. if (!this.audioContext) {
  141. alert("浏览器不支持webAudioApi相关接口");
  142. return;
  143. }
  144. }
  145. // 获取浏览器录音权限
  146. if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
  147. navigator.mediaDevices
  148. .getUserMedia({
  149. audio: true,
  150. video: false,
  151. })
  152. .then((stream) => {
  153. getMediaSuccess(stream);
  154. })
  155. .catch((e) => {
  156. getMediaFail(e);
  157. });
  158. } else if (navigator.getUserMedia) {
  159. navigator.getUserMedia(
  160. {
  161. audio: true,
  162. video: false,
  163. },
  164. (stream) => {
  165. getMediaSuccess(stream);
  166. },
  167. function (e) {
  168. getMediaFail(e);
  169. }
  170. );
  171. } else {
  172. if (
  173. navigator.userAgent.toLowerCase().match(/chrome/) &&
  174. location.origin.indexOf("https://") < 0
  175. ) {
  176. alert(
  177. "chrome下获取浏览器录音功能,因为安全性问题,需要在localhost或127.0.0.1或https下才能获取权限"
  178. );
  179. } else {
  180. alert("无法获取浏览器录音功能,请升级浏览器或使用chrome");
  181. }
  182. this.audioContext && this.audioContext.close();
  183. return;
  184. }
  185. // 获取浏览器录音权限成功的回调
  186. let getMediaSuccess = (stream) => {
  187. // console.log("getMediaSuccess");
  188. // 创建一个用于通过JavaScript直接处理音频
  189. this.scriptProcessor = this.audioContext.createScriptProcessor(0, 1, 1);
  190. this.scriptProcessor.onaudioprocess = (e) => {
  191. // 去处理音频数据
  192. // console.log('this.statusthis.status', this.status)
  193. if (this.status === "ing") {
  194. transWorker.postMessage(e.inputBuffer.getChannelData(0));
  195. }
  196. };
  197. // 创建一个新的MediaStreamAudioSourceNode 对象,使来自MediaStream的音频可以被播放和操作
  198. this.mediaSource = this.audioContext.createMediaStreamSource(stream);
  199. // 连接
  200. this.mediaSource.connect(this.scriptProcessor);
  201. this.scriptProcessor.connect(this.audioContext.destination);
  202. this.connectWebSocket();
  203. // var audioCtx = new AudioContext(); //开启自说自听 调试用
  204. // var source = audioCtx.createMediaStreamSource(stream);
  205. // source.connect(audioCtx.destination)
  206. };
  207. let getMediaFail = (e) => {
  208. alert("请求麦克风失败");
  209. this.audioContext && this.audioContext.close();
  210. this.audioContext = undefined;
  211. // 关闭websocket
  212. if (this.webSocket && this.webSocket.readyState === 1) {
  213. this.webSocket.close();
  214. }
  215. };
  216. }
  217. recorderStart() {
  218. // this.audioData.length = 0;
  219. if (!this.audioContext) {
  220. this.recorderInit();
  221. } else {
  222. this.audioContext.resume();
  223. this.connectWebSocket();
  224. }
  225. }
  226. // 暂停录音
  227. recorderStop() {
  228. // safari下suspend后再次resume录音内容将是空白,设置safari下不做suspend
  229. if (
  230. !(
  231. /Safari/.test(navigator.userAgent) && !/Chrome/.test(navigator.userAgen)
  232. )
  233. ) {
  234. this.audioContext && this.audioContext.suspend();
  235. }
  236. // this.audioData.length = 0;
  237. this.setStatus("end");
  238. }
  239. // 处理音频数据
  240. // transAudioData(audioData) {
  241. // audioData = transAudioData.transaction(audioData)
  242. // this.audioData.push(...audioData)
  243. // }
  244. // 对处理后的音频数据进行base64编码,
  245. toBase64(buffer) {
  246. var binary = "";
  247. var bytes = new Uint8Array(buffer);
  248. var len = bytes.byteLength;
  249. for (var i = 0; i < len; i++) {
  250. binary += String.fromCharCode(bytes[i]);
  251. }
  252. return window.btoa(binary);
  253. }
  254. // 向webSocket发送数据
  255. webSocketSend() {
  256. if (this.webSocket.readyState !== 1) {
  257. return;
  258. }
  259. let audioData = this.audioData.splice(0, 1280);
  260. var params = {
  261. common: {
  262. app_id: this.appId,
  263. },
  264. business: {
  265. language: this.language, //小语种可在控制台--语音听写(流式)--方言/语种处添加试用
  266. domain: "iat",
  267. accent: this.accent, //中文方言可在控制台--语音听写(流式)--方言/语种处添加试用
  268. vad_eos: 500,
  269. dwa: "wpgs", //为使该功能生效,需到控制台开通动态修正功能(该功能免费)
  270. },
  271. data: {
  272. status: 0,
  273. format: "audio/L16;rate=16000",
  274. encoding: "raw",
  275. audio: this.toBase64(audioData),
  276. },
  277. };
  278. this.webSocket.send(JSON.stringify(params));
  279. // console.log('send 0')
  280. this.handlerInterval = setInterval(() => {
  281. // websocket未连接
  282. if (this.webSocket.readyState !== 1) {
  283. this.audioData = [];
  284. clearInterval(this.handlerInterval);
  285. return;
  286. }
  287. if (this.audioData.length === 0) {
  288. if (this.status === "end") {
  289. this.webSocket.send(
  290. JSON.stringify({
  291. data: {
  292. status: 2,
  293. format: "audio/L16;rate=16000",
  294. encoding: "raw",
  295. audio: "",
  296. },
  297. })
  298. );
  299. // console.log('send 2')
  300. this.audioData = [];
  301. clearInterval(this.handlerInterval);
  302. }
  303. return false;
  304. }
  305. audioData = this.audioData.splice(0, 1280);
  306. // 中间帧
  307. this.webSocket.send(
  308. JSON.stringify({
  309. data: {
  310. status: 1,
  311. format: "audio/L16;rate=16000",
  312. encoding: "raw",
  313. audio: this.toBase64(audioData),
  314. },
  315. })
  316. );
  317. let t = (new Date).valueOf()
  318. if (t % 11 === 0) {
  319. // console.log('send 1')
  320. }
  321. }, 40);
  322. }
  323. result(resultData) {
  324. // 识别结束
  325. let jsonData = JSON.parse(resultData);
  326. if (jsonData.data && jsonData.data.result) {
  327. let data = jsonData.data.result;
  328. let str = "";
  329. let resultStr = "";
  330. let ws = data.ws;
  331. for (let i = 0; i < ws.length; i++) {
  332. str = str + ws[i].cw[0].w;
  333. }
  334. // console.log('resultresultresultresult', this.resultText + '--------' + str)
  335. // 开启wpgs会有此字段(前提:在控制台开通动态修正功能)
  336. // 取值为 "apd"时表示该片结果是追加到前面的最终结果;取值为"rpl" 时表示替换前面的部分结果,替换范围为rg字段
  337. if (data.pgs) {
  338. if (data.pgs === "apd") {
  339. // 将resultTextTemp同步给resultText
  340. this.setResultText({
  341. resultText: this.resultTextTemp,
  342. });
  343. }
  344. // 将结果存储在resultTextTemp中
  345. this.setResultText({
  346. resultTextTemp: this.resultText + str,
  347. });
  348. } else {
  349. this.setResultText({
  350. resultText: this.resultText + str,
  351. });
  352. }
  353. }
  354. if (jsonData.code === 0 && jsonData.data.status === 2) {
  355. this.webSocket.close();
  356. }
  357. if (jsonData.code !== 0) {
  358. this.webSocket.close();
  359. // console.log(`${jsonData.code}:${jsonData.message}`);
  360. }
  361. }
  362. clearResult() {
  363. this.setResultText({ resultText: "", resultTextTemp: "" });
  364. }
  365. start() {
  366. this.clearResult();
  367. this.recorderStart();
  368. }
  369. stop() {
  370. this.webSocket && this.webSocket.close();
  371. this.recorderStop();
  372. }
  373. reStart() {
  374. this.stop()
  375. setTimeout(() => {
  376. this.start()
  377. }, 20)
  378. }
  379. }
  380. // ======================开始调用示例 可应用于业务代码中=============================
  381. /*var vConsole = new VConsole();
  382. let iatRecorder = new IatRecorder();
  383. let countInterval;
// 状态改变时触发
  385. iatRecorder.onWillStatusChange = function (oldStatus, status) {
  386. // 可以在这里进行页面中一些交互逻辑处理:倒计时(听写只有60s),录音的动画,按钮交互等
  387. // 按钮中的文字
  388. let text = {
  389. null: "开始识别", // 最开始状态
  390. init: "开始识别", // 初始化状态
  391. ing: "结束识别", // 正在录音状态
  392. end: "开始识别", // 结束状态
  393. };
  394. let senconds = 0;
  395. $(".taste-button")
  396. .removeClass(`status-${oldStatus}`)
  397. .addClass(`status-${status}`)
  398. .text(text[status]);
  399. if (status === "ing") {
  400. $("hr").addClass("hr");
  401. $(".taste-content").css("display", "none");
  402. $(".start-taste").addClass("flex-display-1");
  403. // 倒计时相关
  404. countInterval = setInterval(() => {
  405. senconds++;
  406. $(".used-time").text(
  407. `0${Math.floor(senconds / 60)}:${Math.floor(senconds / 10)}${
  408. senconds % 10
  409. }`
  410. );
  411. if (senconds >= 60) {
  412. this.stop();
  413. clearInterval(countInterval);
  414. }
  415. }, 1000);
  416. } else if (status === "init") {
  417. $(".time-box").show();
  418. $(".used-time").text("00:00");
  419. } else {
  420. $(".time-box").hide();
  421. $("hr").removeClass("hr");
  422. clearInterval(countInterval);
  423. }
  424. };
  425. // 监听识别结果的变化
  426. iatRecorder.onTextChange = function (text) {
  427. $("#result_output").text(text);
  428. };
  429. $("#taste_button, .taste-button").click(function () {
  430. if (iatRecorder.status === "ing") {
  431. iatRecorder.stop();
  432. } else {
  433. iatRecorder.start();
  434. }
  435. });*/