springBoot+vue+百度语音识别
2021-04-17 13:28
标签:ora 输出 recorder node stop form fast exec 写入 1.将百度语音识别demo下载下来 并且用maven封装成一个jar包 核心代码如下 调用 obtainAsrResult 方法即可获得识别后字符串 2.使用 ffmpeg 对音频进行转码 下载ffmpeg 并且将bin目录设置到环境变量即可 代码如下: 最后 附上vue实现录音功能 并上传到后台 获得识别后字符串 1.methods域内定义以下方法 html元素调用 readyOriginal 方法 mounted 域内调用 initAudio 方法 记得引入js文件 springBoot+vue+百度语音识别 标签:ora 输出 recorder node stop form fast exec 写入 原文地址:https://www.cnblogs.com/guanxiaohe/p/13300252.html

package com.baidu.speech.restapi.asrdemo;
import com.alibaba.fastjson.JSONObject;
import com.baidu.speech.restapi.asrdemo.common.ConnUtil;
import com.baidu.speech.restapi.asrdemo.common.DemoException;
import com.baidu.speech.restapi.asrdemo.common.TokenHolder;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
/**
 * Helper for Baidu's short-speech recognition REST API.
 *
 * <p>Call {@link #obtainAsrResult(byte[])} with 16 kHz mono PCM/WAV bytes to get the
 * recognizer's JSON response as a string. Configured for the "fast" (极速版) edition;
 * see the static initializer for the standard-edition parameters.
 */
public class AsrUtil {

    // Upload audio as a JSON body (base64 "speech" field) rather than a raw POST.
    private static final boolean METHOD_RAW = false;

    // API key from the Baidu console, e.g. $apiKey="g8eBUMSokVB1BHGmgxxxxxx"
    private static final String APP_KEY = "XXXXXXXXXXXXXX";
    // Secret key from the Baidu console, e.g. $SECRET_KEY="94dc99566550d87f8fa8ece112xxxxx"
    private static final String SECRET_KEY = "XXXXXXXXXXXX";

    // Default local file, used only by the raw-POST path.
    private static final String FILENAME = "16k.pcm";
    // Audio format derived from the extension: pcm/wav/amr (fast edition also takes m4a).
    private static final String FORMAT = FILENAME.substring(FILENAME.length() - 3);

    // Arbitrary unique client id required by the API.
    private static final String CUID = "1234567JAVA";
    // Sample rate is fixed by the recognizer.
    private static final int RATE = 16000;

    private static final String SERVER_URL;
    private static final int DEV_PID;
    // private static int LM_ID; // uncomment when testing a self-trained model
    private static final String SCOPE;

    // Standard edition would be:
    //   SERVER_URL = "http://vop.baidu.com/server_api";  (https also works)
    //   DEV_PID = 1537;  // Mandarin, input-method model; other languages: see docs
    //   SCOPE = "audio_voice_assistant_get";
    // Self-trained platform: DEV_PID = 8001; LM_ID = <your model id>.
    // Very old applications may have no scope at all; set SCOPE = null to skip the check.

    // "Fast" edition parameters.
    static {
        SERVER_URL = "http://vop.baidu.com/pro_api"; // may be switched to https
        DEV_PID = 80001;
        SCOPE = "brain_enhanced_asr";
    }

    /**
     * Recognizes the given audio bytes and returns the raw JSON result string.
     *
     * @param bytes 16 kHz mono audio (wav payload expected by the JSON path)
     * @throws IOException   on network failure
     * @throws DemoException on token/API errors
     */
    public static String obtainAsrResult(byte[] bytes) throws IOException, DemoException {
        String resultJson = execute(bytes);
        System.out.println("识别结束:结果是:");
        System.out.println(resultJson);
        return resultJson;
    }

    /** Fetches a fresh access token, then posts the audio via the JSON method. */
    public static String execute(byte[] bytes) throws IOException, DemoException {
        TokenHolder holder = new TokenHolder(APP_KEY, SECRET_KEY, SCOPE);
        holder.resfresh();
        String token = holder.getToken();
        return runJsonPostMethod(token, bytes);
    }

    /**
     * Raw-POST variant: streams {@link #FILENAME} directly with an audio Content-Type.
     * Kept for reference; the JSON path below is what production code uses.
     */
    private static String runRawPostMethod(String token) throws IOException, DemoException {
        String url2 = SERVER_URL + "?cuid=" + ConnUtil.urlEncode(CUID) + "&dev_pid=" + DEV_PID + "&token=" + token;
        // Self-trained platform adds "&lm_id=" + LM_ID to the query string.
        String contentTypeStr = "audio/" + FORMAT + "; rate=" + RATE;
        byte[] content = getFileContent(FILENAME);
        HttpURLConnection conn = (HttpURLConnection) new URL(url2).openConnection();
        conn.setConnectTimeout(5000);
        conn.setRequestProperty("Content-Type", contentTypeStr);
        conn.setRequestMethod("POST");
        conn.setDoOutput(true);
        try (OutputStream out = conn.getOutputStream()) {
            out.write(content);
        }
        System.out.println("url is " + url2);
        System.out.println("header is " + "Content-Type :" + contentTypeStr);
        return ConnUtil.getResponseString(conn);
    }

    /**
     * Posts the audio as a JSON body with a base64 "speech" field.
     *
     * @param token access token from {@link TokenHolder}
     * @param bytes wav audio bytes (format is reported as "wav" because the caller
     *              converts uploads with ffmpeg before recognition)
     */
    public static String runJsonPostMethod(String token, byte[] bytes) throws DemoException, IOException {
        String speech = base64Encode(bytes);
        JSONObject params = new JSONObject();
        params.put("dev_pid", DEV_PID);
        // params.put("lm_id", LM_ID); // self-trained model only
        params.put("format", "wav");
        params.put("rate", RATE);
        params.put("token", token);
        params.put("cuid", CUID);
        params.put("channel", "1");
        params.put("len", bytes.length);
        params.put("speech", speech);

        HttpURLConnection conn = (HttpURLConnection) new URL(SERVER_URL).openConnection();
        conn.setConnectTimeout(5000);
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");
        conn.setDoOutput(true);
        try (OutputStream out = conn.getOutputStream()) {
            out.write(params.toString().getBytes(StandardCharsets.UTF_8));
        }
        String result = ConnUtil.getResponseString(conn);

        // Replace the (potentially huge) base64 payload before logging the request.
        params.put("speech", "base64Encode(getFileContent(FILENAME))");
        System.out.println("url is : " + SERVER_URL);
        System.out.println("params is :" + params.toString());
        return result;
    }

    /** Reads the whole file into memory; throws if it is missing or unreadable. */
    private static byte[] getFileContent(String filename) throws DemoException, IOException {
        File file = new File(filename);
        if (!file.canRead()) {
            System.err.println("文件不存在或者不可读: " + file.getAbsolutePath());
            throw new DemoException("file cannot read: " + file.getAbsolutePath());
        }
        try (FileInputStream is = new FileInputStream(file)) {
            return ConnUtil.getInputStreamContent(is);
        }
    }

    /** Base64-encodes the audio payload (JDK 8+ {@link Base64}). */
    private static String base64Encode(byte[] content) {
        return Base64.getEncoder().encodeToString(content);
    }
}
@Override
public String aiAsrTest(MultipartFile file) {
String r = null;
try {
byte[] bytes = file.getBytes();
// 对上传文件进行转码处理
String path = "D:\\bwbd\\temp\\";
File dir = new File(path);
if (dir == null || !dir.exists()) {
dir.mkdirs();
}
File file1 = new File(path + "16k1.wav");
file.transferTo(file1);
File file2 = new File(path + "16k2.wav");
try {
logger.info("========音频格式转换======");
Runtime runtime = Runtime.getRuntime();
String cutCmd = "ffmpeg -y -i " + file1 + " -acodec pcm_s16le -f s16le -ac 1 -ar 16000 " + file2;
Process proce = runtime.exec(cutCmd);
InputStream erro = proce.getErrorStream();
byte[] a = new byte[1024];
int j = 0;
while ((j = erro.read(a)) > -1) {
// logger.info(new String(a));
}
} catch (Exception e) {
e.printStackTrace();
logger.info("=========文件 "+ file + " 正在转换出现异常");
}
byte[] fileByteArray = FileUtil.getFileByteArray(file2);
String result = AsrUtil.obtainAsrResult(fileByteArray);
log.info("===ai接口返回:" + result);
JSONObject jsonObject = JSONObject.parseObject(result);
Object result1 = jsonObject.get("result");
if (null != result1) {
List
initAudio(){
// this.$nextTick(() => {
// try {
// //
// window.AudioContext = window.AudioContext || window.webkitAudioContext;
// navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia;
// window.URL = window.URL || window.webkitURL;
//
// audio_context = new AudioContext;
// console.log(‘navigator.getUserMedia ‘ + (navigator.getUserMedia ? ‘available.‘ : ‘not present!‘));
// } catch (e) {
// alert(‘No web audio support in this browser!‘);
// }
var _this = this;
navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia;
navigator.getUserMedia({audio: true}, function (stream) {
let recorder = new HZRecorder(stream);
_this.recorder = recorder;
console.log(‘初始化完成‘);
}, function(e) {
console.log(‘No live audio input: ‘ + e);
});
// })
},
readyOriginal () {
if (!this.isVoice) {
//
this.recorder && this.recorder.start();
this.isVoice = true
} else {
this.isVoice = false
// 结束录音
this.recorder && this.recorder.stop();
setTimeout(()=> {
// https://localhost/bwbd/fg/aiAsrTest
var mp3Blob = this.recorder.upload();
var fd = new FormData();
fd.append(‘file‘, mp3Blob);
// this.$axios.post(‘https://localhost/api/webUser/insertUserLog‘,allJoin).then(
// res=>{
// if(res.data.data===true){
// return
// }
// }
// )
this.$axios.post(‘http://localhost/bwbd/fg/aiAsrTest‘,fd).then((res) => {
// 这里做登录拦截
if (res.data.status === 200) {
console.log(‘保存成功‘);
console.log(res.data.data)
} else {
this.returnmsg = ‘上传失败‘
}
})
},1000)
}
},
import { HZRecorder } from '../utils/HZRecorder.js';
/**
 * Minimal microphone recorder producing a mono 16 kHz PCM WAV Blob.
 * Usage: r = new HZRecorder(stream); r.start(); ... r.stop(); blob = r.upload();
 */
function HZRecorder(stream, config) {
    config = config || {};
    config.sampleBits = config.sampleBits || 16;    // 采样数位 8, 16
    config.sampleRate = config.sampleRate || 16000; // 采样率16khz

    var context = new (window.webkitAudioContext || window.AudioContext)();
    var audioInput = context.createMediaStreamSource(stream);
    var createScript = context.createScriptProcessor || context.createJavaScriptNode;
    var recorder = createScript.apply(context, [4096, 1, 1]);

    var audioData = {
        size: 0          // total samples captured
        , buffer: []     // captured Float32Array chunks
        , inputSampleRate: context.sampleRate   // device sample rate
        , inputSampleBits: 16                   // input sample size
        , outputSampleRate: config.sampleRate   // target sample rate
        , oututSampleBits: config.sampleBits    // target sample size (field name kept as-is)
        , input: function (data) {
            this.buffer.push(new Float32Array(data));
            this.size += data.length;
        }
        // Merge all chunks, then downsample to the target rate by decimation.
        , compress: function () {
            var data = new Float32Array(this.size);
            var offset = 0;
            for (var i = 0; i < this.buffer.length; i++) {
                data.set(this.buffer[i], offset);
                offset += this.buffer[i].length;
            }
            // keep every Nth sample, N = inputRate / outputRate
            var compression = parseInt(this.inputSampleRate / this.outputSampleRate);
            var length = data.length / compression;
            var result = new Float32Array(length);
            var index = 0, j = 0;
            while (index < length) {
                result[index] = data[j];
                j += compression;
                index++;
            }
            return result;
        }
        // Build a mono PCM WAV file: 44-byte RIFF header followed by samples.
        , encodeWAV: function () {
            var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
            var sampleBits = Math.min(this.inputSampleBits, this.oututSampleBits);
            var bytes = this.compress();
            var dataLength = bytes.length * (sampleBits / 8);
            var buffer = new ArrayBuffer(44 + dataLength);
            var data = new DataView(buffer);
            var channelCount = 1; // 单声道
            var offset = 0;
            var writeString = function (str) {
                for (var i = 0; i < str.length; i++) {
                    data.setUint8(offset + i, str.charCodeAt(i));
                }
            }
            // RIFF chunk descriptor
            writeString('RIFF'); offset += 4;
            // file size minus the first 8 header bytes
            data.setUint32(offset, 36 + dataLength, true); offset += 4;
            writeString('WAVE'); offset += 4;
            // fmt sub-chunk
            writeString('fmt '); offset += 4;
            // fmt chunk size: 16 for PCM
            data.setUint32(offset, 16, true); offset += 4;
            // audio format: 1 = PCM
            data.setUint16(offset, 1, true); offset += 2;
            // channel count
            data.setUint16(offset, channelCount, true); offset += 2;
            // sample rate
            data.setUint32(offset, sampleRate, true); offset += 4;
            // byte rate = channels * sampleRate * bytesPerSample
            data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
            // block align = channels * bytesPerSample
            data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
            // bits per sample
            data.setUint16(offset, sampleBits, true); offset += 2;
            // data sub-chunk
            writeString('data'); offset += 4;
            data.setUint32(offset, dataLength, true); offset += 4;
            // write samples, mapping [-1, 1] floats to integer PCM
            if (sampleBits === 8) {
                for (var i = 0; i < bytes.length; i++, offset++) {
                    var s = Math.max(-1, Math.min(1, bytes[i]));
                    var val = s < 0 ? s * 0x8000 : s * 0x7FFF;
                    val = parseInt(255 / (65535 / (val + 32768)));
                    data.setInt8(offset, val, true);
                }
            } else {
                for (var i = 0; i < bytes.length; i++, offset += 2) {
                    var s = Math.max(-1, Math.min(1, bytes[i]));
                    data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
                }
            }
            return new Blob([data], { type: 'audio/wav' });
        }
    };

    // 开始录音: route mic -> processor -> destination
    this.start = function () {
        audioInput.connect(recorder);
        recorder.connect(context.destination);
    }
    // 停止
    this.stop = function () {
        recorder.disconnect();
    }
    // 获取音频文件: stop and return the encoded WAV blob
    this.getBlob = function () {
        this.stop();
        return audioData.encodeWAV();
    }
    // 回放 into the given <audio> element
    this.play = function (audio) {
        audio.src = window.URL.createObjectURL(this.getBlob());
    }
    // 上传: blob to append to a FormData
    this.upload = function () {
        return this.getBlob()
    }
    // 音频采集: collect samples as they arrive
    recorder.onaudioprocess = function (e) {
        audioData.input(e.inputBuffer.getChannelData(0));
    }
    return this;
}
export {
HZRecorder
}
文章标题:springBoot+vue+百度语音识别
文章链接:http://soscw.com/index.php/essay/76082.html