VoiceUtil.java
4.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
//package com.bsth.data.zndd.voice;
//
//import com.alibaba.fastjson.JSON;
//import com.alibaba.fastjson.JSONObject;
//import com.sun.media.sound.WaveFileReader;
//import com.sun.media.sound.WaveFileWriter;
//import org.slf4j.Logger;
//import org.slf4j.LoggerFactory;
//import org.springframework.util.Assert;
//import org.springframework.util.StringUtils;
//import org.vosk.LibVosk;
//import org.vosk.LogLevel;
//import org.vosk.Model;
//import org.vosk.Recognizer;
//
//import javax.sound.sampled.*;
//import java.io.*;
//import java.nio.file.Files;
//import java.nio.file.Paths;
//
// // NOTE(review): every line of this file is commented out, so VoiceUtil is currently
// // disabled and none of it is compiled. The comments added below describe the code as
// // written, for whoever re-enables it.
// //
// // Speech-recognition helper: rewrites a WAV file with a 16000 Hz target format, logs
// // fields read from its header, then asks a Vosk Recognizer for its final result and
// // returns the "text" value of that result.
//public class VoiceUtil {
// /* @Value("${leenleda.vosk.model}")
// private String VOSKMODELPATH;*/
//
// Logger log = LoggerFactory.getLogger(this.getClass());
// // Returns the recognised text for the WAV file at filePath with spaces removed, or ""
// // when the recognizer result is empty. The file at filePath is overwritten by
// // reSamplingAndSave before it is read again.
// public String getWord(String filePath) throws IOException, UnsupportedAudioFileException {
// // NOTE(review): the argument is a non-empty string literal, so this assertion can
// // never fail as written; the model path is hard-coded here and in the Model below.
// Assert.isTrue(StringUtils.hasLength("D:\\pcm\\vosk-model-small-cn-0.22"), "无效的VOS模块!");
// byte[] bytes = Files.readAllBytes(Paths.get(filePath));
// // Convert to 16 kHz
// reSamplingAndSave(bytes, filePath);
// File f = new File(filePath);
// // NOTE(review): rdf is closed only on the success path; an exception from any
// // read(...) below would skip rdf.close().
// RandomAccessFile rdf = null;
// rdf = new RandomAccessFile(f, "r");
// log.info("声音尺寸:{}", toInt(read(rdf, 4, 4)));
// log.info("音频格式:{}", toShort(read(rdf, 20, 2)));
// short track = toShort(read(rdf, 22, 2));
// log.info("1 单声道 2 双声道: {}", track);
// log.info("采样率、音频采样级别 16000 = 16KHz: {}", toInt(read(rdf, 24, 4)));
// // NOTE(review): this reads offset 22, length 2 - the same bytes as `track` above -
// // although the label is about per-second data. In the canonical RIFF/WAVE header the
// // byte rate is presumably the 4 bytes at offset 28; verify before changing.
// log.info("每秒波形的数据量:{}", toShort(read(rdf, 22, 2)));
// log.info("采样帧的大小:{}", toShort(read(rdf, 32, 2)));
// log.info("采样位数:{}", toShort(read(rdf, 34, 2)));
// rdf.close();
// LibVosk.setLogLevel(LogLevel.WARNINGS);
// try (
// Model model = new Model("D:\\pcm\\vosk-model-small-cn-0.22");
// InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream(filePath)));
// // The sample rate passed in is the audio sample rate multiplied by the channel count
// Recognizer recognizer = new Recognizer(model, 16000 * track)) {
//
// recognizer.setWords(true);
//
// // NOTE(review): `ais` is opened but never read, and no audio is handed to
// // `recognizer` before getFinalResult() is called. Presumably a read loop calling
// // acceptWaveForm is missing - confirm against the Vosk Java API.
// String result = recognizer.getFinalResult();
// log.info("识别结果:{}", result);
// if (StringUtils.hasLength(result)) {
// // The result is parsed as JSON; only its "text" field is used.
// JSONObject jsonObject = JSON.parseObject(result);
// return jsonObject.getString("text").replace(" ", "");
// }
// return "";
// }
//
//
//
// }
//
// // Combines b[0]..b[3] into an int, treating b[0] as the least significant byte.
// public static int toInt(byte[] b) {
// return (((b[3] & 0xff) << 24) + ((b[2] & 0xff) << 16) + ((b[1] & 0xff) << 8) + ((b[0] & 0xff) << 0));
// }
//
// // Combines b[0] and b[1] into a short, with b[1] shifted into the high byte.
// // NOTE(review): unlike toInt, the bytes are not masked with 0xff, so a negative b[0]
// // (0x80..0xff) is sign-extended before the addition and alters the high byte.
// public static short toShort(byte[] b) {
// return (short) ((b[1] << 8) + (b[0] << 0));
// }
//
// // Seeks rdf to pos and returns the next `length` bytes, read one byte at a time.
// public static byte[] read(RandomAccessFile rdf, int pos, int length) throws IOException {
// rdf.seek(pos);
// byte result[] = new byte[length];
// for (int i = 0; i < length; i++) {
// result[i] = rdf.readByte();
// }
// return result;
// }
//
// // Decodes `data` as WAVE audio, requests a conversion to a format that differs from the
// // source only in its sample rate (16000), and writes the converted stream as a WAVE
// // file to `path`.
// // NOTE(review): neither audioIn nor convertedIn is closed in this method.
// public static void reSamplingAndSave(byte[] data, String path) throws IOException, UnsupportedAudioFileException {
// WaveFileReader reader = new WaveFileReader();
// AudioInputStream audioIn = reader.getAudioInputStream(new ByteArrayInputStream(data));
// AudioFormat srcFormat = audioIn.getFormat();
// int targetSampleRate = 16000;
// // NOTE(review): the frame rate is copied from the source while the sample rate is
// // replaced; presumably both should be 16000 for PCM - confirm against AudioFormat docs.
// AudioFormat dstFormat = new AudioFormat(srcFormat.getEncoding(),
// targetSampleRate,
// srcFormat.getSampleSizeInBits(),
// srcFormat.getChannels(),
// srcFormat.getFrameSize(),
// srcFormat.getFrameRate(),
// srcFormat.isBigEndian());
// AudioInputStream convertedIn = AudioSystem.getAudioInputStream(dstFormat, audioIn);
// File file = new File(path);
// WaveFileWriter writer = new WaveFileWriter();
// writer.write(convertedIn, AudioFileFormat.Type.WAVE, file);
// // NOTE(review): the "*/" after the closing brace below has no matching "/*" (the only
// // block comment opens and closes around VOSKMODELPATH above); it would not compile if
// // this file were uncommented.
// }*/
//}