VoiceUtil.java 3.94 KB
package com.bsth.data.zndd.voice;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.sun.media.sound.WaveFileReader;
import com.sun.media.sound.WaveFileWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;
import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;

import javax.sound.sampled.*;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;

public class VoiceUtil {
   /* @Value("${leenleda.vosk.model}")
    private String VOSKMODELPATH;*/

    Logger log = LoggerFactory.getLogger(this.getClass());
    public String getWord(String filePath) throws IOException, UnsupportedAudioFileException {
        Assert.isTrue(StringUtils.hasLength("D:\\pcm\\vosk-model-small-cn-0.22"), "无效的VOS模块!");
        byte[] bytes = Files.readAllBytes(Paths.get(filePath));
        // 转换为16KHZ
        reSamplingAndSave(bytes, filePath);
        File f = new File(filePath);
        RandomAccessFile rdf = null;
        rdf = new RandomAccessFile(f, "r");
        log.info("声音尺寸:{}", toInt(read(rdf, 4, 4)));
        log.info("音频格式:{}", toShort(read(rdf, 20, 2)));
        short track = toShort(read(rdf, 22, 2));
        log.info("1 单声道 2 双声道: {}", track);
        log.info("采样率、音频采样级别 16000 = 16KHz: {}", toInt(read(rdf, 24, 4)));
        log.info("每秒波形的数据量:{}", toShort(read(rdf, 22, 2)));
        log.info("采样帧的大小:{}", toShort(read(rdf, 32, 2)));
        log.info("采样位数:{}", toShort(read(rdf, 34, 2)));
        rdf.close();
        LibVosk.setLogLevel(LogLevel.WARNINGS);
        try (
             Model model = new Model("D:\\pcm\\vosk-model-small-cn-0.22");
             InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream(filePath)));
             // 采样率为音频采样率的声道倍数
             Recognizer recognizer = new Recognizer(model, 16000 * track)) {
            
            recognizer.setWords(true);
            
            String result = recognizer.getFinalResult();
            log.info("识别结果:{}", result);
            if (StringUtils.hasLength(result)) {
                JSONObject jsonObject = JSON.parseObject(result);
                return jsonObject.getString("text").replace(" ", "");
            }
            return "";
        }
        
        
        
    }

    public static int toInt(byte[] b) {
        return (((b[3] & 0xff) << 24) + ((b[2] & 0xff) << 16) + ((b[1] & 0xff) << 8) + ((b[0] & 0xff) << 0));
    }

    public static short toShort(byte[] b) {
        return (short) ((b[1] << 8) + (b[0] << 0));
    }

    public static byte[] read(RandomAccessFile rdf, int pos, int length) throws IOException {
        rdf.seek(pos);
        byte result[] = new byte[length];
        for (int i = 0; i < length; i++) {
            result[i] = rdf.readByte();
        }
        return result;
    }

    public static void reSamplingAndSave(byte[] data, String path) throws IOException, UnsupportedAudioFileException {
        WaveFileReader reader = new WaveFileReader();
        AudioInputStream audioIn = reader.getAudioInputStream(new ByteArrayInputStream(data));
        AudioFormat srcFormat = audioIn.getFormat();
        int targetSampleRate = 16000;
        AudioFormat dstFormat = new AudioFormat(srcFormat.getEncoding(),
                targetSampleRate,
                srcFormat.getSampleSizeInBits(),
                srcFormat.getChannels(),
                srcFormat.getFrameSize(),
                srcFormat.getFrameRate(),
                srcFormat.isBigEndian());
        AudioInputStream convertedIn = AudioSystem.getAudioInputStream(dstFormat, audioIn);
        File file = new File(path);
        WaveFileWriter writer = new WaveFileWriter();
        writer.write(convertedIn, AudioFileFormat.Type.WAVE, file);
    }
}