web-assembly-asr-sherpa-onnx-en / sherpa-onnx-asr.js
csukuangfj's picture
update model
8fef599
function freeConfig(config, Module) {
if ('buffer' in config) {
Module._free(config.buffer);
}
if ('config' in config) {
freeConfig(config.config, Module)
}
if ('transducer' in config) {
freeConfig(config.transducer, Module)
}
if ('paraformer' in config) {
freeConfig(config.paraformer, Module)
}
if ('ctc' in config) {
freeConfig(config.ctc, Module)
}
if ('feat' in config) {
freeConfig(config.feat, Module)
}
if ('model' in config) {
freeConfig(config.model, Module)
}
if ('nemoCtc' in config) {
freeConfig(config.nemoCtc, Module)
}
if ('whisper' in config) {
freeConfig(config.whisper, Module)
}
if ('moonshine' in config) {
freeConfig(config.moonshine, Module)
}
if ('tdnn' in config) {
freeConfig(config.tdnn, Module)
}
if ('senseVoice' in config) {
freeConfig(config.senseVoice, Module)
}
if ('lm' in config) {
freeConfig(config.lm, Module)
}
if ('ctcFstDecoder' in config) {
freeConfig(config.ctcFstDecoder, Module)
}
Module._free(config.ptr);
}
// The user should free the returned pointers
function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
const joinerLen = Module.lengthBytesUTF8(config.joiner || '') + 1;
const n = encoderLen + decoderLen + joinerLen;
const buffer = Module._malloc(n);
const len = 3 * 4; // 3 pointers
const ptr = Module._malloc(len);
let offset = 0;
Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
offset += encoderLen;
Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
offset += decoderLen;
Module.stringToUTF8(config.joiner || '', buffer + offset, joinerLen);
offset = 0;
Module.setValue(ptr, buffer + offset, 'i8*');
offset += encoderLen;
Module.setValue(ptr + 4, buffer + offset, 'i8*');
offset += decoderLen;
Module.setValue(ptr + 8, buffer + offset, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
const n = encoderLen + decoderLen;
const buffer = Module._malloc(n);
const len = 2 * 4; // 2 pointers
const ptr = Module._malloc(len);
let offset = 0;
Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
offset += encoderLen;
Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
offset = 0;
Module.setValue(ptr, buffer + offset, 'i8*');
offset += encoderLen;
Module.setValue(ptr + 4, buffer + offset, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
const n = Module.lengthBytesUTF8(config.model || '') + 1;
const buffer = Module._malloc(n);
const len = 1 * 4; // 1 pointer
const ptr = Module._malloc(len);
Module.stringToUTF8(config.model || '', buffer, n);
Module.setValue(ptr, buffer, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOnlineModelConfig(config, Module) {
if (!('transducer' in config)) {
config.transducer = {
encoder: '',
decoder: '',
joiner: '',
};
}
if (!('paraformer' in config)) {
config.paraformer = {
encoder: '',
decoder: '',
};
}
if (!('zipformer2Ctc' in config)) {
config.zipformer2Ctc = {
model: '',
};
}
if (!('tokensBuf' in config)) {
config.tokensBuf = '';
}
if (!('tokensBufSize' in config)) {
config.tokensBufSize = 0;
}
const transducer =
initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
const paraformer =
initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);
const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
config.zipformer2Ctc, Module);
const len = transducer.len + paraformer.len + ctc.len + 9 * 4;
const ptr = Module._malloc(len);
let offset = 0;
Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
offset += transducer.len;
Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
offset += paraformer.len;
Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset);
offset += ctc.len;
const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
const tokensBufLen = Module.lengthBytesUTF8(config.tokensBuf || '') + 1;
const bufferLen = tokensLen + providerLen + modelTypeLen + modelingUnitLen +
bpeVocabLen + tokensBufLen;
const buffer = Module._malloc(bufferLen);
offset = 0;
Module.stringToUTF8(config.tokens || '', buffer, tokensLen);
offset += tokensLen;
Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
offset += providerLen;
Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen);
offset += modelTypeLen;
Module.stringToUTF8(
config.modelingUnit || '', buffer + offset, modelingUnitLen);
offset += modelingUnitLen;
Module.stringToUTF8(config.bpeVocab || '', buffer + offset, bpeVocabLen);
offset += bpeVocabLen;
Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen);
offset += tokensBufLen;
offset = transducer.len + paraformer.len + ctc.len;
Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
offset += 4;
Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
offset += 4;
Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
offset += 4;
Module.setValue(ptr + offset, config.debug || 0, 'i32');
offset += 4;
Module.setValue(
ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
offset += 4;
Module.setValue(
ptr + offset, buffer + tokensLen + providerLen + modelTypeLen,
'i8*'); // modelingUnit
offset += 4;
Module.setValue(
ptr + offset,
buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen,
'i8*'); // bpeVocab
offset += 4;
Module.setValue(
ptr + offset,
buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen +
bpeVocabLen,
'i8*'); // tokens_buf
offset += 4;
Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32');
offset += 4;
return {
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
paraformer: paraformer, ctc: ctc
}
}
function initSherpaOnnxFeatureConfig(config, Module) {
const len = 2 * 4; // 2 pointers
const ptr = Module._malloc(len);
Module.setValue(ptr, config.sampleRate || 16000, 'i32');
Module.setValue(ptr + 4, config.featureDim || 80, 'i32');
return {ptr: ptr, len: len};
}
function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
const len = 2 * 4;
const ptr = Module._malloc(len);
const graphLen = Module.lengthBytesUTF8(config.graph || '') + 1;
const buffer = Module._malloc(graphLen);
Module.stringToUTF8(config.graph, buffer, graphLen);
Module.setValue(ptr, buffer, 'i8*');
Module.setValue(ptr + 4, config.maxActive || 3000, 'i32');
return {ptr: ptr, len: len, buffer: buffer};
}
function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
if (!('featConfig' in config)) {
config.featConfig = {
sampleRate: 16000,
featureDim: 80,
};
}
if (!('ctcFstDecoderConfig' in config)) {
config.ctcFstDecoderConfig = {
graph: '',
maxActive: 3000,
};
}
if (!('hotwordsBuf' in config)) {
config.hotwordsBuf = '';
}
if (!('hotwordsBufSize' in config)) {
config.hotwordsBufSize = 0;
}
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
config.ctcFstDecoderConfig, Module)
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4;
const ptr = Module._malloc(len);
let offset = 0;
Module._CopyHeap(feat.ptr, feat.len, ptr + offset);
offset += feat.len;
Module._CopyHeap(model.ptr, model.len, ptr + offset);
offset += model.len;
const decodingMethodLen =
Module.lengthBytesUTF8(config.decodingMethod || 'greedy_search') + 1;
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile || '') + 1;
const ruleFstsFileLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
const ruleFarsFileLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
const hotwordsBufLen = Module.lengthBytesUTF8(config.hotwordsBuf || '') + 1;
const bufferLen = decodingMethodLen + hotwordsFileLen + ruleFstsFileLen +
ruleFarsFileLen + hotwordsBufLen;
const buffer = Module._malloc(bufferLen);
offset = 0;
Module.stringToUTF8(
config.decodingMethod || 'greedy_search', buffer, decodingMethodLen);
offset += decodingMethodLen;
Module.stringToUTF8(
config.hotwordsFile || '', buffer + offset, hotwordsFileLen);
offset += hotwordsFileLen;
Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsFileLen);
offset += ruleFstsFileLen;
Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsFileLen);
offset += ruleFarsFileLen;
Module.stringToUTF8(
config.hotwordsBuf || '', buffer + offset, hotwordsBufLen);
offset += hotwordsBufLen;
offset = feat.len + model.len;
Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
offset += 4;
Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32');
offset += 4;
Module.setValue(ptr + offset, config.enableEndpoint || 0, 'i32');
offset += 4;
Module.setValue(ptr + offset, config.rule1MinTrailingSilence || 2.4, 'float');
offset += 4;
Module.setValue(ptr + offset, config.rule2MinTrailingSilence || 1.2, 'float');
offset += 4;
Module.setValue(ptr + offset, config.rule3MinUtteranceLength || 20, 'float');
offset += 4;
Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
offset += 4;
Module.setValue(ptr + offset, config.hotwordsScore || 1.5, 'float');
offset += 4;
Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset);
offset += ctcFstDecoder.len;
Module.setValue(
ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*');
offset += 4;
Module.setValue(
ptr + offset,
buffer + decodingMethodLen + hotwordsFileLen + ruleFstsFileLen, 'i8*');
offset += 4;
Module.setValue(ptr + offset, config.blankPenalty || 0, 'float');
offset += 4;
Module.setValue(
ptr + offset,
buffer + decodingMethodLen + hotwordsFileLen + ruleFstsFileLen +
ruleFarsFileLen,
'i8*');
offset += 4;
Module.setValue(ptr + offset, config.hotwordsBufSize || 0, 'i32');
offset += 4;
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
ctcFstDecoder: ctcFstDecoder
}
}
function createOnlineRecognizer(Module, myConfig) {
const onlineTransducerModelConfig = {
encoder: '',
decoder: '',
joiner: '',
};
const onlineParaformerModelConfig = {
encoder: '',
decoder: '',
};
const onlineZipformer2CtcModelConfig = {
model: '',
};
let type = 0;
switch (type) {
case 0:
// transducer
onlineTransducerModelConfig.encoder = './encoder.onnx';
onlineTransducerModelConfig.decoder = './decoder.onnx';
onlineTransducerModelConfig.joiner = './joiner.onnx';
break;
case 1:
// paraformer
onlineParaformerModelConfig.encoder = './encoder.onnx';
onlineParaformerModelConfig.decoder = './decoder.onnx';
break;
case 2:
// ctc
onlineZipformer2CtcModelConfig.model = './encoder.onnx';
break;
}
const onlineModelConfig = {
transducer: onlineTransducerModelConfig,
paraformer: onlineParaformerModelConfig,
zipformer2Ctc: onlineZipformer2CtcModelConfig,
tokens: './tokens.txt',
numThreads: 1,
provider: 'cpu',
debug: 1,
modelType: '',
modelingUnit: 'cjkchar',
bpeVocab: '',
};
const featureConfig = {
sampleRate: 16000,
featureDim: 80,
};
let recognizerConfig = {
featConfig: featureConfig,
modelConfig: onlineModelConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
enableEndpoint: 1,
rule1MinTrailingSilence: 2.4,
rule2MinTrailingSilence: 1.2,
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
ctcFstDecoderConfig: {
graph: '',
maxActive: 3000,
},
ruleFsts: '',
ruleFars: '',
};
if (myConfig) {
recognizerConfig = myConfig;
}
return new OnlineRecognizer(recognizerConfig, Module);
}
function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
const joinerLen = Module.lengthBytesUTF8(config.joiner || '') + 1;
const n = encoderLen + decoderLen + joinerLen;
const buffer = Module._malloc(n);
const len = 3 * 4; // 3 pointers
const ptr = Module._malloc(len);
let offset = 0;
Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
offset += encoderLen;
Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
offset += decoderLen;
Module.stringToUTF8(config.joiner || '', buffer + offset, joinerLen);
offset = 0;
Module.setValue(ptr, buffer + offset, 'i8*');
offset += encoderLen;
Module.setValue(ptr + 4, buffer + offset, 'i8*');
offset += decoderLen;
Module.setValue(ptr + 8, buffer + offset, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
const n = Module.lengthBytesUTF8(config.model || '') + 1;
const buffer = Module._malloc(n);
const len = 1 * 4; // 1 pointer
const ptr = Module._malloc(len);
Module.stringToUTF8(config.model || '', buffer, n);
Module.setValue(ptr, buffer, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
const n = Module.lengthBytesUTF8(config.model || '') + 1;
const buffer = Module._malloc(n);
const len = 1 * 4; // 1 pointer
const ptr = Module._malloc(len);
Module.stringToUTF8(config.model || '', buffer, n);
Module.setValue(ptr, buffer, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
const languageLen = Module.lengthBytesUTF8(config.language || '') + 1;
const taskLen = Module.lengthBytesUTF8(config.task || '') + 1;
const n = encoderLen + decoderLen + languageLen + taskLen;
const buffer = Module._malloc(n);
const len = 5 * 4; // 4 pointers + 1 int32
const ptr = Module._malloc(len);
let offset = 0;
Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
offset += encoderLen;
Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
offset += decoderLen;
Module.stringToUTF8(config.language || '', buffer + offset, languageLen);
offset += languageLen;
Module.stringToUTF8(config.task || '', buffer + offset, taskLen);
offset = 0;
Module.setValue(ptr, buffer + offset, 'i8*');
offset += encoderLen;
Module.setValue(ptr + 4, buffer + offset, 'i8*');
offset += decoderLen;
Module.setValue(ptr + 8, buffer + offset, 'i8*');
offset += languageLen;
Module.setValue(ptr + 12, buffer + offset, 'i8*');
offset += taskLen;
Module.setValue(ptr + 16, config.tailPaddings || 2000, 'i32');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) {
const preprocessorLen = Module.lengthBytesUTF8(config.preprocessor || '') + 1;
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
const uncachedDecoderLen =
Module.lengthBytesUTF8(config.uncachedDecoder || '') + 1;
const cachedDecoderLen =
Module.lengthBytesUTF8(config.cachedDecoder || '') + 1;
const n =
preprocessorLen + encoderLen + uncachedDecoderLen + cachedDecoderLen;
const buffer = Module._malloc(n);
const len = 4 * 4; // 4 pointers
const ptr = Module._malloc(len);
let offset = 0;
Module.stringToUTF8(
config.preprocessor || '', buffer + offset, preprocessorLen);
offset += preprocessorLen;
Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
offset += encoderLen;
Module.stringToUTF8(
config.uncachedDecoder || '', buffer + offset, uncachedDecoderLen);
offset += uncachedDecoderLen;
Module.stringToUTF8(
config.cachedDecoder || '', buffer + offset, cachedDecoderLen);
offset += cachedDecoderLen;
offset = 0;
Module.setValue(ptr, buffer + offset, 'i8*');
offset += preprocessorLen;
Module.setValue(ptr + 4, buffer + offset, 'i8*');
offset += encoderLen;
Module.setValue(ptr + 8, buffer + offset, 'i8*');
offset += uncachedDecoderLen;
Module.setValue(ptr + 12, buffer + offset, 'i8*');
offset += cachedDecoderLen;
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
const n = Module.lengthBytesUTF8(config.model || '') + 1;
const buffer = Module._malloc(n);
const len = 1 * 4; // 1 pointer
const ptr = Module._malloc(len);
Module.stringToUTF8(config.model || '', buffer, n);
Module.setValue(ptr, buffer, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) {
const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;
const languageLen = Module.lengthBytesUTF8(config.language || '') + 1;
// useItn is a integer with 4 bytes
const n = modelLen + languageLen;
const buffer = Module._malloc(n);
const len = 3 * 4; // 2 pointers + 1 int
const ptr = Module._malloc(len);
let offset = 0;
Module.stringToUTF8(config.model || '', buffer + offset, modelLen);
offset += modelLen;
Module.stringToUTF8(config.language || '', buffer + offset, languageLen);
offset += languageLen;
offset = 0;
Module.setValue(ptr, buffer + offset, 'i8*');
offset += modelLen;
Module.setValue(ptr + 4, buffer + offset, 'i8*');
offset += languageLen;
Module.setValue(ptr + 8, config.useInverseTextNormalization || 0, 'i32');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOfflineLMConfig(config, Module) {
const n = Module.lengthBytesUTF8(config.model || '') + 1;
const buffer = Module._malloc(n);
const len = 2 * 4;
const ptr = Module._malloc(len);
Module.stringToUTF8(config.model || '', buffer, n);
Module.setValue(ptr, buffer, 'i8*');
Module.setValue(ptr + 4, config.scale || 1, 'float');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOfflineModelConfig(config, Module) {
if (!('transducer' in config)) {
config.transducer = {
encoder: '',
decoder: '',
joiner: '',
};
}
if (!('paraformer' in config)) {
config.paraformer = {
model: '',
};
}
if (!('nemoCtc' in config)) {
config.nemoCtc = {
model: '',
};
}
if (!('whisper' in config)) {
config.whisper = {
encoder: '',
decoder: '',
language: '',
task: '',
tailPaddings: -1,
};
}
if (!('moonshine' in config)) {
config.moonshine = {
preprocessor: '',
encoder: '',
uncachedDecoder: '',
cachedDecoder: '',
};
}
if (!('tdnn' in config)) {
config.tdnn = {
model: '',
};
}
if (!('senseVoice' in config)) {
config.senseVoice = {
model: '',
language: '',
useInverseTextNormalization: 0,
};
}
const transducer =
initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
const paraformer =
initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);
const nemoCtc =
initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);
const whisper =
initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);
const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);
const senseVoice =
initSherpaOnnxOfflineSenseVoiceModelConfig(config.senseVoice, Module);
const moonshine =
initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module);
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len;
const ptr = Module._malloc(len);
let offset = 0;
Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
offset += transducer.len;
Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
offset += paraformer.len;
Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
offset += nemoCtc.len;
Module._CopyHeap(whisper.ptr, whisper.len, ptr + offset);
offset += whisper.len;
Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset);
offset += tdnn.len;
const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
const teleSpeechCtcLen =
Module.lengthBytesUTF8(config.teleSpeechCtc || '') + 1;
const bufferLen = tokensLen + providerLen + modelTypeLen + modelingUnitLen +
bpeVocabLen + teleSpeechCtcLen;
const buffer = Module._malloc(bufferLen);
offset = 0;
Module.stringToUTF8(config.tokens, buffer, tokensLen);
offset += tokensLen;
Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
offset += providerLen;
Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen);
offset += modelTypeLen;
Module.stringToUTF8(
config.modelingUnit || '', buffer + offset, modelingUnitLen);
offset += modelingUnitLen;
Module.stringToUTF8(config.bpeVocab || '', buffer + offset, bpeVocabLen);
offset += bpeVocabLen;
Module.stringToUTF8(
config.teleSpeechCtc || '', buffer + offset, teleSpeechCtcLen);
offset += teleSpeechCtcLen;
offset =
transducer.len + paraformer.len + nemoCtc.len + whisper.len + tdnn.len;
Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
offset += 4;
Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
offset += 4;
Module.setValue(ptr + offset, config.debug || 0, 'i32');
offset += 4;
Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
offset += 4;
Module.setValue(
ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
offset += 4;
Module.setValue(
ptr + offset, buffer + tokensLen + providerLen + modelTypeLen,
'i8*'); // modelingUnit
offset += 4;
Module.setValue(
ptr + offset,
buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen,
'i8*'); // bpeVocab
offset += 4;
Module.setValue(
ptr + offset,
buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen +
bpeVocabLen,
'i8*'); // teleSpeechCtc
offset += 4;
Module._CopyHeap(senseVoice.ptr, senseVoice.len, ptr + offset);
offset += senseVoice.len;
Module._CopyHeap(moonshine.ptr, moonshine.len, ptr + offset);
return {
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
senseVoice: senseVoice, moonshine: moonshine,
}
}
function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
if (!('featConfig' in config)) {
config.featConfig = {
sampleRate: 16000,
featureDim: 80,
};
}
if (!('lmConfig' in config)) {
config.lmConfig = {
model: '',
scale: 1.0,
};
}
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
const len = feat.len + model.len + lm.len + 7 * 4;
const ptr = Module._malloc(len);
let offset = 0;
Module._CopyHeap(feat.ptr, feat.len, ptr + offset);
offset += feat.len;
Module._CopyHeap(model.ptr, model.len, ptr + offset);
offset += model.len;
Module._CopyHeap(lm.ptr, lm.len, ptr + offset);
offset += lm.len;
const decodingMethodLen =
Module.lengthBytesUTF8(config.decodingMethod || 'greedy_search') + 1;
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile || '') + 1;
const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
const bufferLen =
decodingMethodLen + hotwordsFileLen + ruleFstsLen + ruleFarsLen;
const buffer = Module._malloc(bufferLen);
offset = 0;
Module.stringToUTF8(
config.decodingMethod || 'greedy_search', buffer, decodingMethodLen);
offset += decodingMethodLen;
Module.stringToUTF8(
config.hotwordsFile || '', buffer + offset, hotwordsFileLen);
offset += hotwordsFileLen;
Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
offset += ruleFstsLen;
Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsLen);
offset += ruleFarsLen;
offset = feat.len + model.len + lm.len;
Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
offset += 4;
Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32');
offset += 4;
Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
offset += 4;
Module.setValue(ptr + offset, config.hotwordsScore || 1.5, 'float');
offset += 4;
Module.setValue(
ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*');
offset += 4;
Module.setValue(
ptr + offset, buffer + decodingMethodLen + hotwordsFileLen + ruleFstsLen,
'i8*');
offset += 4;
Module.setValue(ptr + offset, config.blankPenalty || 0, 'float');
offset += 4;
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
}
}
class OfflineStream {
constructor(handle, Module) {
this.handle = handle;
this.Module = Module;
}
free() {
if (this.handle) {
this.Module._SherpaOnnxDestroyOfflineStream(this.handle);
this.handle = null;
}
}
/**
* @param sampleRate {Number}
* @param samples {Float32Array} Containing samples in the range [-1, 1]
*/
acceptWaveform(sampleRate, samples) {
const pointer =
this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
this.Module._SherpaOnnxAcceptWaveformOffline(
this.handle, sampleRate, pointer, samples.length);
this.Module._free(pointer);
}
};
class OfflineRecognizer {
constructor(configObj, Module) {
this.config = configObj;
const config = initSherpaOnnxOfflineRecognizerConfig(configObj, Module);
const handle = Module._SherpaOnnxCreateOfflineRecognizer(config.ptr);
freeConfig(config, Module);
this.handle = handle;
this.Module = Module;
}
free() {
this.Module._SherpaOnnxDestroyOfflineRecognizer(this.handle);
this.handle = 0
}
createStream() {
const handle = this.Module._SherpaOnnxCreateOfflineStream(this.handle);
return new OfflineStream(handle, this.Module);
}
decode(stream) {
this.Module._SherpaOnnxDecodeOfflineStream(this.handle, stream.handle);
}
getResult(stream) {
const r =
this.Module._SherpaOnnxGetOfflineStreamResultAsJson(stream.handle);
const jsonStr = this.Module.UTF8ToString(r);
const ans = JSON.parse(jsonStr);
this.Module._SherpaOnnxDestroyOfflineStreamResultJson(r);
return ans;
}
};
class OnlineStream {
constructor(handle, Module) {
this.handle = handle;
this.pointer = null; // buffer
this.n = 0; // buffer size
this.Module = Module;
}
free() {
if (this.handle) {
this.Module._SherpaOnnxDestroyOnlineStream(this.handle);
this.handle = null;
this.Module._free(this.pointer);
this.pointer = null;
this.n = 0;
}
}
/**
* @param sampleRate {Number}
* @param samples {Float32Array} Containing samples in the range [-1, 1]
*/
acceptWaveform(sampleRate, samples) {
if (this.n < samples.length) {
this.Module._free(this.pointer);
this.pointer =
this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
this.n = samples.length
}
this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
this.Module._SherpaOnnxOnlineStreamAcceptWaveform(
this.handle, sampleRate, this.pointer, samples.length);
}
inputFinished() {
this.Module._SherpaOnnxOnlineStreamInputFinished(this.handle);
}
};
class OnlineRecognizer {
constructor(configObj, Module) {
this.config = configObj;
const config = initSherpaOnnxOnlineRecognizerConfig(configObj, Module)
const handle = Module._SherpaOnnxCreateOnlineRecognizer(config.ptr);
freeConfig(config, Module);
this.handle = handle;
this.Module = Module;
}
free() {
this.Module._SherpaOnnxDestroyOnlineRecognizer(this.handle);
this.handle = 0
}
createStream() {
const handle = this.Module._SherpaOnnxCreateOnlineStream(this.handle);
return new OnlineStream(handle, this.Module);
}
isReady(stream) {
return this.Module._SherpaOnnxIsOnlineStreamReady(
this.handle, stream.handle) == 1;
}
decode(stream) {
this.Module._SherpaOnnxDecodeOnlineStream(this.handle, stream.handle);
}
isEndpoint(stream) {
return this.Module._SherpaOnnxOnlineStreamIsEndpoint(
this.handle, stream.handle) == 1;
}
reset(stream) {
this.Module._SherpaOnnxOnlineStreamReset(this.handle, stream.handle);
}
getResult(stream) {
const r = this.Module._SherpaOnnxGetOnlineStreamResultAsJson(
this.handle, stream.handle);
const jsonStr = this.Module.UTF8ToString(r);
const ans = JSON.parse(jsonStr);
this.Module._SherpaOnnxDestroyOnlineStreamResultJson(r);
return ans;
}
}
if (typeof process == 'object' && typeof process.versions == 'object' &&
typeof process.versions.node == 'string') {
module.exports = {
createOnlineRecognizer,
OfflineRecognizer,
};
}