|
/**
 * Release the WASM heap allocations referenced by a config descriptor.
 *
 * Frees `config.buffer` when that key exists, recurses into every known
 * nested descriptor key that exists on `config`, and frees `config.ptr` last.
 *
 * @param {Object} config Descriptor with `ptr` and optionally `buffer` and
 *     nested descriptors.
 * @param {Object} Module Emscripten module providing `_free`.
 */
function freeConfig(config, Module) {
  // Order matters only in that it matches the historical release order.
  const nestedKeys = [
    'config',
    'transducer',
    'paraformer',
    'ctc',
    'feat',
    'model',
    'nemoCtc',
    'whisper',
    'moonshine',
    'tdnn',
    'senseVoice',
    'lm',
    'ctcFstDecoder',
  ];

  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  for (const key of nestedKeys) {
    if (key in config) {
      freeConfig(config[key], Module);
    }
  }

  Module._free(config.ptr);
}
|
|
|
|
|
/**
 * Serialize an online transducer model config onto the WASM heap.
 *
 * The encoder, decoder and joiner strings (each defaulting to '') are written
 * back to back as UTF-8 into one allocation, one extra byte reserved per
 * string. A second allocation of three 4-byte slots receives the start
 * address of each string, in that order.
 *
 * @param {Object} config Optional `encoder`, `decoder`, `joiner` strings.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     pointer table address, and pointer table size in bytes.
 */
function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
  const texts =
      [config.encoder || '', config.decoder || '', config.joiner || ''];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const len = 3 * 4;
  const ptr = Module._malloc(len);

  // Pass 1: lay the strings out contiguously, remembering where each starts.
  const addresses = [];
  let cursor = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, cursor, sizes[i]);
    addresses.push(cursor);
    cursor += sizes[i];
  });

  // Pass 2: fill the pointer table.
  addresses.forEach((address, i) => {
    Module.setValue(ptr + 4 * i, address, 'i8*');
  });

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an online paraformer model config onto the WASM heap.
 *
 * The encoder and decoder strings (each defaulting to '') are stored back to
 * back as UTF-8 in one allocation; a second allocation of two 4-byte slots
 * holds the address of each string.
 *
 * @param {Object} config Optional `encoder` and `decoder` strings.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     pointer table address, and pointer table size in bytes.
 */
function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
  const encoderPath = config.encoder || '';
  const decoderPath = config.decoder || '';
  const encoderSize = Module.lengthBytesUTF8(encoderPath) + 1;
  const decoderSize = Module.lengthBytesUTF8(decoderPath) + 1;

  const buffer = Module._malloc(encoderSize + decoderSize);
  const len = 2 * 4;
  const ptr = Module._malloc(len);

  // The decoder string starts right after the encoder string.
  const encoderAddr = buffer;
  const decoderAddr = buffer + encoderSize;

  Module.stringToUTF8(encoderPath, encoderAddr, encoderSize);
  Module.stringToUTF8(decoderPath, decoderAddr, decoderSize);

  Module.setValue(ptr, encoderAddr, 'i8*');
  Module.setValue(ptr + 4, decoderAddr, 'i8*');

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an online zipformer2 CTC model config onto the WASM heap.
 *
 * Copies `config.model` (default '') as UTF-8 into its own allocation and
 * stores that address in a single 4-byte slot.
 *
 * @param {Object} config Optional `model` string.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     slot address, and slot size in bytes.
 */
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
  const modelPath = config.model || '';
  const byteCount = Module.lengthBytesUTF8(modelPath) + 1;

  const buffer = Module._malloc(byteCount);
  const len = 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(modelPath, buffer, byteCount);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an online model config (and its nested model configs) onto the
 * WASM heap.
 *
 * Missing `transducer`, `paraformer`, `zipformer2Ctc`, `tokensBuf` and
 * `tokensBufSize` keys are filled in on the caller's `config` object. The
 * resulting block is laid out as: transducer struct, paraformer struct,
 * zipformer2 CTC struct, then nine 4-byte fields (tokens, numThreads,
 * provider, debug, modelType, modelingUnit, bpeVocab, tokensBuf,
 * tokensBufSize). String fields hold addresses into a separate string buffer.
 *
 * @param {Object} config Online model config; mutated to add defaults.
 * @param {Object} Module Emscripten module (`_malloc`, `_CopyHeap`,
 *     `lengthBytesUTF8`, `stringToUTF8`, `setValue`).
 * @returns {Object} `{buffer, ptr, len, transducer, paraformer, ctc}` where
 *     the last three are the nested descriptors (so they can be freed later).
 */
function initSherpaOnnxOnlineModelConfig(config, Module) {
  // Built per call so every config receives its own default objects.
  const fallbacks = [
    ['transducer', {encoder: '', decoder: '', joiner: ''}],
    ['paraformer', {encoder: '', decoder: ''}],
    ['zipformer2Ctc', {model: ''}],
    ['tokensBuf', ''],
    ['tokensBufSize', 0],
  ];
  for (const [key, value] of fallbacks) {
    if (!(key in config)) {
      config[key] = value;
    }
  }

  const transducer =
      initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
  const paraformer =
      initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);
  const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
      config.zipformer2Ctc, Module);

  const nested = [transducer, paraformer, ctc];
  const nestedLen = transducer.len + paraformer.len + ctc.len;
  const len = nestedLen + 9 * 4;
  const ptr = Module._malloc(len);

  // Nested structs are embedded by value at the front of the block.
  let cursor = ptr;
  for (const part of nested) {
    Module._CopyHeap(part.ptr, part.len, cursor);
    cursor += part.len;
  }

  const texts = [
    config.tokens || '',
    config.provider || 'cpu',
    config.modelType || '',
    config.modelingUnit || '',
    config.bpeVocab || '',
    config.tokensBuf || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));

  const addresses = [];
  let writeAt = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, writeAt, sizes[i]);
    addresses.push(writeAt);
    writeAt += sizes[i];
  });
  const [tokens, provider, modelType, modelingUnit, bpeVocab, tokensBuf] =
      addresses;

  // Scalar/pointer fields, in the exact order they follow the nested structs.
  const fields = [
    [tokens, 'i8*'],
    [config.numThreads || 1, 'i32'],
    [provider, 'i8*'],
    [config.debug || 0, 'i32'],
    [modelType, 'i8*'],
    [modelingUnit, 'i8*'],
    [bpeVocab, 'i8*'],
    [tokensBuf, 'i8*'],
    [config.tokensBufSize || 0, 'i32'],
  ];
  for (const [value, type] of fields) {
    Module.setValue(cursor, value, type);
    cursor += 4;
  }

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    transducer: transducer,
    paraformer: paraformer,
    ctc: ctc,
  };
}
|
|
|
/**
 * Serialize a feature-extraction config onto the WASM heap.
 *
 * Writes two consecutive 32-bit integers: `sampleRate` (default 16000) and
 * `featureDim` (default 80). Falsy values, including 0, use the default.
 *
 * @param {Object} config Optional `sampleRate` and `featureDim` numbers.
 * @param {Object} Module Emscripten module (`_malloc`, `setValue`).
 * @returns {{ptr: number, len: number}} Block address and size in bytes.
 */
function initSherpaOnnxFeatureConfig(config, Module) {
  const len = 8;
  const ptr = Module._malloc(len);

  const values = [config.sampleRate || 16000, config.featureDim || 80];
  values.forEach((value, slot) => {
    Module.setValue(ptr + 4 * slot, value, 'i32');
  });

  return {ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an online CTC FST decoder config onto the WASM heap.
 *
 * Writes two 4-byte fields: the address of the UTF-8 `graph` string
 * (default '') and the 32-bit integer `maxActive` (default 3000; a falsy
 * value, including 0, uses the default).
 *
 * @param {Object} config Optional `graph` string and `maxActive` number.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{ptr: number, len: number, buffer: number}} Block address, block
 *     size in bytes, and the allocation holding the graph string.
 */
function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
  const len = 2 * 4;
  const ptr = Module._malloc(len);

  // Resolve the fallback once so the byte count and the bytes written always
  // come from the same string; previously a missing `graph` was sized as ''
  // but `undefined` was handed to stringToUTF8.
  const graph = config.graph || '';
  const graphLen = Module.lengthBytesUTF8(graph) + 1;
  const buffer = Module._malloc(graphLen);
  Module.stringToUTF8(graph, buffer, graphLen);

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.maxActive || 3000, 'i32');

  return {ptr: ptr, len: len, buffer: buffer};
}
|
|
|
/**
 * Serialize a complete online recognizer config onto the WASM heap.
 *
 * Missing `featConfig`, `ctcFstDecoderConfig`, `hotwordsBuf` and
 * `hotwordsBufSize` keys are filled in on the caller's `config` object.
 * Layout of the resulting block, in order: feature struct, model struct,
 * decodingMethod, maxActivePaths, enableEndpoint, rule1MinTrailingSilence,
 * rule2MinTrailingSilence, rule3MinUtteranceLength, hotwordsFile,
 * hotwordsScore, CTC FST decoder struct, ruleFsts, ruleFars, blankPenalty,
 * hotwordsBuf, hotwordsBufSize. Every non-struct field occupies 4 bytes;
 * string fields hold addresses into a separate string buffer.
 *
 * @param {Object} config Recognizer config; mutated to add defaults.
 *     `config.modelConfig` is passed on as-is.
 * @param {Object} Module Emscripten module (`_malloc`, `_CopyHeap`,
 *     `lengthBytesUTF8`, `stringToUTF8`, `setValue`).
 * @returns {Object} `{buffer, ptr, len, feat, model, ctcFstDecoder}`; the
 *     last three are the nested descriptors.
 */
function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
  // Built per call so every config receives its own default objects.
  const fallbacks = [
    ['featConfig', {sampleRate: 16000, featureDim: 80}],
    ['ctcFstDecoderConfig', {graph: '', maxActive: 3000}],
    ['hotwordsBuf', ''],
    ['hotwordsBufSize', 0],
  ];
  for (const [key, value] of fallbacks) {
    if (!(key in config)) {
      config[key] = value;
    }
  }

  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
  const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
      config.ctcFstDecoderConfig, Module);

  // 8 fields precede the embedded decoder struct and 5 follow it.
  const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4;
  const ptr = Module._malloc(len);

  // `cursor` walks the block front to back as structs/fields are emitted.
  let cursor = ptr;
  const embed = (part) => {
    Module._CopyHeap(part.ptr, part.len, cursor);
    cursor += part.len;
  };
  const put = (value, type) => {
    Module.setValue(cursor, value, type);
    cursor += 4;
  };

  embed(feat);
  embed(model);

  const texts = [
    config.decodingMethod || 'greedy_search',
    config.hotwordsFile || '',
    config.ruleFsts || '',
    config.ruleFars || '',
    config.hotwordsBuf || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));

  const addresses = [];
  let writeAt = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, writeAt, sizes[i]);
    addresses.push(writeAt);
    writeAt += sizes[i];
  });
  const [decodingMethod, hotwordsFile, ruleFsts, ruleFars, hotwordsBuf] =
      addresses;

  put(decodingMethod, 'i8*');
  put(config.maxActivePaths || 4, 'i32');
  put(config.enableEndpoint || 0, 'i32');
  put(config.rule1MinTrailingSilence || 2.4, 'float');
  put(config.rule2MinTrailingSilence || 1.2, 'float');
  put(config.rule3MinUtteranceLength || 20, 'float');
  put(hotwordsFile, 'i8*');
  put(config.hotwordsScore || 1.5, 'float');

  embed(ctcFstDecoder);

  put(ruleFsts, 'i8*');
  put(ruleFars, 'i8*');
  put(config.blankPenalty || 0, 'float');
  put(hotwordsBuf, 'i8*');
  put(config.hotwordsBufSize || 0, 'i32');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    feat: feat,
    model: model,
    ctcFstDecoder: ctcFstDecoder,
  };
}
|
|
|
/**
 * Create an OnlineRecognizer, using a built-in default configuration unless
 * the caller supplies one.
 *
 * The default configuration points at `./encoder.onnx`, `./decoder.onnx`,
 * `./joiner.onnx` and `./tokens.txt` relative paths and uses greedy search
 * with endpointing enabled.
 *
 * @param {Object} Module Emscripten module handed to the recognizer.
 * @param {Object} [myConfig] Full recognizer config; when truthy it replaces
 *     the defaults entirely (no merging).
 * @returns {OnlineRecognizer} The constructed recognizer.
 */
function createOnlineRecognizer(Module, myConfig) {
  // Which model family the defaults are wired for:
  // 0 = transducer, 1 = paraformer, 2 = zipformer2 CTC.
  const modelKind = 0;

  const transducer = {encoder: '', decoder: '', joiner: ''};
  const paraformer = {encoder: '', decoder: ''};
  const zipformer2Ctc = {model: ''};

  if (modelKind === 0) {
    transducer.encoder = './encoder.onnx';
    transducer.decoder = './decoder.onnx';
    transducer.joiner = './joiner.onnx';
  } else if (modelKind === 1) {
    paraformer.encoder = './encoder.onnx';
    paraformer.decoder = './decoder.onnx';
  } else if (modelKind === 2) {
    zipformer2Ctc.model = './encoder.onnx';
  }

  const defaultConfig = {
    featConfig: {
      sampleRate: 16000,
      featureDim: 80,
    },
    modelConfig: {
      transducer: transducer,
      paraformer: paraformer,
      zipformer2Ctc: zipformer2Ctc,
      tokens: './tokens.txt',
      numThreads: 1,
      provider: 'cpu',
      debug: 1,
      modelType: '',
      modelingUnit: 'cjkchar',
      bpeVocab: '',
    },
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
    hotwordsFile: '',
    hotwordsScore: 1.5,
    ctcFstDecoderConfig: {
      graph: '',
      maxActive: 3000,
    },
    ruleFsts: '',
    ruleFars: '',
  };

  // A caller-provided config wins wholesale.
  const recognizerConfig = myConfig ? myConfig : defaultConfig;

  return new OnlineRecognizer(recognizerConfig, Module);
}
|
|
|
/**
 * Serialize an offline transducer model config onto the WASM heap.
 *
 * The encoder, decoder and joiner strings (each defaulting to '') are packed
 * back to back as UTF-8 in one allocation; a second allocation of three
 * 4-byte slots stores the address of each string in that order.
 *
 * @param {Object} config Optional `encoder`, `decoder`, `joiner` strings.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     pointer table address, and pointer table size in bytes.
 */
function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
  const fields = ['encoder', 'decoder', 'joiner'];
  const texts = fields.map((name) => config[name] || '');
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);

  const buffer = Module._malloc(sizes[0] + sizes[1] + sizes[2]);
  const len = 3 * 4;
  const ptr = Module._malloc(len);

  // Start address of each string inside `buffer`.
  const starts = [buffer, buffer + sizes[0], buffer + sizes[0] + sizes[1]];

  for (let i = 0; i < texts.length; i++) {
    Module.stringToUTF8(texts[i], starts[i], sizes[i]);
  }
  for (let i = 0; i < starts.length; i++) {
    Module.setValue(ptr + 4 * i, starts[i], 'i8*');
  }

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an offline paraformer model config onto the WASM heap.
 *
 * Copies `config.model` (default '') as UTF-8 into its own allocation and
 * records that address in a single 4-byte slot.
 *
 * @param {Object} config Optional `model` string.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     slot address, and slot size in bytes.
 */
function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
  const modelPath = config.model || '';
  const modelBytes = Module.lengthBytesUTF8(modelPath) + 1;

  const buffer = Module._malloc(modelBytes);
  const len = 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(modelPath, buffer, modelBytes);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an offline NeMo EncDec CTC model config onto the WASM heap.
 *
 * Copies `config.model` (default '') as UTF-8 into its own allocation and
 * records that address in a single 4-byte slot.
 *
 * @param {Object} config Optional `model` string.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     slot address, and slot size in bytes.
 */
function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
  const modelPath = config.model || '';
  const modelBytes = Module.lengthBytesUTF8(modelPath) + 1;

  const buffer = Module._malloc(modelBytes);
  const len = 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(modelPath, buffer, modelBytes);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an offline Whisper model config onto the WASM heap.
 *
 * Produces a 20-byte block: four string addresses (encoder, decoder,
 * language, task; each string defaults to '') followed by the 32-bit integer
 * `tailPaddings`. A falsy `tailPaddings` (including 0) is written as 2000.
 *
 * @param {Object} config Optional `encoder`, `decoder`, `language`, `task`
 *     strings and `tailPaddings` number.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     block address, and block size in bytes.
 */
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
  const texts = [
    config.encoder || '',
    config.decoder || '',
    config.language || '',
    config.task || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const len = 5 * 4;
  const ptr = Module._malloc(len);

  // Pass 1: pack the strings, noting each start address.
  const addresses = [];
  let cursor = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, cursor, sizes[i]);
    addresses.push(cursor);
    cursor += sizes[i];
  });

  // Pass 2: four pointer slots, then the integer field.
  addresses.forEach((address, i) => {
    Module.setValue(ptr + 4 * i, address, 'i8*');
  });
  Module.setValue(ptr + 16, config.tailPaddings || 2000, 'i32');

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an offline Moonshine model config onto the WASM heap.
 *
 * Produces a 16-byte table of four string addresses, in order: preprocessor,
 * encoder, uncachedDecoder, cachedDecoder. Each string defaults to '' and is
 * stored as UTF-8 in a shared string buffer.
 *
 * @param {Object} config Optional `preprocessor`, `encoder`,
 *     `uncachedDecoder`, `cachedDecoder` strings.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     pointer table address, and pointer table size in bytes.
 */
function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) {
  const texts = [
    config.preprocessor || '',
    config.encoder || '',
    config.uncachedDecoder || '',
    config.cachedDecoder || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const len = 4 * 4;
  const ptr = Module._malloc(len);

  // Pass 1: pack the strings, noting each start address.
  const addresses = [];
  let cursor = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, cursor, sizes[i]);
    addresses.push(cursor);
    cursor += sizes[i];
  });

  // Pass 2: fill the pointer table.
  addresses.forEach((address, i) => {
    Module.setValue(ptr + 4 * i, address, 'i8*');
  });

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an offline TDNN model config onto the WASM heap.
 *
 * Copies `config.model` (default '') as UTF-8 into its own allocation and
 * records that address in a single 4-byte slot.
 *
 * @param {Object} config Optional `model` string.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     slot address, and slot size in bytes.
 */
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
  const modelPath = config.model || '';
  const modelBytes = Module.lengthBytesUTF8(modelPath) + 1;

  const buffer = Module._malloc(modelBytes);
  const len = 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(modelPath, buffer, modelBytes);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an offline SenseVoice model config onto the WASM heap.
 *
 * Produces a 12-byte block: the addresses of the `model` and `language`
 * strings (each defaulting to ''), followed by the 32-bit integer
 * `useInverseTextNormalization` (default 0).
 *
 * @param {Object} config Optional `model`, `language` strings and
 *     `useInverseTextNormalization` number.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     block address, and block size in bytes.
 */
function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) {
  const modelPath = config.model || '';
  const language = config.language || '';
  const modelBytes = Module.lengthBytesUTF8(modelPath) + 1;
  const languageBytes = Module.lengthBytesUTF8(language) + 1;

  const buffer = Module._malloc(modelBytes + languageBytes);
  const len = 3 * 4;
  const ptr = Module._malloc(len);

  // The language string sits immediately after the model string.
  const modelAddr = buffer;
  const languageAddr = buffer + modelBytes;

  Module.stringToUTF8(modelPath, modelAddr, modelBytes);
  Module.stringToUTF8(language, languageAddr, languageBytes);

  Module.setValue(ptr, modelAddr, 'i8*');
  Module.setValue(ptr + 4, languageAddr, 'i8*');
  Module.setValue(ptr + 8, config.useInverseTextNormalization || 0, 'i32');

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an offline language-model config onto the WASM heap.
 *
 * Produces an 8-byte block: the address of the `model` string (default '')
 * followed by the 32-bit float `scale`. A falsy `scale` (including 0) is
 * written as 1.
 *
 * @param {Object} config Optional `model` string and `scale` number.
 * @param {Object} Module Emscripten module (`_malloc`, `lengthBytesUTF8`,
 *     `stringToUTF8`, `setValue`).
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     block address, and block size in bytes.
 */
function initSherpaOnnxOfflineLMConfig(config, Module) {
  const modelPath = config.model || '';
  const modelBytes = Module.lengthBytesUTF8(modelPath) + 1;

  const buffer = Module._malloc(modelBytes);
  const len = 8;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(modelPath, buffer, modelBytes);

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.scale || 1, 'float');

  return {buffer: buffer, ptr: ptr, len: len};
}
|
|
|
/**
 * Serialize an offline model config (and its nested model configs) onto the
 * WASM heap.
 *
 * Missing `transducer`, `paraformer`, `nemoCtc`, `whisper`, `moonshine`,
 * `tdnn` and `senseVoice` keys are filled in on the caller's `config` object.
 * Layout of the resulting block, in order: transducer, paraformer, nemoCtc,
 * whisper and tdnn structs; eight 4-byte fields (tokens, numThreads, debug,
 * provider, modelType, modelingUnit, bpeVocab, teleSpeechCtc); then the
 * senseVoice and moonshine structs. String fields hold addresses into a
 * separate string buffer.
 *
 * @param {Object} config Offline model config; mutated to add defaults.
 * @param {Object} Module Emscripten module (`_malloc`, `_CopyHeap`,
 *     `lengthBytesUTF8`, `stringToUTF8`, `setValue`).
 * @returns {Object} `{buffer, ptr, len}` plus the nested descriptors
 *     `transducer`, `paraformer`, `nemoCtc`, `whisper`, `tdnn`, `senseVoice`
 *     and `moonshine`.
 */
function initSherpaOnnxOfflineModelConfig(config, Module) {
  // Built per call so every config receives its own default objects.
  const fallbacks = [
    ['transducer', {encoder: '', decoder: '', joiner: ''}],
    ['paraformer', {model: ''}],
    ['nemoCtc', {model: ''}],
    [
      'whisper',
      {encoder: '', decoder: '', language: '', task: '', tailPaddings: -1},
    ],
    [
      'moonshine',
      {preprocessor: '', encoder: '', uncachedDecoder: '', cachedDecoder: ''},
    ],
    ['tdnn', {model: ''}],
    ['senseVoice', {model: '', language: '', useInverseTextNormalization: 0}],
  ];
  for (const [key, value] of fallbacks) {
    if (!(key in config)) {
      config[key] = value;
    }
  }

  const transducer =
      initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
  const paraformer =
      initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);
  const nemoCtc =
      initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);
  const whisper =
      initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);
  const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);
  const senseVoice =
      initSherpaOnnxOfflineSenseVoiceModelConfig(config.senseVoice, Module);
  const moonshine =
      initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module);

  // Structs embedded before and after the eight scalar/pointer fields.
  const leading = [transducer, paraformer, nemoCtc, whisper, tdnn];
  const trailing = [senseVoice, moonshine];
  const totalLen = (parts) => parts.reduce((sum, part) => sum + part.len, 0);

  const len = totalLen(leading) + 8 * 4 + totalLen(trailing);
  const ptr = Module._malloc(len);

  // `cursor` walks the block front to back as structs/fields are emitted.
  let cursor = ptr;
  const embed = (part) => {
    Module._CopyHeap(part.ptr, part.len, cursor);
    cursor += part.len;
  };
  const put = (value, type) => {
    Module.setValue(cursor, value, type);
    cursor += 4;
  };

  leading.forEach((part) => embed(part));

  // Each fallback is resolved exactly once so the size computation and the
  // bytes written agree. Previously `tokens` was sized with a '' fallback
  // but written without one, handing `undefined` to stringToUTF8.
  const texts = [
    config.tokens || '',
    config.provider || 'cpu',
    config.modelType || '',
    config.modelingUnit || '',
    config.bpeVocab || '',
    config.teleSpeechCtc || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));

  const addresses = [];
  let writeAt = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, writeAt, sizes[i]);
    addresses.push(writeAt);
    writeAt += sizes[i];
  });
  const [tokens, provider, modelType, modelingUnit, bpeVocab, teleSpeechCtc] =
      addresses;

  put(tokens, 'i8*');
  put(config.numThreads || 1, 'i32');
  put(config.debug || 0, 'i32');
  put(provider, 'i8*');
  put(modelType, 'i8*');
  put(modelingUnit, 'i8*');
  put(bpeVocab, 'i8*');
  put(teleSpeechCtc, 'i8*');

  trailing.forEach((part) => embed(part));

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    transducer: transducer,
    paraformer: paraformer,
    nemoCtc: nemoCtc,
    whisper: whisper,
    tdnn: tdnn,
    senseVoice: senseVoice,
    moonshine: moonshine,
  };
}
|
|
|
/**
 * Serialize a complete offline recognizer config onto the WASM heap.
 *
 * Missing `featConfig` and `lmConfig` keys are filled in on the caller's
 * `config` object. Layout of the resulting block, in order: feature struct,
 * model struct, LM struct, then seven 4-byte fields (decodingMethod,
 * maxActivePaths, hotwordsFile, hotwordsScore, ruleFsts, ruleFars,
 * blankPenalty). String fields hold addresses into a separate string buffer.
 *
 * @param {Object} config Recognizer config; mutated to add defaults.
 *     `config.modelConfig` is passed on as-is.
 * @param {Object} Module Emscripten module (`_malloc`, `_CopyHeap`,
 *     `lengthBytesUTF8`, `stringToUTF8`, `setValue`).
 * @returns {Object} `{buffer, ptr, len, feat, model, lm}`; the last three
 *     are the nested descriptors.
 */
function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
  // Built per call so every config receives its own default objects.
  const fallbacks = [
    ['featConfig', {sampleRate: 16000, featureDim: 80}],
    ['lmConfig', {model: '', scale: 1.0}],
  ];
  for (const [key, value] of fallbacks) {
    if (!(key in config)) {
      config[key] = value;
    }
  }

  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
  const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);

  const len = feat.len + model.len + lm.len + 7 * 4;
  const ptr = Module._malloc(len);

  // Nested structs are embedded by value at the front of the block.
  let cursor = ptr;
  for (const part of [feat, model, lm]) {
    Module._CopyHeap(part.ptr, part.len, cursor);
    cursor += part.len;
  }

  const texts = [
    config.decodingMethod || 'greedy_search',
    config.hotwordsFile || '',
    config.ruleFsts || '',
    config.ruleFars || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));

  const addresses = [];
  let writeAt = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, writeAt, sizes[i]);
    addresses.push(writeAt);
    writeAt += sizes[i];
  });
  const [decodingMethod, hotwordsFile, ruleFsts, ruleFars] = addresses;

  // Scalar/pointer fields, in the exact order they follow the nested structs.
  const fields = [
    [decodingMethod, 'i8*'],
    [config.maxActivePaths || 4, 'i32'],
    [hotwordsFile, 'i8*'],
    [config.hotwordsScore || 1.5, 'float'],
    [ruleFsts, 'i8*'],
    [ruleFars, 'i8*'],
    [config.blankPenalty || 0, 'float'],
  ];
  for (const [value, type] of fields) {
    Module.setValue(cursor, value, type);
    cursor += 4;
  }

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    feat: feat,
    model: model,
    lm: lm,
  };
}
|
|
|
/**
 * Thin wrapper around a native offline stream handle.
 */
class OfflineStream {
  /**
   * @param {number} handle Native stream handle.
   * @param {Object} Module Emscripten module exposing the native API.
   */
  constructor(handle, Module) {
    this.handle = handle;
    this.Module = Module;
  }

  /**
   * Destroy the native stream. Safe to call more than once: the handle is
   * cleared after the first call.
   */
  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOfflineStream(this.handle);
      this.handle = null;
    }
  }

  /**
   * Feed audio samples to the stream.
   *
   * The samples are copied into a temporary heap buffer that is released
   * before returning, even when the copy or the native call throws.
   *
   * @param {number} sampleRate Sample rate passed through to the native call.
   * @param {Float32Array} samples Samples to copy into `HEAPF32`
   *     (presumably normalized audio; the code does not check the range).
   */
  acceptWaveform(sampleRate, samples) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    try {
      // HEAPF32 is indexed in elements, hence the byte address division.
      this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
      this.Module._SherpaOnnxAcceptWaveformOffline(
          this.handle, sampleRate, pointer, samples.length);
    } finally {
      this.Module._free(pointer);
    }
  }
}
|
|
|
/**
 * Non-streaming recognizer backed by a native handle.
 */
class OfflineRecognizer {
  /**
   * Serialize `configObj` onto the WASM heap, create the native recognizer
   * from it, and release the temporary config memory again.
   *
   * @param {Object} configObj Offline recognizer config; kept on
   *     `this.config`. The serializer may add default keys to it.
   * @param {Object} Module Emscripten module exposing the native API.
   */
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxOfflineRecognizerConfig(configObj, Module);

    let handle;
    try {
      handle = Module._SherpaOnnxCreateOfflineRecognizer(config.ptr);
    } finally {
      // The serialized config is only needed during creation; release it
      // even if creation throws.
      freeConfig(config, Module);
    }

    this.handle = handle;
    this.Module = Module;
  }

  /**
   * Destroy the native recognizer. Safe to call more than once: the handle
   * is reset to 0 after the first call and later calls do nothing.
   */
  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOfflineRecognizer(this.handle);
      this.handle = 0;
    }
  }

  /**
   * @returns {OfflineStream} A new stream bound to this recognizer.
   */
  createStream() {
    const handle = this.Module._SherpaOnnxCreateOfflineStream(this.handle);
    return new OfflineStream(handle, this.Module);
  }

  /**
   * Run recognition on the audio accepted by `stream` so far.
   *
   * @param {OfflineStream} stream Stream created by `createStream()`.
   */
  decode(stream) {
    this.Module._SherpaOnnxDecodeOfflineStream(this.handle, stream.handle);
  }

  /**
   * Fetch the recognition result of `stream`.
   *
   * @param {OfflineStream} stream Stream that has been decoded.
   * @returns {*} The parsed JSON result.
   * @throws {SyntaxError} When the native result is not valid JSON; the
   *     native result string is still released in that case.
   */
  getResult(stream) {
    const r =
        this.Module._SherpaOnnxGetOfflineStreamResultAsJson(stream.handle);
    try {
      return JSON.parse(this.Module.UTF8ToString(r));
    } finally {
      this.Module._SherpaOnnxDestroyOfflineStreamResultJson(r);
    }
  }
}
|
|
|
/**
 * Thin wrapper around a native online (streaming) stream handle.
 *
 * Keeps one heap buffer for sample transfer and grows it only when a larger
 * chunk of samples arrives.
 */
class OnlineStream {
  /**
   * @param {number} handle Native stream handle.
   * @param {Object} Module Emscripten module exposing the native API.
   */
  constructor(handle, Module) {
    this.handle = handle;
    this.pointer = null;  // heap address of the sample transfer buffer
    this.n = 0;           // capacity of that buffer, in samples
    this.Module = Module;
  }

  /**
   * Destroy the native stream and release the transfer buffer. Does nothing
   * when the handle has already been cleared.
   */
  free() {
    if (!this.handle) {
      return;
    }

    const wasm = this.Module;
    wasm._SherpaOnnxDestroyOnlineStream(this.handle);
    this.handle = null;

    wasm._free(this.pointer);
    this.pointer = null;
    this.n = 0;
  }

  /**
   * Feed audio samples to the stream.
   *
   * @param {number} sampleRate Sample rate passed through to the native call.
   * @param {Float32Array} samples Samples to copy into `HEAPF32`
   *     (presumably normalized audio; the code does not check the range).
   */
  acceptWaveform(sampleRate, samples) {
    const wasm = this.Module;
    const count = samples.length;
    const elementSize = samples.BYTES_PER_ELEMENT;

    // Grow the transfer buffer only when this chunk does not fit.
    if (count > this.n) {
      wasm._free(this.pointer);
      this.pointer = wasm._malloc(count * elementSize);
      this.n = count;
    }

    // HEAPF32 is indexed in elements, hence the byte address division.
    wasm.HEAPF32.set(samples, this.pointer / elementSize);
    wasm._SherpaOnnxOnlineStreamAcceptWaveform(
        this.handle, sampleRate, this.pointer, count);
  }

  /** Tell the native stream that no more audio will be supplied. */
  inputFinished() {
    this.Module._SherpaOnnxOnlineStreamInputFinished(this.handle);
  }
}
|
|
|
/**
 * Streaming recognizer backed by a native handle.
 */
class OnlineRecognizer {
  /**
   * Serialize `configObj` onto the WASM heap, create the native recognizer
   * from it, and release the temporary config memory again.
   *
   * @param {Object} configObj Online recognizer config; kept on
   *     `this.config`. The serializer may add default keys to it.
   * @param {Object} Module Emscripten module exposing the native API.
   */
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxOnlineRecognizerConfig(configObj, Module);

    let handle;
    try {
      handle = Module._SherpaOnnxCreateOnlineRecognizer(config.ptr);
    } finally {
      // The serialized config is only needed during creation; release it
      // even if creation throws.
      freeConfig(config, Module);
    }

    this.handle = handle;
    this.Module = Module;
  }

  /**
   * Destroy the native recognizer. Safe to call more than once: the handle
   * is reset to 0 after the first call and later calls do nothing.
   */
  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOnlineRecognizer(this.handle);
      this.handle = 0;
    }
  }

  /**
   * @returns {OnlineStream} A new stream bound to this recognizer.
   */
  createStream() {
    const handle = this.Module._SherpaOnnxCreateOnlineStream(this.handle);
    return new OnlineStream(handle, this.Module);
  }

  /**
   * @param {OnlineStream} stream Stream created by `createStream()`.
   * @returns {boolean} True when the native readiness check returns 1.
   */
  isReady(stream) {
    return this.Module._SherpaOnnxIsOnlineStreamReady(
               this.handle, stream.handle) == 1;
  }

  /**
   * Run one decoding step on `stream`.
   *
   * @param {OnlineStream} stream Stream created by `createStream()`.
   */
  decode(stream) {
    this.Module._SherpaOnnxDecodeOnlineStream(this.handle, stream.handle);
  }

  /**
   * @param {OnlineStream} stream Stream created by `createStream()`.
   * @returns {boolean} True when the native endpoint check returns 1.
   */
  isEndpoint(stream) {
    return this.Module._SherpaOnnxOnlineStreamIsEndpoint(
               this.handle, stream.handle) == 1;
  }

  /**
   * Reset `stream` through the native reset call.
   *
   * @param {OnlineStream} stream Stream created by `createStream()`.
   */
  reset(stream) {
    this.Module._SherpaOnnxOnlineStreamReset(this.handle, stream.handle);
  }

  /**
   * Fetch the current recognition result of `stream`.
   *
   * @param {OnlineStream} stream Stream created by `createStream()`.
   * @returns {*} The parsed JSON result.
   * @throws {SyntaxError} When the native result is not valid JSON; the
   *     native result string is still released in that case.
   */
  getResult(stream) {
    const r = this.Module._SherpaOnnxGetOnlineStreamResultAsJson(
        this.handle, stream.handle);
    try {
      return JSON.parse(this.Module.UTF8ToString(r));
    } finally {
      this.Module._SherpaOnnxDestroyOnlineStreamResultJson(r);
    }
  }
}
|
|
|
if (typeof process == 'object' && typeof process.versions == 'object' && |
|
typeof process.versions.node == 'string') { |
|
module.exports = { |
|
createOnlineRecognizer, |
|
OfflineRecognizer, |
|
}; |
|
} |
|
|