import OnnxWrapper from './Silero.ts';

// Location of the Silero VAD ONNX model file. VadDetector passes this string
// unchanged to the OnnxWrapper constructor; how it is resolved (working
// directory, URL, bundler asset, ...) is decided there.
// NOTE(review): confirm the path is valid in the deployment environment.
const modelPath = "silero_vad.onnx";  // Make sure this path is correct

/**
 * Streaming voice-activity detector.
 *
 * Audio is fed one frame at a time through {@link VadDetector#apply}. Each
 * frame is scored by the wrapped model and the score is run through a small
 * hysteresis state machine: speech starts when the score reaches
 * `startThreshold`, and ends once the score has stayed below `endThreshold`
 * for at least the configured silence duration.
 */
export class VadDetector {
    /**
     * @param {number} startThreshold - Score at or above which speech is considered to start.
     * @param {number} endThreshold - Score below which a speech segment may end.
     * @param {number} samplingRate - Input sampling rate in Hz; must be 8000 or 16000.
     * @param {number} minSilenceDurationMs - Silence (ms) required before an end is confirmed.
     * @param {number} speechPadMs - Padding (ms) subtracted from starts and added to ends.
     * @throws {Error} If `samplingRate` is neither 8000 nor 16000.
     * @throws {TypeError} If a threshold or duration does not coerce to a number.
     */
    constructor(startThreshold, endThreshold, samplingRate, minSilenceDurationMs, speechPadMs) {
        if (samplingRate !== 8000 && samplingRate !== 16000) {
            throw new Error("Does not support sampling rates other than [8000, 16000]");
        }

        // Fail fast, before the model is allocated: a missing or non-numeric
        // value would otherwise turn into NaN and silently produce NaN
        // timestamps or a detector that never fires.
        const numericArgs = { startThreshold, endThreshold, minSilenceDurationMs, speechPadMs };
        for (const [name, value] of Object.entries(numericArgs)) {
            if (Number.isNaN(Number(value))) {
                throw new TypeError(`${name} must be a number, got ${value}`);
            }
        }

        this.model = new OnnxWrapper(modelPath);
        this.startThreshold = startThreshold;
        this.endThreshold = endThreshold;
        this.samplingRate = samplingRate;
        // Durations are converted from milliseconds to a sample count.
        this.minSilenceSamples = samplingRate * minSilenceDurationMs / 1000;
        this.speechPadSamples = samplingRate * speechPadMs / 1000;
        this.reset();
        console.log(`VadDetector initialized with: startThreshold=${startThreshold}, endThreshold=${endThreshold}, samplingRate=${samplingRate}`);
    }

    /**
     * Clears the model state and the detector's own bookkeeping (trigger
     * flag, pending end marker and running sample counter).
     */
    reset() {
        this.model.resetStates();
        this.triggered = false;
        this.tempEnd = 0;
        this.currentSample = 0;
        console.log('VadDetector reset');
    }

    /**
     * Scores one audio frame and advances the detector state.
     *
     * The model is always called with exactly 512 samples (16 kHz) or 256
     * samples (8 kHz); shorter input is zero-padded and longer input is
     * truncated. The running sample counter, however, advances by the length
     * of the input as it was passed in.
     *
     * @param {ArrayLike<number>} data - Samples of the current frame.
     * @param {boolean} returnSeconds - When truthy, positions are reported in
     *     seconds rounded to one decimal instead of sample indices.
     * @returns {Promise<{start?: number, end?: number}>} `{ start }` when
     *     speech begins, `{ end }` when a segment is closed, otherwise `{}`.
     * @throws {Error} If the model call fails or returns an unexpected shape;
     *     the underlying error is available as `cause`.
     */
    async apply(data, returnSeconds) {
        console.log(`Applying VAD to data of length ${data.length}`);
        const windowSizeSamples = data.length;
        this.currentSample += windowSizeSamples;

        const rowLength = this.samplingRate === 16000 ? 512 : 256;

        // Bring the frame to the exact length the model is called with,
        // without reassigning the caller-visible parameter.
        let frame = data;
        if (data.length < rowLength) {
            console.warn(`Input data length (${data.length}) is less than required (${rowLength}). Padding with zeros.`);
            frame = [...data, ...new Array(rowLength - data.length).fill(0)];
        } else if (data.length > rowLength) {
            console.warn(`Input data length (${data.length}) is greater than required (${rowLength}). Truncating.`);
            frame = data.slice(0, rowLength);
        }

        // The model is called with a batch of one row.
        const x = [Array.from(frame)];

        let speechProb;
        try {
            console.log(`Calling model with input shape: [${x.length}, ${x[0].length}], sample rate: ${this.samplingRate}`);
            const result = await this.model.call(x, this.samplingRate);
            if (Array.isArray(result) && result[0] && result[0][0] !== undefined) {
                speechProb = result[0][0];
                console.log(`Speech probability: ${speechProb}`);
            } else {
                throw new Error("Unexpected response from model");
            }
        } catch (e) {
            console.error("Error in VadDetector.apply:", e);
            // Keep the original error (and its stack) reachable via `cause`.
            throw new Error(`Error calling the model: ${e}`, { cause: e });
        }

        // Reports a sample position in the unit the caller asked for.
        const toPosition = (sample) => (
            returnSeconds ? Number((sample / this.samplingRate).toFixed(1)) : sample
        );

        // Speech resumed before the silence was long enough: drop the pending end.
        if (speechProb >= this.startThreshold && this.tempEnd !== 0) {
            this.tempEnd = 0;
        }

        if (speechProb >= this.startThreshold && !this.triggered) {
            this.triggered = true;
            const speechStart = Math.max(this.currentSample - this.speechPadSamples, 0);
            console.log(`Speech start detected at sample ${speechStart}`);
            return { start: toPosition(speechStart) };
        }

        if (speechProb < this.endThreshold && this.triggered) {
            console.log(`Potential speech end at sample ${this.currentSample}`);
            // tempEnd === 0 means "no candidate end yet"; remember the first one.
            if (this.tempEnd === 0) {
                this.tempEnd = this.currentSample;
            }

            if (this.currentSample - this.tempEnd < this.minSilenceSamples) {
                console.log('Silence duration too short, continuing');
                return {};
            }

            const speechEnd = this.tempEnd + this.speechPadSamples;
            console.log(`Speech end confirmed at sample ${speechEnd}`);
            this.tempEnd = 0;
            this.triggered = false;
            return { end: toPosition(speechEnd) };
        }

        return {};
    }

    /**
     * Resets the detector and then closes the underlying model.
     *
     * @returns {Promise<void>}
     */
    async close() {
        this.reset();
        await this.model.close();
    }
}