
// Module-level player state; it outlives individual initiateTextToSpeech calls.

// Use the standard constructor where it exists, otherwise the webkit-prefixed one.
const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
const audioCtx = new AudioContextCtor();

// Most recently created text-to-speech socket (null until the first request).
let ttsWebSocket = null;

// Playback bookkeeping.
let base64Queue = [];
let nextPlayTime = 0; // compared against audioCtx.currentTime when scheduling

// NOTE(review): sourceQueue is not referenced anywhere in the visible code.
let sourceQueue = [];
   




/**
 * Speaks `textToSpeak` by streaming it to the ElevenLabs `stream-input`
 * WebSocket endpoint and playing the returned audio through the module-level
 * `audioCtx`.
 *
 * The request asks for `pcm_24000` output, and each received chunk is decoded
 * as mono, 16-bit little-endian PCM. Chunks are scheduled back-to-back on the
 * audio clock as soon as they arrive, using the module-level `nextPlayTime`
 * so that audio from consecutive calls does not overlap.
 *
 * Each call opens its own socket and stores it in the module-level
 * `ttsWebSocket`; a socket from an earlier call that is not yet closed is
 * closed first.
 *
 * @param {string} voiceId - Voice identifier, placed in the URL path.
 * @param {string} apiKey - API key, sent as `xi_api_key` in the first message.
 * @param {string} textToSpeak - Text to synthesise.
 * @param {function(null): void} setIsSpeaking - Called exactly once with
 *     `null` when the stream has ended (final message or socket close) and
 *     every scheduled chunk has finished playing.
 * @returns {void}
 */
function initiateTextToSpeech(voiceId, apiKey, textToSpeak, setIsSpeaking) {
    const modelId = 'eleven_monolingual_v1';
    // Used both in the request URL and when building AudioBuffers, so the
    // decoded sample rate always matches what was requested.
    const sampleRate = 24000;
    const wsUrl = `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}/stream-input?model_id=${modelId}&output_format=pcm_${sampleRate}`;

    // A socket left over from an earlier call cannot carry this request: its
    // `open` event has already fired (or belongs to the earlier text), so the
    // new text would never be sent. Retire it and start a fresh connection.
    if (ttsWebSocket && ttsWebSocket.readyState !== WebSocket.CLOSED) {
        ttsWebSocket.close();
    }
    const socket = new WebSocket(wsUrl);
    ttsWebSocket = socket;
    console.log('WebSocket created');
    console.log(textToSpeak);

    // Per-utterance playback state.
    let pendingSources = 0; // sources scheduled but not yet ended
    let streamEnded = false; // no more audio will arrive on this socket
    let finished = false; // setIsSpeaking(null) has already been reported

    /** Reports completion once the stream is over and nothing is left to play. */
    function finishIfDone() {
        if (finished || !streamEnded || pendingSources > 0) {
            return;
        }
        finished = true;
        console.log('Audio finished playing');
        setIsSpeaking(null);
    }

    /** Decodes a base64 string into an ArrayBuffer of raw bytes. */
    function base64ToArrayBuffer(base64) {
        const binaryData = atob(base64);
        const bytes = new Uint8Array(binaryData.length);
        for (let i = 0; i < binaryData.length; i++) {
            bytes[i] = binaryData.charCodeAt(i);
        }
        return bytes.buffer;
    }

    /**
     * Converts 16-bit little-endian PCM bytes into a mono AudioBuffer.
     * Returns null when the payload holds less than one whole sample; a
     * trailing odd byte is ignored.
     */
    function createAudioBuffer(arrayBuffer) {
        const sampleCount = Math.floor(arrayBuffer.byteLength / 2);
        if (sampleCount === 0) {
            // Nothing to play, and createBuffer rejects a zero-length buffer.
            return null;
        }
        const view = new DataView(arrayBuffer);
        const audioBuffer = audioCtx.createBuffer(1, sampleCount, sampleRate);
        const channelData = audioBuffer.getChannelData(0);
        for (let i = 0; i < sampleCount; i++) {
            // Scale signed 16-bit integers to the [-1, 1) float range.
            channelData[i] = view.getInt16(i * 2, true) / 32768;
        }
        return audioBuffer;
    }

    /** Schedules one decoded chunk directly after the previously scheduled audio. */
    function schedulePlayback(audioBuffer) {
        if (audioCtx.state === 'suspended') {
            // A suspended context never advances its clock, so nothing would
            // play and `onended` would never fire.
            Promise.resolve(audioCtx.resume()).catch((err) => {
                console.error('Failed to resume AudioContext:', err);
            });
        }

        const source = audioCtx.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(audioCtx.destination);
        source.onended = function () {
            console.log('[Playback] Source ended.');
            pendingSources -= 1;
            finishIfDone();
        };

        // Never schedule in the past: after a gap, restart from "now".
        if (nextPlayTime < audioCtx.currentTime) {
            nextPlayTime = audioCtx.currentTime;
        }
        source.start(nextPlayTime);
        nextPlayTime += audioBuffer.duration;
        pendingSources += 1;
    }

    socket.onopen = function () {
        // Beginning-of-stream message: carries voice settings and the API key.
        const bosMessage = {
            text: ' ',
            voice_settings: {
                stability: 0.5,
                similarity_boost: 0.8,
            },
            xi_api_key: apiKey,
        };
        socket.send(JSON.stringify(bosMessage));

        const textMessage = {
            text: textToSpeak,
            try_trigger_generation: false,
        };
        socket.send(JSON.stringify(textMessage));

        // An empty text marks the end of the input.
        const eosMessage = { text: '' };
        socket.send(JSON.stringify(eosMessage));
    };

    socket.onmessage = function (event) {
        let response;
        try {
            response = JSON.parse(event.data);
        } catch (err) {
            console.error('Ignoring non-JSON WebSocket message:', err);
            return;
        }
        if (response === null || typeof response !== 'object') {
            return;
        }
        console.log('Received response:', response);

        if (response.audio) {
            const audioBuffer = createAudioBuffer(base64ToArrayBuffer(response.audio));
            if (audioBuffer) {
                // Schedule immediately; waiting for a second chunk would leave
                // a single-chunk response unplayed.
                schedulePlayback(audioBuffer);
            }
        }

        if (response.isFinal) {
            console.log('Final response received');
            streamEnded = true;
            finishIfDone();
        }
    };

    socket.onerror = function (error) {
        // Pass the event as its own argument; interpolating it into a string
        // would only print "[object Event]".
        console.error('WebSocket Error:', error);
    };

    socket.onclose = function (event) {
        if (event.wasClean) {
            console.info(`Connection closed cleanly, code=${event.code}, reason=${event.reason}`);
        } else {
            console.warn('Connection died');
        }
        // No further audio can arrive, so let the caller know once whatever
        // is already scheduled has played (or right away if nothing is).
        streamEnded = true;
        finishIfDone();
    };
}
export default initiateTextToSpeech;
// Usage: initiateTextToSpeech(voiceId, apiKey, 'Text to speak', setIsSpeaking);

