import { StreamingTranscriberTokenProvider, type StreamingAmbientToken } from "./ambientstreamingtokenprovider";
import PCMProcessorUrl from './PCMProcessor?worker&url'

import * as SDK from "microsoft-cognitiveservices-speech-sdk";



/**
 * Configuration accepted by {@link MicrophoneAudioTranscriber}.
 */
export interface MicrophoneAudioTranscriberOptions {
    /** Identifier handed to the streaming token provider when the transcriber is constructed. */
    ambience_id: string;
    /** Not read by the transcriber in this file — presumably consumed by callers; verify before removing. */
    context_id: string;

    /** Input device to capture from; its `deviceId` is passed to `getUserMedia`. */
    microphone: MediaDeviceInfo;
    /** Region passed to the Speech SDK together with the authorization token. */
    region: string;
    /** Language assigned to the speech configuration's `speechRecognitionLanguage`. */
    recognition_language: "nb-NO" | "en-GB";

    /** Invoked with the text of each `recognizing` event raised by the recognizer. */
    on_recognizing: (text: string) => void;
    /** Invoked with the text of each `recognized` event raised by the recognizer. */
    on_recognized: (text: string) => void;
    /** Optional tap on the audio: invoked with every chunk of samples before it is forwarded to the recognizer. */
    on_data?: (data: Int16Array) => void;
}

/**
 * Captures audio from a microphone, converts it to PCM chunks in an
 * AudioWorklet and pushes those chunks into a Speech SDK continuous
 * recognizer, reporting interim and final text through the option callbacks.
 *
 * Lifecycle: call {@link start} to acquire the microphone and begin
 * recognition, and {@link stop} to end recognition and release every
 * resource. An instance may be started again after it has been stopped.
 */
export class MicrophoneAudioTranscriber {
    /** Sample rate requested for the AudioContext; also converts sample counts to seconds. */
    private static readonly SAMPLE_RATE_HZ = 16000;
    /** How often a fresh authorization token is fetched and handed to the recognizer. */
    private static readonly TOKEN_REFRESH_INTERVAL_MS = 1000 * 60;
    /** Delay before (re)trying to start continuous recognition. */
    private static readonly RETRY_DELAY_MS = 1000;

    private readonly options: MicrophoneAudioTranscriberOptions;
    private readonly token_provider: StreamingTranscriberTokenProvider;
    private recognizer: SDK.SpeechRecognizer | null = null;
    private push_stream: SDK.PushAudioInputStream | null = null;
    private audio_context: AudioContext | null = null;
    private media_stream: MediaStream | null = null;
    private media_stream_source: MediaStreamAudioSourceNode | null = null;
    private worklet_node: AudioWorkletNode | null = null;
    private refresh_timer: ReturnType<typeof setInterval> | null = null;
    private retry_timer: ReturnType<typeof setTimeout> | null = null;

    /** Most recently fetched token. */
    public token: StreamingAmbientToken | null = null; // todo is nice for visualizing in debug mode, move to private later on
    /** Seconds of audio pushed to the recognizer so far; never reset by this class. */
    public duration: number = 0;

    /**
     * @param options Microphone, language, region and callbacks; see
     *   {@link MicrophoneAudioTranscriberOptions}.
     */
    public constructor(options: MicrophoneAudioTranscriberOptions) {
        this.options = options;
        this.token_provider = new StreamingTranscriberTokenProvider(options.ambience_id);
        SDK.Recognizer.enableTelemetry(false);
    }

    /**
     * Acquires the microphone, creates the recognizer and starts continuous
     * recognition.
     *
     * Rejects if the microphone cannot be opened, the token cannot be fetched
     * or the recognizer cannot be constructed. Failures while building the
     * audio pipeline are logged instead of thrown. In every failure case all
     * resources acquired so far are released.
     */
    async start(): Promise<void> {
        // Nothing has been acquired yet, so a rejection here needs no cleanup.
        const stream = await navigator.mediaDevices.getUserMedia({
            audio: {
                deviceId: this.options.microphone.deviceId,
            },
        });
        this.media_stream = stream;

        let push_stream: SDK.PushAudioInputStream;
        try {
            this.token = await this.token_provider.token();
            push_stream = SDK.PushAudioInputStream.createPushStream();
            this.push_stream = push_stream;
            this.recognizer = this.createRecognizer(this.token.token, push_stream);
        } catch (error) {
            // Release the already-open microphone before reporting the failure.
            await this.cleanup();
            throw error;
        }

        this.startTokenRefresh();

        try {
            await this.startAudioPipeline(stream, push_stream);
        } catch (error) {
            console.error(error);
            // Recognition was never started, so release resources directly
            // rather than asking the recognizer to stop first.
            await this.cleanup();
            return;
        }

        this.startWithRetry();
    }

    /**
     * Stops continuous recognition and releases every resource. A failure to
     * stop recognition is logged; cleanup runs regardless.
     */
    async stop(): Promise<void> {
        const recognizer = this.recognizer;
        if (!recognizer) {
            console.warn("Recognizer is already disposed or not initialized.");
            return;
        }

        // A pending retry must not restart recognition while we are stopping.
        this.cancelRetry();

        try {
            await new Promise<void>((resolve, reject) => {
                recognizer.stopContinuousRecognitionAsync(
                    () => {
                        console.log("Recognition stopped successfully");
                        resolve();
                    },
                    (err) => reject(new Error(`Failed to stop recognition: ${err}`)),
                );
            });
        } catch (error) {
            console.error("Error stopping recognition:", error);
        } finally {
            await this.cleanup();
        }
    }

    /** Builds a recognizer fed by `push_stream` and wires its events to the option callbacks. */
    private createRecognizer(
        auth_token: string,
        push_stream: SDK.PushAudioInputStream,
    ): SDK.SpeechRecognizer {
        const speech_config = SDK.SpeechConfig.fromAuthorizationToken(auth_token, this.options.region);
        speech_config.speechRecognitionLanguage = this.options.recognition_language;
        const audio_config = SDK.AudioConfig.fromStreamInput(push_stream);
        const recognizer = new SDK.SpeechRecognizer(speech_config, audio_config);

        const { on_recognizing, on_recognized } = this.options;
        recognizer.recognizing = (_sender, event) => {
            on_recognizing(event.result.text);
        };
        recognizer.recognized = (_sender, event) => {
            on_recognized(event.result.text);
        };
        recognizer.canceled = (_sender, event) => {
            switch (event.errorCode) {
                case SDK.CancellationErrorCode.TooManyRequests:
                    console.error("Too many requests to the transcriber. Retrying in 1 second...");
                    // Back off before retrying, as the log message promises.
                    this.scheduleRetry();
                    break;
                default:
                    console.error(event.errorDetails);
                    break;
            }
        };
        return recognizer;
    }

    /** Starts the periodic token refresh; the timer is cleared in {@link cleanup}. */
    private startTokenRefresh(): void {
        this.refresh_timer = setInterval(() => {
            void this.refreshToken();
        }, MicrophoneAudioTranscriber.TOKEN_REFRESH_INTERVAL_MS);
    }

    /** Fetches a new token and hands it to the live recognizer, if there still is one. */
    private async refreshToken(): Promise<void> {
        try {
            this.token = await this.token_provider.token();
            // The transcriber may have been stopped while the request was in flight.
            if (this.recognizer) {
                this.recognizer.authorizationToken = this.token.token;
            }
        } catch (error) {
            // Keep the interval alive; the next tick will try again.
            console.error("Failed to refresh the transcriber token:", error);
        }
    }

    /**
     * Creates the AudioContext, loads the PCM worklet and connects
     * microphone -> worklet -> destination. Every chunk posted by the worklet
     * is forwarded to `on_data` (if set) and written to `push_stream`.
     */
    private async startAudioPipeline(
        stream: MediaStream,
        push_stream: SDK.PushAudioInputStream,
    ): Promise<void> {
        const sample_rate = MicrophoneAudioTranscriber.SAMPLE_RATE_HZ;

        // Store each node as soon as it exists so cleanup() can release it
        // even if a later step throws.
        const audio_context = new AudioContext({ sampleRate: sample_rate });
        this.audio_context = audio_context;
        await audio_context.audioWorklet.addModule(PCMProcessorUrl);

        const source = audio_context.createMediaStreamSource(stream);
        this.media_stream_source = source;

        const worklet_node = new AudioWorkletNode(audio_context, "audio-worklet-processor");
        this.worklet_node = worklet_node;

        worklet_node.port.onmessage = (event: MessageEvent) => {
            const buffer = event.data.buffer;
            const samples = new Int16Array(buffer);
            this.options.on_data?.(samples);
            push_stream.write(buffer);
            this.duration += samples.length / sample_rate;
        };

        source.connect(worklet_node);
        worklet_node.connect(audio_context.destination);
    }

    /**
     * Asks the recognizer to start continuous recognition; on failure a new
     * attempt is scheduled after the retry delay. Does nothing once the
     * recognizer has been disposed.
     */
    private startWithRetry(): void {
        const recognizer = this.recognizer;
        if (!recognizer) {
            return;
        }

        try {
            recognizer.startContinuousRecognitionAsync(
                () => {
                    console.log("Recognition started successfully");
                },
                (err: string) => {
                    console.warn("Failed to start recognition. Retrying in 1 second...", err);
                    this.scheduleRetry();
                },
            );
        } catch (err) {
            console.warn("Failed to start recognition. Retrying in 1 second...", err);
            this.scheduleRetry();
        }
    }

    /** Schedules a single pending start attempt, replacing any earlier one. */
    private scheduleRetry(): void {
        this.cancelRetry();
        this.retry_timer = setTimeout(() => {
            this.retry_timer = null;
            this.startWithRetry();
        }, MicrophoneAudioTranscriber.RETRY_DELAY_MS);
    }

    /** Cancels the pending start attempt, if any. */
    private cancelRetry(): void {
        if (this.retry_timer !== null) {
            clearTimeout(this.retry_timer);
            this.retry_timer = null;
        }
    }

    /**
     * Releases every resource held by the instance. Safe to call repeatedly
     * and with only some resources acquired: fields are detached from the
     * instance before anything asynchronous happens, so a second call finds
     * nothing left to release.
     */
    private async cleanup(): Promise<void> {
        if (this.refresh_timer !== null) {
            clearInterval(this.refresh_timer);
            this.refresh_timer = null;
        }
        this.cancelRetry();

        const recognizer = this.recognizer;
        const push_stream = this.push_stream;
        const media_stream = this.media_stream;
        const media_stream_source = this.media_stream_source;
        const worklet_node = this.worklet_node;
        const audio_context = this.audio_context;

        this.recognizer = null;
        this.push_stream = null;
        this.media_stream = null;
        this.media_stream_source = null;
        this.worklet_node = null;
        this.audio_context = null;

        media_stream?.getTracks().forEach((track) => track.stop());

        if (worklet_node) {
            // Stop forwarding chunks before the push stream is closed.
            worklet_node.port.onmessage = null;
            worklet_node.disconnect();
        }
        media_stream_source?.disconnect();

        try {
            await audio_context?.close();
        } catch (error) {
            // Do not let a failing close() prevent the remaining releases.
            console.error("Error closing audio context:", error);
        }

        push_stream?.close();
        recognizer?.close();
    }
}