import React, { useState, useRef, useEffect } from 'react';
import { ResultReason } from 'microsoft-cognitiveservices-speech-sdk';
// Import Azure Speech SDK

// Azure Speech subscription key and region handed to
// SpeechConfig.fromSubscription when a transcription session is created.
// NOTE(review): the subscription key is committed in source, so anyone with
// access to this file can read it — consider rotating it and loading it from
// build-time configuration instead.
const REACT_APP_SPEECH_KEY = '7603d81833d04a229e7190f7cd0284e2';
const REACT_APP_SPEECH_REGION = 'eastus';

const speechsdk = require('microsoft-cognitiveservices-speech-sdk');

const AudioTranscription = () => {
  const [transcription, setTranscription] = useState('');
  const [micStream, setMicStream] = useState(null);
  const [systemStream, setSystemStream] = useState(null);

  async function captureSystemAudio() {
    try {
      const systemStream = await navigator.mediaDevices.getDisplayMedia({
        video: true,
        audio: true,
        systemAudio: "include",
      });
      setSystemStream(systemStream);
      return new MediaStream(systemStream.getAudioTracks());
    } catch (error) {
      console.error("Error capturing system audio:", error);
      return null;
    }
  }

  async function captureMicrophoneAudio() {
    try {
      const micStream = await navigator.mediaDevices.getUserMedia({
        audio: true
      });
      setMicStream(micStream);
      return micStream;
    } catch (error) {
      console.error("Error capturing microphone audio:", error);
      return null;
    }
  }

  function combineAudioStreams(systemStream, micStream) {
    const audioContext = new AudioContext();
    const destination = audioContext.createMediaStreamDestination();
    
    if (systemStream) {
      const systemSource = audioContext.createMediaStreamSource(systemStream);
      systemSource.connect(destination);
    }
  
    if (micStream) {
      const micSource = audioContext.createMediaStreamSource(micStream);
      micSource.connect(destination);
    }
  
    return destination.stream;  // This combined stream can be used for transcription
  }
  
  
  const startRecording = async () => {
    try {
      // let speakerStream = await navigator.mediaDevices.getDisplayMedia({
      //   // We're not going to be using the video track
      //   video: true,
      //   audio: true,
      //   systemAudio: "include",
      // });
      // let audioOnlySpeakerStream = new MediaStream(speakerStream.getAudioTracks());
      // let micStream = await navigator.mediaDevices.getUserMedia({'audio': true});


      const systemStream = await captureSystemAudio();
      const micStream = await captureMicrophoneAudio();

      const combinedStream = combineAudioStreams(systemStream, micStream);
      
      const speechConfig = speechsdk.SpeechConfig.fromSubscription(REACT_APP_SPEECH_KEY, REACT_APP_SPEECH_REGION);
      speechConfig.speechRecognitionLanguage = 'en-US';
      let audioConfig = speechsdk.AudioConfig.fromStreamInput(combinedStream);
      let transcriber = new speechsdk.ConversationTranscriber(speechConfig, audioConfig);

      transcriber.startTranscribingAsync(() => {
        console.log('Continuous transcription started');
      }, (err) => {
        console.error('Failed to start continuous transcription:', err);
      });

      transcriber.transcribed = (s, e) => {
        console.log(`${e.result.speakerId}=${e.result.text}`);
        if(e.result.reason === ResultReason.RecognizedSpeech && e.result.speakerId !== 'Unknown') {
          setTranscription((prevTranscription) => 
            prevTranscription 
          ? `${prevTranscription}\n${e.result.speakerId}=${e.result.text}`
          : `${e.result.speakerId}=${e.result.text}`
          );
        }
      };

      transcriber.canceled = (s, e) => {
        console.error(`CANCELED: Reason=${e.reason}`);
        if (e.reason === speechsdk.CancellationReason.Error) {
          console.error(`CANCELED: ErrorCode=${e.errorCode}`);
          console.error(`CANCELED: ErrorDetails=${e.errorDetails}`);
        }
      };

      alert('Recording and transcription started');
    } catch (err) {
      console.error('Error starting recording: ', err);
    }
  };

  const stopRecording = () => {
    console.log('Stopping recording');
    micStream.getTracks().forEach(track => track.stop());
    systemStream.getTracks().forEach(track => track.stop());
  };

  return (
    <div>
      <h1>Real-Time Transcription</h1>

      {/* Video element for previewing the combined audio stream */}
      {/* <video ref={recordingRef} autoPlay muted width="500px" height="500px"></video> */}

      <div>
        <button onClick={startRecording}>Start Recording</button>
        <button onClick={stopRecording}>Stop Recording</button>
        {/* <a ref={downloadRef} href="#" download="audio-output.mp4">Download Recorded Audio</a> */}
      </div>

      {/* Display Transcription */}
      <div>
        <h2>Transcription</h2>
        <textarea value={transcription} rows="10" cols="50" readOnly />
      </div>
    </div>
  );
};

export default AudioTranscription;
