Initial commit: Sarthi Lab desktop application
This commit is contained in:
131
src/services/stt.js
Normal file
131
src/services/stt.js
Normal file
@@ -0,0 +1,131 @@
|
||||
// STT service — uses whisper.cpp via Electron IPC, falls back to Web Speech API
|
||||
|
||||
/**
 * Encode raw PCM Float32 chunks captured from AudioContext into a WAV Blob.
 * Whisper requires wav/mp3/ogg/flac — MediaRecorder produces webm which whisper rejects.
 *
 * @param {Float32Array[]} chunks - Mono PCM sample chunks in the range [-1, 1].
 * @param {number} sampleRate - Sample rate in Hz (e.g. 16000 for whisper).
 * @returns {Blob} A complete 16-bit mono PCM WAV file (`audio/wav`).
 */
function encodeWAV(chunks, sampleRate) {
  // Concatenate all chunks into one contiguous Float32 buffer.
  const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
  const samples = new Float32Array(totalLength);
  let offset = 0;
  for (const c of chunks) { samples.set(c, offset); offset += c.length; }

  const dataLength = samples.length * 2; // 16-bit PCM = 2 bytes per sample
  const buffer = new ArrayBuffer(44 + dataLength);
  const v = new DataView(buffer);
  const s = (off, str) => { for (let i = 0; i < str.length; i++) v.setUint8(off + i, str.charCodeAt(i)); };

  // 44-byte RIFF/WAVE header; all multi-byte fields are little-endian per spec.
  s(0, 'RIFF'); v.setUint32(4, 36 + dataLength, true);
  s(8, 'WAVE'); s(12, 'fmt ');
  v.setUint32(16, 16, true); // fmt chunk size
  v.setUint16(20, 1, true); // audio format: PCM
  v.setUint16(22, 1, true); // channels: mono
  v.setUint32(24, sampleRate, true);
  v.setUint32(28, sampleRate * 2, true); // byte rate = sampleRate * blockAlign
  v.setUint16(32, 2, true); // block align = channels * bytesPerSample
  v.setUint16(34, 16, true); // bits per sample
  s(36, 'data'); v.setUint32(40, dataLength, true);

  // Convert floats to clamped 16-bit PCM, written explicitly little-endian.
  // Fix: the previous code handed Int16Array.buffer to the Blob, which uses
  // platform byte order — invalid WAV output on big-endian hosts. WAV
  // mandates little-endian sample data.
  for (let i = 0; i < samples.length; i++) {
    const pcm = Math.max(-32768, Math.min(32767, Math.round(samples[i] * 32767)));
    v.setInt16(44 + i * 2, pcm, true);
  }

  return new Blob([buffer], { type: 'audio/wav' });
}
|
||||
|
||||
/**
 * Start recording from the microphone using AudioContext (produces proper WAV).
 * Returns a recorder handle with a stop() method that resolves to a WAV Blob.
 *
 * @returns {Promise<{stop: () => Promise<Blob>}>} Recorder handle; call
 *   stop() once to tear down the audio graph and obtain the WAV blob.
 */
export async function startRecording() {
  const sampleRate = 16000;
  const micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const audioCtx = new AudioContext({ sampleRate });
  const micSource = audioCtx.createMediaStreamSource(micStream);

  // ScriptProcessor is deprecated but still works reliably in Electron
  const capture = audioCtx.createScriptProcessor(4096, 1, 1);
  const recorded = [];

  // Copy each buffer — the engine reuses the underlying channel data
  // between callbacks, so storing the view directly would alias.
  capture.onaudioprocess = (event) => {
    recorded.push(new Float32Array(event.inputBuffer.getChannelData(0)));
  };

  micSource.connect(capture);
  capture.connect(audioCtx.destination);

  const stop = async () => {
    capture.disconnect();
    micSource.disconnect();
    await audioCtx.close();
    for (const track of micStream.getTracks()) track.stop();
    return encodeWAV(recorded, sampleRate);
  };

  return { stop };
}
|
||||
|
||||
/**
 * Transcribe an audio blob.
 *
 * In Electron: sends blob to whisper.cpp for offline transcription.
 * In browser: Web Speech API is the primary STT (live mic, not blob-based).
 * NOTE — Web Speech listens to the microphone in real time, so
 * calling this from a browser without Electron will start a new
 * listening session rather than processing the provided blob.
 * This is intentional: browsers cannot re-process a recorded blob
 * with Web Speech API.
 *
 * @param {Blob|null|undefined} audioBlob - Recorded audio to transcribe.
 * @returns {Promise<string>} Transcribed text ('' when nothing was recognized
 *   or whisper reported an error).
 * @throws {Error} 'No STT available' when neither backend exists.
 */
export async function transcribeAudio(audioBlob) {
  if (!audioBlob) return '';

  // ── Electron path: send blob to whisper.cpp ──────────────────────────────
  if (typeof window !== 'undefined' && window.electronAPI?.stt) {
    try {
      // FileReader is callback-based; adapt it to a Promise so we can await.
      const reader = new FileReader();
      const base64Promise = new Promise((resolve, reject) => {
        // result is a data: URL — strip the "data:...;base64," prefix.
        reader.onload = () => resolve(reader.result.split(',')[1]);
        reader.onerror = reject;
      });
      reader.readAsDataURL(audioBlob);

      const base64Audio = await base64Promise;
      const result = await window.electronAPI.stt.transcribe(base64Audio);

      if (result?.error && !result?.text) {
        // whisper reported a hard error (binary missing, etc.) — return empty
        console.warn('Whisper STT error:', result.error);
        return '';
      }

      return result?.text ?? '';
    } catch (err) {
      console.warn('Whisper STT IPC error:', err);
      return '';
    }
  }

  // ── Browser fallback: Web Speech API (live mic) ──────────────────────────
  const SpeechRecognition =
    (typeof window !== 'undefined') &&
    (window.SpeechRecognition || window.webkitSpeechRecognition);

  if (SpeechRecognition) {
    return new Promise((resolve, reject) => {
      const recognition = new SpeechRecognition();
      recognition.continuous = false;
      recognition.interimResults = false;
      recognition.lang = 'en-US';
      recognition.onresult = (event) => resolve(event.results[0][0].transcript);
      recognition.onerror = (err) => reject(err);
      // Fix: if recognition ends with neither a result nor an error (silence,
      // user abort), onresult/onerror never fire and the promise used to hang
      // forever. onend always fires last; settling twice is a harmless no-op.
      recognition.onend = () => resolve('');
      recognition.start();
    });
  }

  throw new Error('No STT available');
}
|
||||
|
||||
/**
 * Whether any STT backend can be used in the current environment:
 * Electron whisper IPC, or the browser's Web Speech API.
 *
 * @returns {boolean} true when transcribeAudio() has a usable backend.
 */
export function isSTTAvailable() {
  if (typeof window === 'undefined') return false;
  const hasWhisper = Boolean(window.electronAPI?.stt);
  const hasWebSpeech = Boolean(window.SpeechRecognition || window.webkitSpeechRecognition);
  return hasWhisper || hasWebSpeech;
}
|
||||
Reference in New Issue
Block a user