// STT service — uses whisper.cpp via Electron IPC, falls back to Web Speech API

/**
 * Encode raw PCM Float32 chunks captured from an AudioContext into a WAV Blob.
 *
 * Whisper requires wav/mp3/ogg/flac — MediaRecorder produces webm, which
 * whisper rejects, so the 44-byte RIFF/WAVE header is built by hand.
 *
 * @param {Float32Array[]} chunks - Mono PCM chunks with samples in [-1, 1].
 * @param {number} sampleRate - Sample rate in Hz (e.g. 16000).
 * @returns {Blob} Mono 16-bit little-endian PCM WAV blob.
 */
function encodeWAV(chunks, sampleRate) {
  // Flatten all chunks into one contiguous Float32Array.
  const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
  const samples = new Float32Array(totalLength);
  let offset = 0;
  for (const c of chunks) {
    samples.set(c, offset);
    offset += c.length;
  }

  // Convert float [-1, 1] to 16-bit signed PCM, clamping out-of-range samples.
  const pcm = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    pcm[i] = Math.max(-32768, Math.min(32767, Math.round(samples[i] * 32767)));
  }

  // Canonical 44-byte RIFF header for mono 16-bit PCM.
  const header = new ArrayBuffer(44);
  const v = new DataView(header);
  const s = (off, str) => {
    for (let i = 0; i < str.length; i++) v.setUint8(off + i, str.charCodeAt(i));
  };
  s(0, 'RIFF');
  v.setUint32(4, 36 + pcm.byteLength, true); // RIFF chunk size = 36 + data bytes
  s(8, 'WAVE');
  s(12, 'fmt ');
  v.setUint32(16, 16, true); // fmt chunk size
  v.setUint16(20, 1, true); // audio format: PCM
  v.setUint16(22, 1, true); // channels: mono
  v.setUint32(24, sampleRate, true);
  v.setUint32(28, sampleRate * 2, true); // byte rate = sampleRate * blockAlign
  v.setUint16(32, 2, true); // block align = channels * bytesPerSample
  v.setUint16(34, 16, true); // bits per sample
  s(36, 'data');
  v.setUint32(40, pcm.byteLength, true);

  return new Blob([header, pcm.buffer], { type: 'audio/wav' });
}

/**
 * Start recording from the microphone using AudioContext (produces proper WAV).
 *
 * @returns {Promise<{stop: () => Promise<Blob>}>} Recorder handle whose stop()
 *   tears down the audio graph, releases the microphone, and resolves to a
 *   16 kHz mono WAV Blob of everything captured.
 */
export async function startRecording() {
  const sampleRate = 16000;
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const ctx = new AudioContext({ sampleRate });
  const source = ctx.createMediaStreamSource(stream);
  // ScriptProcessor is deprecated but still works reliably in Electron
  const processor = ctx.createScriptProcessor(4096, 1, 1);
  const chunks = [];
  processor.onaudioprocess = (e) => {
    // Copy the data — the underlying buffer is reused by the audio thread.
    chunks.push(new Float32Array(e.inputBuffer.getChannelData(0)));
  };
  source.connect(processor);
  processor.connect(ctx.destination);

  return {
    stop: async () => {
      // FIX: detach the callback first so no chunk is pushed mid-teardown,
      // then dismantle the graph, close the context, and release the mic.
      processor.onaudioprocess = null;
      processor.disconnect();
      source.disconnect();
      await ctx.close();
      stream.getTracks().forEach((t) => t.stop());
      return encodeWAV(chunks, sampleRate);
    },
  };
}

/**
 * Transcribe an audio blob.
 *
 * In Electron: sends the blob to whisper.cpp for offline transcription.
 * In browser: Web Speech API is the primary STT (live mic, not blob-based).
 *
 * NOTE — Web Speech listens to the microphone in real time, so calling this
 * from a browser without Electron will start a NEW listening session rather
 * than processing the provided blob. This is intentional: browsers cannot
 * re-process a recorded blob with the Web Speech API.
 *
 * @param {Blob|null} audioBlob - WAV audio to transcribe (ignored in the
 *   browser fallback — see note above).
 * @returns {Promise<string>} Transcribed text; '' when input is empty or
 *   whisper reports a hard error.
 * @throws {Error} 'No STT available' when neither Electron IPC nor the Web
 *   Speech API is present.
 */
export async function transcribeAudio(audioBlob) {
  // FIX: also guard zero-byte blobs — whisper errors on empty audio.
  if (!audioBlob || audioBlob.size === 0) return '';

  // ── Electron path: send blob to whisper.cpp ──────────────────────────────
  if (typeof window !== 'undefined' && window.electronAPI?.stt) {
    try {
      // Base64-encode the blob for the IPC boundary via a data URL.
      const reader = new FileReader();
      const base64Promise = new Promise((resolve, reject) => {
        reader.onload = () => resolve(reader.result.split(',')[1]);
        reader.onerror = reject;
      });
      reader.readAsDataURL(audioBlob);
      const base64Audio = await base64Promise;

      const result = await window.electronAPI.stt.transcribe(base64Audio);
      if (result?.error && !result?.text) {
        // whisper reported a hard error (binary missing, etc.) — return empty
        console.warn('Whisper STT error:', result.error);
        return '';
      }
      return result?.text ?? '';
    } catch (err) {
      console.warn('Whisper STT IPC error:', err);
      return '';
    }
  }

  // ── Browser fallback: Web Speech API (live mic) ──────────────────────────
  const SpeechRecognition =
    typeof window !== 'undefined' &&
    (window.SpeechRecognition || window.webkitSpeechRecognition);
  if (SpeechRecognition) {
    return new Promise((resolve, reject) => {
      const recognition = new SpeechRecognition();
      recognition.continuous = false;
      recognition.interimResults = false;
      recognition.lang = 'en-US';
      recognition.onresult = (event) => resolve(event.results[0][0].transcript);
      // FIX: reject with a real Error (the raw SpeechRecognitionErrorEvent
      // carried only an `error` code string and no stack).
      recognition.onerror = (event) =>
        reject(new Error(`Speech recognition failed: ${event.error ?? 'unknown'}`));
      recognition.start();
    });
  }

  throw new Error('No STT available');
}

/**
 * @returns {boolean} True when either the Electron whisper bridge or the
 *   browser Web Speech API is available in this environment.
 */
export function isSTTAvailable() {
  if (typeof window === 'undefined') return false;
  if (window.electronAPI?.stt) return true;
  return !!(window.SpeechRecognition || window.webkitSpeechRecognition);
}