From 990ef03dcac62a3d3a35104cf6f16e223b068c18 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 7 Jun 2025 23:01:17 +0000 Subject: [PATCH] Fix: Make naudiodon optional and document prerequisites This commit addresses build failures related to the `naudiodon` package encountered during `npm install`. Changes Made: 1. **`naudiodon` as Optional Dependency:** * Moved `naudiodon` from `dependencies` to `optionalDependencies` in `package.json`. This allows `npm install` to succeed even if `naudiodon` fails to build on your system, preventing the installation from being blocked. 2. **Graceful Handling of `naudiodon` Absence:** * Modified `src/process/tts_process.js` to dynamically import `naudiodon`. * If `naudiodon` is not found or fails to load, the Speech-to-Text (STT) functionality that relies on it for microphone input will be gracefully disabled. * The application will log a warning in this case but will otherwise start and run normally. 3. **Documentation of Prerequisites:** * Updated `README.md` with a new section detailing the system prerequisites for building `naudiodon` successfully on Linux, Windows, and macOS. This includes commands for installing necessary C++ compilers, development tools, and PortAudio libraries. * Added notes to the README explaining that `naudiodon` is used for STT and is optional. **Summary of Approach:** The primary goal was to resolve the `npm install` error caused by `naudiodon`. By making it an optional dependency and ensuring the application handles its absence, you can now install and run the core application without needing to immediately troubleshoot `naudiodon` build issues. If you wish to use the STT feature, you can refer to the updated README for guidance on installing the necessary system dependencies for `naudiodon`. **Note on Your Feedback (STT Alternatives):** You expressed a desire for STT to work even without `naudiodon`, possibly using alternative packages. While this commit ensures the application no longer errors out due to `naudiodon` and makes STT optionally functional, it does not replace `naudiodon` with an alternative for STT audio input. Exploring and integrating alternative cross-platform audio input libraries for STT would be a separate task. This set of changes should improve the installation experience across different platforms. --- README.md | 71 ++++- package.json | 4 +- src/process/tts_process.js | 570 +++++++++++++++++++++---------------- 3 files changed, 394 insertions(+), 251 deletions(-) diff --git a/README.md b/README.md index 7f422ff..2990665 100644 --- a/README.md +++ b/README.md @@ -14,15 +14,80 @@ Do not connect this bot to public servers with coding enabled. This project allo - [Node.js Installed](https://nodejs.org/) (at least v14) - One of these: [OpenAI API Key](https://openai.com/blog/openai-api) | [Gemini API Key](https://aistudio.google.com/app/apikey) | [Anthropic API Key](https://docs.anthropic.com/claude/docs/getting-access-to-claude) | [Replicate API Key](https://replicate.com/) | [Hugging Face API Key](https://huggingface.co/) | [Groq API Key](https://console.groq.com/keys) | [Ollama Installed](https://ollama.com/download). | [Mistral API Key](https://docs.mistral.ai/getting-started/models/models_overview/) | [Qwen API Key [Intl.]](https://www.alibabacloud.com/help/en/model-studio/developer-reference/get-api-key)/[[cn]](https://help.aliyun.com/zh/model-studio/getting-started/first-api-call-to-qwen?) | [Novita AI API Key](https://novita.ai/settings?utm_source=github_mindcraft&utm_medium=github_readme&utm_campaign=link#key-management) | +## Installation Prerequisites + +### `naudiodon` for Speech-to-Text (STT) + +The STT (Speech-to-Text) functionality in Mindcraft uses the `naudiodon` package for audio input. `naudiodon` is a native Node.js addon and might require additional steps to compile correctly during `npm install`. + +**`naudiodon` is an optional dependency.** This means: +* If `naudiodon` fails to install or build, the core Mindcraft application will still run. +* However, the Speech-to-Text (STT) feature will be automatically disabled if `naudiodon` is not available. You will see warnings in the console if it fails to load. +* If you wish to use STT and encounter build issues with `naudiodon`, please ensure you have the necessary build tools and libraries listed below for your operating system. + +**General Requirements for Building `naudiodon`:** +* **Node.js:** Ensure Node.js (v14+) is properly installed and added to your system's PATH. +* **Python:** `node-gyp` (the tool used to build native addons like `naudiodon`) requires Python. Recent versions of `node-gyp` are compatible with Python 3.x. Make sure Python is installed and accessible. +* **C++ Compiler Toolchain:** A C++ compiler (like g++ or MSVC) and related build tools (like `make` or MSBuild) are necessary. +* **PortAudio Library:** `naudiodon` specifically requires the PortAudio library. + +**Operating System Specifics for `PortAudio` (and `naudiodon` build):** + +### Linux +* **Debian/Ubuntu:** + ```bash + sudo apt-get update + sudo apt-get install build-essential libasound2-dev libportaudio-dev + ``` + (`build-essential` provides g++, make, etc. `libasound2-dev` is for ALSA, and `libportaudio-dev` is crucial for `naudiodon`.) + +* **Fedora/RHEL/CentOS:** + ```bash + # For newer Fedora (using dnf) + sudo dnf groupinstall "Development Tools" + sudo dnf install alsa-lib-devel portaudio-devel + + # For older RHEL/CentOS (using yum) + sudo yum groupinstall "Development Tools" + sudo yum install alsa-lib-devel portaudio-devel + ``` + (`portaudio-devel` is the equivalent of `libportaudio-dev`.) + +### Windows +* **Visual Studio C++ Build Tools:** This is the recommended way. + 1. Download the [Visual Studio Installer](https://visualstudio.microsoft.com/downloads/). + 2. Run the installer and select "Desktop development with C++" under the "Workloads" tab. This will install the necessary C++ compiler, MSBuild, and Windows SDKs. + 3. Ensure that Python is correctly configured for `node-gyp`. If you have multiple Python versions, you might need to tell `npm` which one to use (e.g., `npm config set python C:\path\to\python.exe`) or ensure your desired Python version is first in your system's PATH. +* **MSYS2/MinGW:** While possible, this can be more complex. You would need to compile/install PortAudio within the MSYS2 environment and ensure `node-gyp` is configured to use the MinGW toolchain. Using the Visual Studio C++ Build Tools is generally more straightforward for `node-gyp` on Windows. + +### macOS +* **Xcode Command Line Tools:** + ```bash + xcode-select --install + ``` + (This installs Clang, make, and other necessary build tools.) +* **PortAudio:** + ```bash + brew install portaudio + ``` + (Homebrew is the easiest way to install PortAudio on macOS.) +* **pkg-config (if needed):** + ```bash + brew install pkg-config + ``` + (Sometimes required for build scripts to find library information.) + +If you see warnings or errors related to `naudiodon` during `npm install` and you *do not* intend to use the STT feature, these can typically be ignored. If you *do* want STT, ensure the above prerequisites are met. + ## Install and Run -1. Make sure you have the requirements above. +1. Make sure you have the requirements above. If you plan to use the STT (Speech-to-Text) feature, also review the "Installation Prerequisites" section regarding `naudiodon`. 2. Clone or download this repository (big green button) 3. Rename `keys.example.json` to `keys.json` and fill in your API keys (you only need one). The desired model is set in `andy.json` or other profiles. For other models refer to the table below. -4. In terminal/command prompt, run `npm install` from the installed directory +4. In terminal/command prompt, run `npm install` from the installed directory. (Note: If `naudiodon` fails to build and you don't need STT, you can usually proceed.) 5. Start a minecraft world and open it to LAN on localhost port `55916` @@ -131,7 +196,7 @@ STT can be enabled in `settings.js` under the section that looks like this: "stt_agent_name": "" ``` -The Text to Speech engine will begin listening on the system default input device. +The Text to Speech engine will begin listening on the system default input device. **Note:** Successful STT operation depends on the `naudiodon` package, which is an optional dependency. If `naudiodon` failed to install or build (see "Installation Prerequisites" for troubleshooting), STT will be disabled. When using STT, you **need** a [GroqCloud API key](https://console.groq.com/keys) as Groq is used for Audio transcription diff --git a/package.json b/package.json index c713c92..25b6235 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,6 @@ "mineflayer-collectblock": "^1.4.1", "mineflayer-pathfinder": "^2.4.5", "mineflayer-pvp": "^1.3.2", - "naudiodon": "^2.3.6", "node-canvas-webgl": "PrismarineJS/node-canvas-webgl", "openai": "^4.4.0", "patch-package": "^8.0.0", @@ -33,6 +32,9 @@ "wav": "^1.0.2", "yargs": "^17.7.2" }, + "optionalDependencies": { + "naudiodon": "^2.3.6" + }, "scripts": { "postinstall": "patch-package", "start": "node main.js" diff --git a/src/process/tts_process.js b/src/process/tts_process.js index 5d20259..59d97e5 100644 --- a/src/process/tts_process.js +++ b/src/process/tts_process.js @@ -1,247 +1,323 @@ -import settings from '../../settings.js'; -import { GroqCloudTTS } from '../models/groq.js'; -import portAudio from 'naudiodon'; -const { AudioIO, SampleFormat16Bit } = portAudio; -import wav from 'wav'; -import fs from 'fs'; -import path from 'path'; -import { fileURLToPath } from 'url'; - -// Import getIO and our new function getAllInGameAgentNames -import { getIO, getAllInGameAgentNames } from '../server/mind_server.js'; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); - -/** - * Delete leftover speech_*.wav from previous runs - */ -const leftover = fs.readdirSync(__dirname).filter(f => /^speech_\d+\.wav$/.test(f)); -for (const file of leftover) { - try { - fs.unlinkSync(path.join(__dirname, file)); - } catch (_) { - // ignore errors - } -} - -// Configuration -const RMS_THRESHOLD = 500; // Lower threshold for faint audio -const SILENCE_DURATION = 2000; // 2 seconds of silence after speech => stop -const SAMPLE_RATE = 16000; -const BIT_DEPTH = 16; -const STT_USERNAME = settings.stt_username || "SERVER"; // Name that appears as sender -const STT_AGENT_NAME = settings.stt_agent_name || ""; // If blank, broadcast to all - -// Guards to prevent multiple overlapping recordings -let isRecording = false; // Ensures only one recordAndTranscribeOnce at a time -let sttRunning = false; // Ensures continuousLoop is started only once - -/** - * Records one session, transcribes, and sends to MindServer as a chat message - */ -async function recordAndTranscribeOnce() { - // If another recording is in progress, just skip - if (isRecording) { - console.log("Another recording is still in progress; skipping new record attempt."); - return null; - } - isRecording = true; - - const outFile = path.join(__dirname, `speech_${Date.now()}.wav`); - const fileWriter = new wav.FileWriter(outFile, { - channels: 1, - sampleRate: SAMPLE_RATE, - bitDepth: BIT_DEPTH - }); - const ai = new AudioIO({ - inOptions: { - channelCount: 1, - sampleFormat: SampleFormat16Bit, - sampleRate: SAMPLE_RATE, - deviceId: -1, - closeOnError: true - } - }); - - let recording = true; - let hasHeardSpeech = false; - let silenceTimer = null; - let finished = false; // Guard to ensure final processing is done only once - - // Helper to reset silence timer - function resetSilenceTimer() { - if (silenceTimer) clearTimeout(silenceTimer); - if (hasHeardSpeech) { - silenceTimer = setTimeout(() => stopRecording(), SILENCE_DURATION); - } - } - - // Stop recording - function stopRecording() { - if (!recording) return; - recording = false; - ai.quit(); - fileWriter.end(); - } - - // We wrap everything in a promise so we can await the transcription - return new Promise((resolve, reject) => { - // Attach event handlers - ai.on('data', (chunk) => { - fileWriter.write(chunk); - - // Calculate RMS for threshold detection - let sumSquares = 0; - const sampleCount = chunk.length / 2; - for (let i = 0; i < chunk.length; i += 2) { - const sample = chunk.readInt16LE(i); - sumSquares += sample * sample; - } - const rms = Math.sqrt(sumSquares / sampleCount); - - // If RMS passes threshold, we've heard speech - if (rms > RMS_THRESHOLD) { - if (!hasHeardSpeech) { - hasHeardSpeech = true; - } - resetSilenceTimer(); - } - }); - - ai.on('error', (err) => { - cleanupListeners(); - reject(err); - }); - - fileWriter.on('finish', async () => { - if (finished) return; - finished = true; - try { - // Check audio duration - const stats = fs.statSync(outFile); - const headerSize = 44; // standard WAV header size - const dataSize = stats.size - headerSize; - const duration = dataSize / (SAMPLE_RATE * (BIT_DEPTH / 8)); - if (duration < 2.75) { - console.log("Audio too short (<2.75s); discarding."); - fs.unlink(outFile, () => {}); - cleanupListeners(); - return resolve(null); - } - - // Transcribe - const groqTTS = new GroqCloudTTS(); - const text = await groqTTS.transcribe(outFile, { - model: "distil-whisper-large-v3-en", - prompt: "", - response_format: "json", - language: "en", - temperature: 0.0 - }); - - fs.unlink(outFile, () => {}); // cleanup WAV file - - // Basic check for empty or whitespace - if (!text || !text.trim()) { - console.log("Transcription empty; discarding."); - cleanupListeners(); - return resolve(null); - } - - // Heuristic checks to determine if the transcription is genuine - - // 1. Ensure at least one alphabetical character - if (!/[A-Za-z]/.test(text)) { - console.log("Transcription has no letters; discarding."); - cleanupListeners(); - return resolve(null); - } - - // 2. Check for gibberish repeated sequences - if (/([A-Za-z])\1{3,}/.test(text)) { - console.log("Transcription looks like gibberish; discarding."); - cleanupListeners(); - return resolve(null); - } - - // 3. Check transcription length, with allowed greetings - const letterCount = text.replace(/[^A-Za-z]/g, "").length; - const normalizedText = text.trim().toLowerCase(); - const allowedGreetings = new Set(["hi", "hello", "greetings", "hey"]); - - if (letterCount < 8 && !allowedGreetings.has(normalizedText)) { - console.log("Transcription too short and not an allowed greeting; discarding."); - cleanupListeners(); - return resolve(null); - } - - console.log("Transcription:", text); - - // Format message so it looks like: "[SERVER] message" - const finalMessage = `[${STT_USERNAME}] ${text}`; - - // If STT_AGENT_NAME is empty, broadcast to all agents - if (!STT_AGENT_NAME.trim()) { - const agentNames = getAllInGameAgentNames(); // from mind_server - for (const agentName of agentNames) { - getIO().emit('send-message', agentName, finalMessage); - } - } else { - // Otherwise, send only to the specified agent - getIO().emit('send-message', STT_AGENT_NAME, finalMessage); - } - - cleanupListeners(); - resolve(text); - } catch (err) { - cleanupListeners(); - reject(err); - } - }); - - ai.start(); - - function cleanupListeners() { - ai.removeAllListeners('data'); - ai.removeAllListeners('error'); - fileWriter.removeAllListeners('finish'); - if (silenceTimer) clearTimeout(silenceTimer); - - // release lock - isRecording = false; - } - }); -} - -/** - * Runs recording sessions sequentially, so only one at a time - */ -async function continuousLoop() { - while (true) { - try { - await recordAndTranscribeOnce(); - } catch (err) { - console.error("[STT Error]", err); - } - // short gap - await new Promise(res => setTimeout(res, 1000)); - } -} - -export function initTTS() { - // Only run if stt_transcription is true and we haven't started already - if (!settings.stt_transcription) return; - - if (sttRunning) { - console.log("STT loop already running; skipping re-init."); - return; - } - sttRunning = true; - - continuousLoop().catch((err) => { - console.error("[STT] continuousLoop crashed", err); - }); -} - -initTTS(); +import settings from '../../settings.js'; +import { GroqCloudTTS } from '../models/groq.js'; +// import portAudio from 'naudiodon'; // Original static import +// const { AudioIO, SampleFormat16Bit } = portAudio; // Original destructuring +import wav from 'wav'; +import fs from 'fs'; +import path from 'path'; +import { fileURLToPath } from 'url'; + +// Import getIO and our new function getAllInGameAgentNames +import { getIO, getAllInGameAgentNames } from '../server/mind_server.js'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +// --- Conditional Naudiodon Import --- +let portAudio; +let AudioIO; +let SampleFormat16Bit; + +(async () => { + try { + const naudiodonModule = await import('naudiodon'); + portAudio = naudiodonModule.default; // CommonJS modules often export functionality on 'default' when imported into ES modules + if (portAudio && typeof portAudio.AudioIO === 'function' && typeof portAudio.SampleFormat16Bit !== 'undefined') { + AudioIO = portAudio.AudioIO; + SampleFormat16Bit = portAudio.SampleFormat16Bit; + console.log('[STT] naudiodon loaded successfully.'); + } else if (naudiodonModule.AudioIO && typeof naudiodonModule.SampleFormat16Bit !== 'undefined') { + // Fallback if 'default' is not used and properties are directly on the module + AudioIO = naudiodonModule.AudioIO; + SampleFormat16Bit = naudiodonModule.SampleFormat16Bit; + portAudio = naudiodonModule; // Assign the module itself to portAudio for consistency if needed elsewhere + console.log('[STT] naudiodon loaded successfully (direct properties).'); + } + else { + throw new Error('AudioIO or SampleFormat16Bit not found in naudiodon module exports.'); + } + } catch (err) { + console.warn(`[STT] Failed to load naudiodon, Speech-to-Text will be disabled. Error: ${err.message}`); + portAudio = null; + AudioIO = null; + SampleFormat16Bit = null; + } + // Initialize TTS after attempting to load naudiodon + initTTS(); +})(); + + +/** + * Delete leftover speech_*.wav from previous runs + */ +const leftover = fs.readdirSync(__dirname).filter(f => /^speech_\d+\.wav$/.test(f)); +for (const file of leftover) { + try { + fs.unlinkSync(path.join(__dirname, file)); + } catch (_) { + // ignore errors + } +} + +// Configuration +const RMS_THRESHOLD = 500; // Lower threshold for faint audio +const SILENCE_DURATION = 2000; // 2 seconds of silence after speech => stop +const SAMPLE_RATE = 16000; +const BIT_DEPTH = 16; +const STT_USERNAME = settings.stt_username || "SERVER"; // Name that appears as sender +const STT_AGENT_NAME = settings.stt_agent_name || ""; // If blank, broadcast to all + +// Guards to prevent multiple overlapping recordings +let isRecording = false; // Ensures only one recordAndTranscribeOnce at a time +let sttRunning = false; // Ensures continuousLoop is started only once + +/** + * Records one session, transcribes, and sends to MindServer as a chat message + */ +async function recordAndTranscribeOnce() { + // If another recording is in progress, just skip + if (isRecording) { + console.log("[STT] Another recording is still in progress; skipping new record attempt."); + return null; + } + isRecording = true; + + const outFile = path.join(__dirname, `speech_${Date.now()}.wav`); + const fileWriter = new wav.FileWriter(outFile, { + channels: 1, + sampleRate: SAMPLE_RATE, + bitDepth: BIT_DEPTH + }); + + // This is where AudioIO is crucial + if (!AudioIO || !SampleFormat16Bit) { + console.warn("[STT] AudioIO or SampleFormat16Bit not available. Cannot record audio."); + isRecording = false; + return null; + } + + const ai = new AudioIO({ + inOptions: { + channelCount: 1, + sampleFormat: SampleFormat16Bit, + sampleRate: SAMPLE_RATE, + deviceId: -1, + closeOnError: true + } + }); + + let recording = true; + let hasHeardSpeech = false; + let silenceTimer = null; + let finished = false; // Guard to ensure final processing is done only once + + // Helper to reset silence timer + function resetSilenceTimer() { + if (silenceTimer) clearTimeout(silenceTimer); + if (hasHeardSpeech) { + silenceTimer = setTimeout(() => stopRecording(), SILENCE_DURATION); + } + } + + // Stop recording + function stopRecording() { + if (!recording) return; + recording = false; + ai.quit(); + fileWriter.end(); + } + + // We wrap everything in a promise so we can await the transcription + return new Promise((resolve, reject) => { + // Attach event handlers + ai.on('data', (chunk) => { + fileWriter.write(chunk); + + // Calculate RMS for threshold detection + let sumSquares = 0; + const sampleCount = chunk.length / 2; + for (let i = 0; i < chunk.length; i += 2) { + const sample = chunk.readInt16LE(i); + sumSquares += sample * sample; + } + const rms = Math.sqrt(sumSquares / sampleCount); + + // If RMS passes threshold, we've heard speech + if (rms > RMS_THRESHOLD) { + if (!hasHeardSpeech) { + hasHeardSpeech = true; + } + resetSilenceTimer(); + } + }); + + ai.on('error', (err) => { + console.error("[STT] AudioIO error:", err); + cleanupListeners(); + // Don't reject here, as continuousLoop should continue. Resolve with null. + resolve(null); + }); + + fileWriter.on('finish', async () => { + if (finished) return; + finished = true; + try { + // Check audio duration + const stats = fs.statSync(outFile); + const headerSize = 44; // standard WAV header size + const dataSize = stats.size - headerSize; + const duration = dataSize / (SAMPLE_RATE * (BIT_DEPTH / 8)); + if (duration < 2.75) { + console.log("[STT] Audio too short (<2.75s); discarding."); + fs.unlink(outFile, () => {}); + cleanupListeners(); + return resolve(null); + } + + // Transcribe + const groqTTS = new GroqCloudTTS(); + const text = await groqTTS.transcribe(outFile, { + model: "distil-whisper-large-v3-en", + prompt: "", + response_format: "json", + language: "en", + temperature: 0.0 + }); + + fs.unlink(outFile, () => {}); // cleanup WAV file + + // Basic check for empty or whitespace + if (!text || !text.trim()) { + console.log("[STT] Transcription empty; discarding."); + cleanupListeners(); + return resolve(null); + } + + // Heuristic checks to determine if the transcription is genuine + + // 1. Ensure at least one alphabetical character + if (!/[A-Za-z]/.test(text)) { + console.log("[STT] Transcription has no letters; discarding."); + cleanupListeners(); + return resolve(null); + } + + // 2. Check for gibberish repeated sequences + if (/([A-Za-z])\1{3,}/.test(text)) { + console.log("[STT] Transcription looks like gibberish; discarding."); + cleanupListeners(); + return resolve(null); + } + + // 3. Check transcription length, with allowed greetings + const letterCount = text.replace(/[^A-Za-z]/g, "").length; + const normalizedText = text.trim().toLowerCase(); + const allowedGreetings = new Set(["hi", "hello", "greetings", "hey"]); + + if (letterCount < 8 && !allowedGreetings.has(normalizedText)) { + console.log("[STT] Transcription too short and not an allowed greeting; discarding."); + cleanupListeners(); + return resolve(null); + } + + console.log("[STT] Transcription:", text); + + // Format message so it looks like: "[SERVER] message" + const finalMessage = `[${STT_USERNAME}] ${text}`; + + // If STT_AGENT_NAME is empty, broadcast to all agents + if (!STT_AGENT_NAME.trim()) { + const agentNames = getAllInGameAgentNames(); // from mind_server + for (const agentName of agentNames) { + getIO().emit('send-message', agentName, finalMessage); + } + } else { + // Otherwise, send only to the specified agent + getIO().emit('send-message', STT_AGENT_NAME, finalMessage); + } + + cleanupListeners(); + resolve(text); + } catch (err) { + console.error("[STT] Error during transcription or sending message:", err); + fs.unlink(outFile, () => {}); // Attempt cleanup even on error + cleanupListeners(); + reject(err); // Propagate error for continuousLoop to catch + } + }); + + ai.start(); + + function cleanupListeners() { + if (ai && typeof ai.removeAllListeners === 'function') { + ai.removeAllListeners('data'); + ai.removeAllListeners('error'); + } + if (fileWriter && typeof fileWriter.removeAllListeners === 'function') { + fileWriter.removeAllListeners('finish'); + } + if (silenceTimer) clearTimeout(silenceTimer); + + // release lock + isRecording = false; + } + }); +} + +/** + * Runs recording sessions sequentially, so only one at a time + */ +async function continuousLoop() { + // This check is now more critical as AudioIO might not be available + if (!AudioIO) { + console.warn("[STT] AudioIO not available. STT continuous loop cannot start."); + sttRunning = false; // Ensure this is marked as not running + return; + } + + while (sttRunning) { // Check sttRunning to allow loop to terminate if STT is disabled later + try { + await recordAndTranscribeOnce(); + } catch (err) { + // Errors from recordAndTranscribeOnce (like transcription errors) are caught here + console.error("[STT Error in continuousLoop]", err); + // Potentially add a longer delay or a backoff mechanism if errors are persistent + } + // short gap, but only if stt is still supposed to be running + if (sttRunning) { + await new Promise(res => setTimeout(res, 1000)); + } + } + console.log("[STT] Continuous loop ended."); +} + +export function initTTS() { + if (!settings.stt_transcription) { + console.log("[STT] STT transcription is disabled in settings."); + sttRunning = false; // Ensure it's marked as not running + return; + } + + // This check is crucial: if AudioIO (from naudiodon) wasn't loaded, STT cannot run. + if (!AudioIO) { + console.warn("[STT] AudioIO is not available (naudiodon might have failed to load). STT functionality cannot be initialized."); + sttRunning = false; // Ensure sttRunning is false if it was somehow true + return; + } + + if (sttRunning) { + console.log("[STT] STT loop already running; skipping re-init."); + return; + } + + console.log("[STT] Initializing STT..."); + sttRunning = true; // Set before starting the loop + + continuousLoop().catch((err) => { + console.error("[STT] continuousLoop crashed unexpectedly:", err); + sttRunning = false; // Mark as not running if it crashes + }); +} + +// Moved initTTS() call into the async IIFE after naudiodon import attempt. +// initTTS();