diff --git a/README.md b/README.md index aa3945e..7f422ff 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,21 @@ When running in docker, if you want the bot to join your local minecraft server, To connect to an unsupported minecraft version, you can try to use [viaproxy](services/viaproxy/README.md) +## STT in Mindcraft + +STT allows you to speak to the model if you have a microphone + +STT can be enabled in `settings.js` under the section that looks like this: +```javascript + "stt_transcription": true, // Change this to "true" to enable STT + "stt_username": "SYSTEM", + "stt_agent_name": "" +``` + +The Speech to Text engine will begin listening on the system default input device. + +When using STT, you **need** a [GroqCloud API key](https://console.groq.com/keys) as Groq is used for audio transcription + # Bot Profiles Bot profiles are json files (such as `andy.json`) that define: diff --git a/keys.example.json b/keys.example.json index 99286c5..d9edf8b 100644 --- a/keys.example.json +++ b/keys.example.json @@ -1,17 +1,17 @@ -{ - "OPENAI_API_KEY": "", - "OPENAI_ORG_ID": "", - "GEMINI_API_KEY": "", - "ANTHROPIC_API_KEY": "", - "REPLICATE_API_KEY": "", - "GROQCLOUD_API_KEY": "", - "HUGGINGFACE_API_KEY": "", - "QWEN_API_KEY": "", - "XAI_API_KEY": "", - "MISTRAL_API_KEY": "", - "DEEPSEEK_API_KEY": "", - "GHLF_API_KEY": "", - "HYPERBOLIC_API_KEY": "", - "NOVITA_API_KEY": "", - "OPENROUTER_API_KEY": "" -} +{ + "OPENAI_API_KEY": "", + "OPENAI_ORG_ID": "", + "GEMINI_API_KEY": "", + "ANTHROPIC_API_KEY": "", + "REPLICATE_API_KEY": "", + "GROQCLOUD_API_KEY": "", + "HUGGINGFACE_API_KEY": "", + "QWEN_API_KEY": "", + "XAI_API_KEY": "", + "MISTRAL_API_KEY": "", + "DEEPSEEK_API_KEY": "", + "GHLF_API_KEY": "", + "HYPERBOLIC_API_KEY": "", + "NOVITA_API_KEY": "", + "OPENROUTER_API_KEY": "" +} diff --git a/main.js b/main.js index 521aadf..e5db05c 100644 --- a/main.js +++ b/main.js @@ -5,6 +5,7 @@ import { hideBin } from 'yargs/helpers'; import { createMindServer } from
'./src/server/mind_server.js'; import { mainProxy } from './src/process/main_proxy.js'; import { readFileSync } from 'fs'; +import { initTTS } from './src/process/tts_process.js'; function parseArguments() { return yargs(hideBin(process.argv)) @@ -39,7 +40,7 @@ async function main() { const profiles = getProfiles(args); console.log(profiles); const { load_memory, init_message } = settings; - + for (let i=0; i setTimeout(resolve, 1000)); } + initTTS(); } try { diff --git a/package.json b/package.json index bb3fd90..c713c92 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,8 @@ "cheerio": "^1.0.0", "express": "^4.18.2", "google-translate-api-x": "^10.7.1", - "groq-sdk": "^0.15.0", + "groq-sdk": "^0.5.0", + "mic": "^2.1.2", "minecraft-data": "^3.78.0", "mineflayer": "^4.26.0", "mineflayer-armor-manager": "^2.0.1", @@ -17,6 +18,7 @@ "mineflayer-collectblock": "^1.4.1", "mineflayer-pathfinder": "^2.4.5", "mineflayer-pvp": "^1.3.2", + "naudiodon": "^2.3.6", "node-canvas-webgl": "PrismarineJS/node-canvas-webgl", "openai": "^4.4.0", "patch-package": "^8.0.0", @@ -28,6 +30,7 @@ "socket.io-client": "^4.7.2", "three": "^0.128.0", "vec3": "^0.1.10", + "wav": "^1.0.2", "yargs": "^17.7.2" }, "scripts": { @@ -40,4 +43,4 @@ "eslint-plugin-no-floating-promise": "^2.0.0", "globals": "^15.11.0" } -} +} \ No newline at end of file diff --git a/patches/@google+generative-ai+0.2.1.patch b/patches/@google+generative-ai+0.2.1.patch index ebdff24..68d8ec6 100644 --- a/patches/@google+generative-ai+0.2.1.patch +++ b/patches/@google+generative-ai+0.2.1.patch @@ -1,13 +1,12 @@ diff --git a/node_modules/@google/generative-ai/dist/index.mjs b/node_modules/@google/generative-ai/dist/index.mjs -index 23a175b..aab7e19 100644 --- a/node_modules/@google/generative-ai/dist/index.mjs +++ b/node_modules/@google/generative-ai/dist/index.mjs -@@ -151,7 +151,7 @@ class GoogleGenerativeAIResponseError extends GoogleGenerativeAIError { - * limitations under the License. 
- */ - const BASE_URL = "https://generativelanguage.googleapis.com"; +@@ -156,1 +156,1 @@ +-const API_VERSION = "v1"; ++const API_VERSION = "v1beta"; +diff --git a/node_modules/@google/generative-ai/dist/index.js b/node_modules/@google/generative-ai/dist/index.js +--- a/node_modules/@google/generative-ai/dist/index.js ++++ b/node_modules/@google/generative-ai/dist/index.js +@@ -156,1 +156,1 @@ -const API_VERSION = "v1"; +const API_VERSION = "v1beta"; - /** - * We can't `require` package.json if this runs on web. We will use rollup to - * swap in the version number here at build time. diff --git a/profiles/llama.json b/profiles/llama.json index ceb3992..2e9cae0 100644 --- a/profiles/llama.json +++ b/profiles/llama.json @@ -7,4 +7,4 @@ "embedding": "openai" -} \ No newline at end of file +} diff --git a/settings.js b/settings.js index f6713ae..cdfc60e 100644 --- a/settings.js +++ b/settings.js @@ -29,7 +29,6 @@ const settings = { "load_memory": false, // load memory from previous session "init_message": "Respond with hello world and your name", // sends to all on spawn "only_chat_with": [], // users that the bots listen to and send general messages to. if empty it will chat publicly - "speak": false, // allows all bots to speak through system text-to-speech. works on windows, mac, on linux you need to `apt install espeak` "language": "en", // translate to/from this language. Supports these language names: https://cloud.google.com/translate/docs/languages "show_bot_views": false, // show bot's view in browser at localhost:3000, 3001... 
@@ -46,10 +45,15 @@ const settings = { "verbose_commands": true, // show full command syntax "narrate_behavior": true, // chat simple automatic actions ('Picking up item!') "chat_bot_messages": true, // publicly chat messages to other bots + + "stt_transcription": false, // change this to "true" or "false" depending on if you want STT in Mindcraft, STT needs a GroqCloud API key, can be found here: https://console.groq.com/keys + "stt_username": "SYSTEM", // Change this to the username the model will respond to. + "stt_agent_name": "", // Change the name here to whatever your agent is named, if left empty, will send message to all agents. + "speak": false, // allows all bots to speak through system text-to-speech. works on windows, mac, on linux you need to `apt install espeak` - "log_normal_data": false, - "log_reasoning_data": false, - "log_vision_data": false, + "log_normal_data": false, // Logs all inputs / outputs without reasoning or vision data + "log_reasoning_data": false, // Logs only reasoning inputs / outputs + "log_vision_data": false, // Logs only vision inputs / outputs } diff --git a/src/agent/agent.js b/src/agent/agent.js index 0f391e0..e58687d 100644 --- a/src/agent/agent.js +++ b/src/agent/agent.js @@ -20,6 +20,15 @@ import { say } from './speak.js'; export class Agent { async start(profile_fp, load_mem=false, init_message=null, count_id=0, task_path=null, task_id=null) { this.last_sender = null; + // Safely attach agent instance to a global-like object so STT code can access it. + // This works in Node.js ESM or CommonJS. If "global" doesn't exist, fallback to "globalThis". + const globalObj = (typeof global !== 'undefined') ?
global : globalThis; + try { + globalObj.agent = this; + } catch(e) { + console.warn("Failed attaching agent to global object:", e); + } + this.latestScreenshotPath = null; this.count_id = count_id; if (!profile_fp) { @@ -126,6 +135,7 @@ export class Agent { }); } + async _setupEventHandlers(save_data, init_message) { const ignore_messages = [ "Set own game mode to", diff --git a/src/models/groq.js b/src/models/groq.js index fefa8c7..de7ebbd 100644 --- a/src/models/groq.js +++ b/src/models/groq.js @@ -1,4 +1,5 @@ import Groq from 'groq-sdk' +import fs from "fs"; import { getKey } from '../utils/keys.js'; import { log, logVision } from '../../logger.js'; @@ -104,3 +105,21 @@ export class GroqCloudAPI { throw new Error('Embeddings are not supported by Groq.'); } } + +export class GroqCloudTTS { + constructor() { + this.groq = new Groq({ apiKey: getKey('GROQCLOUD_API_KEY') }); + } + + async transcribe(filePath, options = {}) { + const transcription = await this.groq.audio.transcriptions.create({ + file: fs.createReadStream(filePath), + model: options.model || "distil-whisper-large-v3-en", // or "whisper-large-v3-turbo" + prompt: options.prompt || "", + response_format: options.response_format || "json", + language: options.language || "en", + temperature: options.temperature !== undefined ? 
options.temperature : 0.0, + }); + return transcription.text; + } +} diff --git a/src/process/tts_process.js b/src/process/tts_process.js new file mode 100644 index 0000000..5d20259 --- /dev/null +++ b/src/process/tts_process.js @@ -0,0 +1,247 @@ +import settings from '../../settings.js'; +import { GroqCloudTTS } from '../models/groq.js'; +import portAudio from 'naudiodon'; +const { AudioIO, SampleFormat16Bit } = portAudio; +import wav from 'wav'; +import fs from 'fs'; +import path from 'path'; +import { fileURLToPath } from 'url'; + +// Import getIO and our new function getAllInGameAgentNames +import { getIO, getAllInGameAgentNames } from '../server/mind_server.js'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +/** + * Delete leftover speech_*.wav from previous runs + */ +const leftover = fs.readdirSync(__dirname).filter(f => /^speech_\d+\.wav$/.test(f)); +for (const file of leftover) { + try { + fs.unlinkSync(path.join(__dirname, file)); + } catch (_) { + // ignore errors + } +} + +// Configuration +const RMS_THRESHOLD = 500; // Lower threshold for faint audio +const SILENCE_DURATION = 2000; // 2 seconds of silence after speech => stop +const SAMPLE_RATE = 16000; +const BIT_DEPTH = 16; +const STT_USERNAME = settings.stt_username || "SERVER"; // Name that appears as sender +const STT_AGENT_NAME = settings.stt_agent_name || ""; // If blank, broadcast to all + +// Guards to prevent multiple overlapping recordings +let isRecording = false; // Ensures only one recordAndTranscribeOnce at a time +let sttRunning = false; // Ensures continuousLoop is started only once + +/** + * Records one session, transcribes, and sends to MindServer as a chat message + */ +async function recordAndTranscribeOnce() { + // If another recording is in progress, just skip + if (isRecording) { + console.log("Another recording is still in progress; skipping new record attempt."); + return null; + } + isRecording = true; + + const outFile 
= path.join(__dirname, `speech_${Date.now()}.wav`); + const fileWriter = new wav.FileWriter(outFile, { + channels: 1, + sampleRate: SAMPLE_RATE, + bitDepth: BIT_DEPTH + }); + const ai = new AudioIO({ + inOptions: { + channelCount: 1, + sampleFormat: SampleFormat16Bit, + sampleRate: SAMPLE_RATE, + deviceId: -1, + closeOnError: true + } + }); + + let recording = true; + let hasHeardSpeech = false; + let silenceTimer = null; + let finished = false; // Guard to ensure final processing is done only once + + // Helper to reset silence timer + function resetSilenceTimer() { + if (silenceTimer) clearTimeout(silenceTimer); + if (hasHeardSpeech) { + silenceTimer = setTimeout(() => stopRecording(), SILENCE_DURATION); + } + } + + // Stop recording + function stopRecording() { + if (!recording) return; + recording = false; + ai.quit(); + fileWriter.end(); + } + + // We wrap everything in a promise so we can await the transcription + return new Promise((resolve, reject) => { + // Attach event handlers + ai.on('data', (chunk) => { + fileWriter.write(chunk); + + // Calculate RMS for threshold detection + let sumSquares = 0; + const sampleCount = chunk.length / 2; + for (let i = 0; i < chunk.length; i += 2) { + const sample = chunk.readInt16LE(i); + sumSquares += sample * sample; + } + const rms = Math.sqrt(sumSquares / sampleCount); + + // If RMS passes threshold, we've heard speech + if (rms > RMS_THRESHOLD) { + if (!hasHeardSpeech) { + hasHeardSpeech = true; + } + resetSilenceTimer(); + } + }); + + ai.on('error', (err) => { + cleanupListeners(); + reject(err); + }); + + fileWriter.on('finish', async () => { + if (finished) return; + finished = true; + try { + // Check audio duration + const stats = fs.statSync(outFile); + const headerSize = 44; // standard WAV header size + const dataSize = stats.size - headerSize; + const duration = dataSize / (SAMPLE_RATE * (BIT_DEPTH / 8)); + if (duration < 2.75) { + console.log("Audio too short (<2.75s); discarding."); + fs.unlink(outFile, 
() => {}); + cleanupListeners(); + return resolve(null); + } + + // Transcribe + const groqTTS = new GroqCloudTTS(); + const text = await groqTTS.transcribe(outFile, { + model: "distil-whisper-large-v3-en", + prompt: "", + response_format: "json", + language: "en", + temperature: 0.0 + }); + + fs.unlink(outFile, () => {}); // cleanup WAV file + + // Basic check for empty or whitespace + if (!text || !text.trim()) { + console.log("Transcription empty; discarding."); + cleanupListeners(); + return resolve(null); + } + + // Heuristic checks to determine if the transcription is genuine + + // 1. Ensure at least one alphabetical character + if (!/[A-Za-z]/.test(text)) { + console.log("Transcription has no letters; discarding."); + cleanupListeners(); + return resolve(null); + } + + // 2. Check for gibberish repeated sequences + if (/([A-Za-z])\1{3,}/.test(text)) { + console.log("Transcription looks like gibberish; discarding."); + cleanupListeners(); + return resolve(null); + } + + // 3. Check transcription length, with allowed greetings + const letterCount = text.replace(/[^A-Za-z]/g, "").length; + const normalizedText = text.trim().toLowerCase(); + const allowedGreetings = new Set(["hi", "hello", "greetings", "hey"]); + + if (letterCount < 8 && !allowedGreetings.has(normalizedText)) { + console.log("Transcription too short and not an allowed greeting; discarding."); + cleanupListeners(); + return resolve(null); + } + + console.log("Transcription:", text); + + // Format message so it looks like: "[SERVER] message" + const finalMessage = `[${STT_USERNAME}] ${text}`; + + // If STT_AGENT_NAME is empty, broadcast to all agents + if (!STT_AGENT_NAME.trim()) { + const agentNames = getAllInGameAgentNames(); // from mind_server + for (const agentName of agentNames) { + getIO().emit('send-message', agentName, finalMessage); + } + } else { + // Otherwise, send only to the specified agent + getIO().emit('send-message', STT_AGENT_NAME, finalMessage); + } + + cleanupListeners(); + 
resolve(text); + } catch (err) { + cleanupListeners(); + reject(err); + } + }); + + ai.start(); + + function cleanupListeners() { + ai.removeAllListeners('data'); + ai.removeAllListeners('error'); + fileWriter.removeAllListeners('finish'); + if (silenceTimer) clearTimeout(silenceTimer); + + // release lock + isRecording = false; + } + }); +} + +/** + * Runs recording sessions sequentially, so only one at a time + */ +async function continuousLoop() { + while (true) { + try { + await recordAndTranscribeOnce(); + } catch (err) { + console.error("[STT Error]", err); + } + // short gap + await new Promise(res => setTimeout(res, 1000)); + } +} + +export function initTTS() { + // Only run if stt_transcription is true and we haven't started already + if (!settings.stt_transcription) return; + + if (sttRunning) { + console.log("STT loop already running; skipping re-init."); + return; + } + sttRunning = true; + + continuousLoop().catch((err) => { + console.error("[STT] continuousLoop crashed", err); + }); +} + +initTTS(); diff --git a/src/server/mind_server.js b/src/server/mind_server.js index eed71d7..7bf530a 100644 --- a/src/server/mind_server.js +++ b/src/server/mind_server.js @@ -161,3 +161,6 @@ function stopAllAgents() { export const getIO = () => io; export const getServer = () => server; export const getConnectedAgents = () => connectedAgents; +export function getAllInGameAgentNames() { + return Object.keys(inGameAgents); + }