Add files via upload

2025-09-10 12:02:59 +02:00 · 2025-02-18 17:58:52 -08:00 · 2025-02-18 17:58:52 -08:00 · 66ca5f7c4e
commit 66ca5f7c4e
parent 7d9257036c
14 changed files with 5241 additions and 84 deletions
--- a/main.js
+++ b/main.js
@ -5,6 +5,7 @@ import { hideBin } from 'yargs/helpers';
 import { createMindServer } from './src/server/mind_server.js';
 import { mainProxy } from './src/process/main_proxy.js';
 import { readFileSync } from 'fs';
+import { initTTS } from './src/process/tts_process.js';

 function parseArguments() {
    return yargs(hideBin(process.argv))
@ -38,16 +39,25 @@ async function main() {
    const args = parseArguments();
    const profiles = getProfiles(args);
    console.log(profiles);
+
    const { load_memory, init_message } = settings;

-    for (let i=0; i<profiles.length; i++) {
+    // Start each agent in turn
+    for (let i = 0; i < profiles.length; i++) {
        const agent_process = new AgentProcess();
        const profile = readFileSync(profiles[i], 'utf8');
        const agent_json = JSON.parse(profile);
+
        mainProxy.registerAgent(agent_json.name, agent_process);
+
        agent_process.start(profiles[i], load_memory, init_message, i, args.task_path, args.task_id);
+
+        // A small delay so we don't start them all at once
        await new Promise(resolve => setTimeout(resolve, 1000));
    }
+
+    // NEW: Finally, kick off TTS (will only run if tts_transcription is true in settings)
+    initTTS();
 }

 try {
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@ -9,6 +9,7 @@
        "express": "^4.18.2",
        "google-translate-api-x": "^10.7.1",
        "groq-sdk": "^0.5.0",
+        "mic": "^2.1.2",
        "minecraft-data": "^3.78.0",
        "mineflayer": "^4.23.0",
        "mineflayer-armor-manager": "^2.0.1",
@ -16,6 +17,7 @@
        "mineflayer-collectblock": "^1.4.1",
        "mineflayer-pathfinder": "^2.4.5",
        "mineflayer-pvp": "^1.3.2",
+        "naudiodon": "^2.3.6",
        "openai": "^4.4.0",
        "patch-package": "^8.0.0",
        "prismarine-item": "^1.15.0",
@ -25,6 +27,7 @@
        "socket.io": "^4.7.2",
        "socket.io-client": "^4.7.2",
        "vec3": "^0.1.10",
+        "wav": "^1.0.2",
        "yargs": "^17.7.2"
    },
    "scripts": {
--- a/patches/@google+generative-ai+0.2.1.patch
+++ b/patches/@google+generative-ai+0.2.1.patch
@ -0,0 +1,12 @@
+diff --git a/node_modules/@google/generative-ai/dist/index.mjs b/node_modules/@google/generative-ai/dist/index.mjs
+--- a/node_modules/@google/generative-ai/dist/index.mjs
++++ b/node_modules/@google/generative-ai/dist/index.mjs
+@@ -156,1 +156,1 @@
+-const API_VERSION = "v1";
++const API_VERSION = "v1beta";
+diff --git a/node_modules/@google/generative-ai/dist/index.js b/node_modules/@google/generative-ai/dist/index.js
+--- a/node_modules/@google/generative-ai/dist/index.js
++++ b/node_modules/@google/generative-ai/dist/index.js
+@@ -156,1 +156,1 @@
+-const API_VERSION = "v1";
++const API_VERSION = "v1beta";
--- a/profiles/gemini.json
+++ b/profiles/gemini.json
@ -1,7 +1,7 @@
 {
    "name": "gemini",

-    "model": "gemini-1.5-flash",
+    "model": "gemini-2.0-flash-lite-preview-02-05",

    "cooldown": 10000
 }
--- a/profiles/llama.json
+++ b/profiles/llama.json
@ -5,6 +5,6 @@

    "max_tokens": 4000,

-    "embedding": "openai"
+    "embedding": "google"
    
 }
--- a/settings.js
+++ b/settings.js
@ -44,4 +44,9 @@ export default
    "verbose_commands": true, // show full command syntax
    "narrate_behavior": true, // chat simple automatic actions ('Picking up item!')
    "chat_bot_messages": true, // publicly chat messages to other bots
+
+    // Section for advanced features. More (such as vision or model speech) may be added later; for now it only holds TTS :)
+    "tts_transcription": false, // set this to true or false depending on whether you want TTS in Mindcraft. TTS needs a GroqCloud API key, which can be found here: https://console.groq.com/keys
+    "tts_username": "SYSTEM", // change this to the username the model will respond to.
+    "tts_agent_name": "" // change this to the name of your agent; if left empty, the message is sent to all agents.
 }
--- a/src/agent/agent.js
+++ b/src/agent/agent.js
@ -18,6 +18,15 @@ import { Task } from './tasks.js';
 export class Agent {
    async start(profile_fp, load_mem=false, init_message=null, count_id=0, task_path=null, task_id=null) {
        this.last_sender = null;
+        // Safely attach agent instance to a global-like object so TTS code can access it.
+        // This works in Node.js ESM or CommonJS. If "global" doesn't exist, fallback to "globalThis".
+        const globalObj = (typeof global !== 'undefined') ? global : globalThis;
+        try {
+            globalObj.agent = this;
+        } catch(e) {
+            console.warn("Failed attaching agent to global object:", e);
+        }
+        
        this.count_id = count_id;
        try {
            if (!profile_fp) {
@ -26,25 +35,33 @@ export class Agent {

            console.log('Starting agent initialization with profile:', profile_fp);

-            // Initialize components with more detailed error handling
            console.log('Initializing action manager...');
            this.actions = new ActionManager(this);
+
            console.log('Initializing prompter...');
            this.prompter = new Prompter(this, profile_fp);
            this.name = this.prompter.getName();
+
            console.log('Initializing history...');
            this.history = new History(this);
+
            console.log('Initializing coder...');
            this.coder = new Coder(this);
+
            console.log('Initializing npc controller...');
            this.npc = new NPCContoller(this);
+
            console.log('Initializing memory bank...');
            this.memory_bank = new MemoryBank();
+
            console.log('Initializing self prompter...');
            this.self_prompter = new SelfPrompter(this);
+
            convoManager.initAgent(this);
+            
            console.log('Initializing examples...');
            await this.prompter.initExamples();
+
            console.log('Initializing task...');
            this.task = new Task(this, task_path, task_id);
            const blocked_actions = this.task.blocked_actions || [];
@ -64,25 +81,25 @@ export class Agent {

            this.bot.on('login', () => {
                console.log(this.name, 'logged in!');
-
                serverProxy.login();

-                // Set skin for profile, requires Fabric Tailor. (https://modrinth.com/mod/fabrictailor)
-                if (this.prompter.profile.skin)
+                if (this.prompter.profile.skin) {
                    this.bot.chat(`/skin set URL ${this.prompter.profile.skin.model} ${this.prompter.profile.skin.path}`);
-                else
+                } else {
                    this.bot.chat(`/skin clear`);
+                }
            });

            const spawnTimeout = setTimeout(() => {
                process.exit(0);
            }, 30000);
+
            this.bot.once('spawn', async () => {
                try {
                    clearTimeout(spawnTimeout);
                    addViewer(this.bot, count_id);

-                    // wait for a bit so stats are not undefined
+                    // wait briefly so stats are not undefined
                    await new Promise((resolve) => setTimeout(resolve, 1000));

                    console.log(`${this.name} spawned.`);
@ -94,19 +111,15 @@ export class Agent {
                    if (!load_mem) {
                        this.task.initBotTask();
                    }
-
                } catch (error) {
                    console.error('Error in spawn event:', error);
                    process.exit(0);
                }
            });
        } catch (error) {
-            // Ensure we're not losing error details
-            console.error('Agent start failed with error')
-            console.error(error.message);
-            console.error(error.stack);
-
-            throw error; // Re-throw with preserved details
+            console.error('Agent start failed with error');
+            console.error(error);
+            throw error;
        }
    }

--- a/src/models/groq.js
+++ b/src/models/groq.js
@ -1,23 +1,20 @@
-import Groq from 'groq-sdk'
+import Groq from 'groq-sdk';
+import fs from "fs";
 import { getKey } from '../utils/keys.js';

-
-// Umbrella class for Mixtral, LLama, Gemma...
 export class GroqCloudAPI {
  constructor(model_name, url, params) {
    this.model_name = model_name;
    this.url = url;
    this.params = params || {};
-        // ReplicateAPI theft :3
    if (this.url) {
-
      console.warn("Groq Cloud has no implementation for custom URLs. Ignoring provided URL.");
    }
    this.groq = new Groq({ apiKey: getKey('GROQCLOUD_API_KEY') });
  }

-    async sendRequest(turns, systemMessage, stop_seq=null) {
-        let messages = [{"role": "system", "content": systemMessage}].concat(turns);
+  async sendRequest(turns, systemMessage, stop_seq = null) {
+    let messages = [{ role: "system", content: systemMessage }].concat(turns);
    let res = null;
    try {
      console.log("Awaiting Groq response...");
@ -25,11 +22,11 @@ export class GroqCloudAPI {
        this.params.max_tokens = 16384;
      }
      let completion = await this.groq.chat.completions.create({
-                "messages": messages,
-                "model": this.model_name || "mixtral-8x7b-32768",
-                "stream": true,
-                "stop": stop_seq,
-                ...(this.params || {})
+        messages,
+        model: this.model_name || "mixtral-8x7b-32768",
+        stream: true,
+        stop: stop_seq,
+        ...this.params
      });

      let temp_res = "";
@ -38,9 +35,7 @@ export class GroqCloudAPI {
      }

      res = temp_res;
-
-        }
-        catch(err) {
+    } catch (err) {
      console.log(err);
      res = "My brain just kinda stopped working. Try again.";
    }
@ -51,3 +46,21 @@ export class GroqCloudAPI {
    throw new Error('Embeddings are not supported by Groq.');
  }
 }
+
+/**
+ * Thin wrapper around the Groq SDK's audio transcription endpoint.
+ * Reads its API key through getKey('GROQCLOUD_API_KEY').
+ */
+export class GroqCloudTTS {
+  constructor() {
+    const apiKey = getKey('GROQCLOUD_API_KEY');
+    this.groq = new Groq({ apiKey });
+  }
+
+  /**
+   * Sends an audio file to Groq for transcription.
+   *
+   * @param {string} filePath - Path of the audio file to stream to the API.
+   * @param {object} [options] - Optional overrides: model, prompt,
+   *   response_format, language, temperature. Falsy values for the first four
+   *   fall back to the defaults below; temperature falls back only when it is
+   *   undefined.
+   * @returns {Promise<string>} The `text` field of the API response.
+   */
+  async transcribe(filePath, options = {}) {
+    const request = {
+      file: fs.createReadStream(filePath),
+      model: options.model || "distil-whisper-large-v3-en", // or "whisper-large-v3-turbo", etc.
+      prompt: options.prompt || "",
+      response_format: options.response_format || "json",
+      language: options.language || "en",
+      temperature: options.temperature === undefined ? 0.0 : options.temperature,
+    };
+    const result = await this.groq.audio.transcriptions.create(request);
+    return result.text;
+  }
+}
--- a/src/process/init_agent.js
+++ b/src/process/init_agent.js
@ -58,8 +58,7 @@ const argv = yargs(args)
        await agent.start(argv.profile, argv.load_memory, argv.init_message, argv.count_id, argv.task_path, argv.task_id);
    } catch (error) {
        console.error('Failed to start agent process:');
-        console.error(error.message);
-        console.error(error.stack);
+        console.error(error);
        process.exit(1);
    }
 })();
--- a/src/process/tts_process.js
+++ b/src/process/tts_process.js
@ -0,0 +1,170 @@
+// ============================ tts_process.js ============================
+import settings from '../../settings.js';
+import { GroqCloudTTS } from '../models/groq.js';
+import portAudio from 'naudiodon';
+const { AudioIO, SampleFormat16Bit } = portAudio;
+import wav from 'wav';
+import fs from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+
+// Import getIO and our new function getAllInGameAgentNames
+import { getIO, getAllInGameAgentNames } from '../server/mind_server.js';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+/**
+ * Startup sweep: remove any speech_<digits>.wav recordings that an earlier
+ * run left behind in this module's directory.
+ */
+const leftover = fs.readdirSync(__dirname).filter((entry) => /^speech_\d+\.wav$/.test(entry));
+leftover.forEach((entry) => {
+  try {
+    fs.unlinkSync(path.join(__dirname, entry));
+  } catch (_) {
+    // best-effort: a file that cannot be removed is left in place
+  }
+});
+
+// Recording and message-routing configuration
+const RMS_THRESHOLD = 500;          // A chunk whose RMS exceeds this value counts as speech
+const SILENCE_DURATION = 2000;      // ms without a loud chunk, once speech was heard, before recording stops
+const SAMPLE_RATE = 16000;
+const BIT_DEPTH = 16;
+const TTS_USERNAME = settings.tts_username || "SERVER";        // Sender name put in front of the message; "SERVER" when the setting is empty
+const TTS_AGENT_NAME = settings.tts_agent_name || "";          // Target agent; if blank, broadcast to all
+
+/**
+ * Records one microphone session, transcribes it, and forwards the text to
+ * MindServer as a chat message.
+ *
+ * Recording starts immediately. The first chunk whose RMS exceeds
+ * RMS_THRESHOLD marks speech as detected; the session then ends once
+ * SILENCE_DURATION ms pass without another loud chunk. The finished WAV file
+ * is passed to GroqCloudTTS.transcribe() and the resulting text is emitted
+ * through getIO() as a 'send-message' event, either to TTS_AGENT_NAME or,
+ * when that is blank, once per name from getAllInGameAgentNames().
+ *
+ * The temporary WAV file is removed whether transcription succeeds or fails.
+ * On an audio or file-writer error the device and writer are closed before
+ * the promise rejects.
+ *
+ * @returns {Promise<string|null>} The transcribed text, or null when the
+ *   transcription was empty or whitespace only. Rejects on an audio-input,
+ *   file-writer, or transcription error.
+ */
+function recordAndTranscribeOnce() {
+  return new Promise((resolve, reject) => {
+    const outFile = path.join(__dirname, `speech_${Date.now()}.wav`);
+    const fileWriter = new wav.FileWriter(outFile, {
+      channels: 1,
+      sampleRate: SAMPLE_RATE,
+      bitDepth: BIT_DEPTH
+    });
+    const ai = new AudioIO({
+      inOptions: {
+        channelCount: 1,
+        sampleFormat: SampleFormat16Bit,
+        sampleRate: SAMPLE_RATE,
+        deviceId: -1,
+        closeOnError: true
+      }
+    });
+
+    let recording = true;
+    let failed = false;
+    let hasHeardSpeech = false;
+    let silenceTimer = null;
+
+    // (Re)arm the end-of-speech timer; it only runs once speech was heard.
+    function resetSilenceTimer() {
+      if (silenceTimer) clearTimeout(silenceTimer);
+      if (hasHeardSpeech) {
+        silenceTimer = setTimeout(() => stopRecording(), SILENCE_DURATION);
+      }
+    }
+
+    // Normal end of a session: close the input and finalize the WAV file.
+    function stopRecording() {
+      if (!recording) return;
+      recording = false;
+      if (silenceTimer) clearTimeout(silenceTimer);
+      ai.quit();
+      fileWriter.end();
+    }
+
+    // Error end of a session: release the device and the writer so neither
+    // stays open, mark the session so 'finish' skips transcription, reject.
+    function fail(err) {
+      if (failed) return;
+      failed = true;
+      if (recording) {
+        recording = false;
+        if (silenceTimer) clearTimeout(silenceTimer);
+        try {
+          ai.quit();
+        } catch (_) {
+          // the input may already have been closed (closeOnError is set)
+        }
+        fileWriter.end();
+      }
+      reject(err);
+    }
+
+    ai.on('data', (chunk) => {
+      // Chunks can still be delivered after the session was stopped; writing
+      // them to the already-ended writer would raise a write-after-end error.
+      if (!recording) return;
+      fileWriter.write(chunk);
+
+      // Calculate RMS over whole 16-bit samples only, so an odd trailing byte
+      // cannot make readInt16LE throw and an empty chunk cannot yield NaN.
+      const sampleCount = Math.floor(chunk.length / 2);
+      if (sampleCount === 0) return;
+      let sumSquares = 0;
+      for (let i = 0; i < sampleCount; i++) {
+        const sample = chunk.readInt16LE(i * 2);
+        sumSquares += sample * sample;
+      }
+      const rms = Math.sqrt(sumSquares / sampleCount);
+
+      if (rms > RMS_THRESHOLD) {
+        if (!hasHeardSpeech) {
+          hasHeardSpeech = true;
+          console.log("Speech detected");
+        }
+        resetSilenceTimer();
+      }
+    });
+
+    ai.on('error', (err) => {
+      fail(err);
+    });
+
+    // Without a listener, an 'error' event from the writer would be thrown
+    // as an uncaught exception.
+    fileWriter.on('error', (err) => {
+      fail(err);
+    });
+
+    // Once the WAV file is finalized, transcribe
+    fileWriter.on('finish', async () => {
+      if (failed) {
+        // The session was aborted: nothing to transcribe, just clean up.
+        fs.unlink(outFile, () => {});
+        return;
+      }
+      try {
+        const groqTTS = new GroqCloudTTS();
+        const text = await groqTTS.transcribe(outFile, {
+          model: "distil-whisper-large-v3-en",
+          prompt: "",
+          response_format: "json",
+          language: "en",
+          temperature: 0.0
+        });
+
+        // If Whisper returned nothing or just whitespace, discard
+        if (!text || !text.trim()) {
+          console.log("Transcription empty, discarding.");
+          return resolve(null);
+        }
+
+        console.log("Transcription:", text);
+
+        // Format message so it looks like: "[<TTS_USERNAME>] hello there"
+        const finalMessage = `[${TTS_USERNAME}] ${text}`;
+
+        // If TTS_AGENT_NAME is empty, broadcast to all agents
+        if (!TTS_AGENT_NAME.trim()) {
+          const agentNames = getAllInGameAgentNames(); // from mind_server
+          for (const agentName of agentNames) {
+            getIO().emit('send-message', agentName, finalMessage);
+          }
+        } else {
+          // Otherwise, send only to the specified agent
+          getIO().emit('send-message', TTS_AGENT_NAME, finalMessage);
+        }
+
+        resolve(text);
+      } catch (err) {
+        reject(err);
+      } finally {
+        // Clean up the wav file on success and on failure alike
+        fs.unlink(outFile, () => {});
+      }
+    });
+
+    try {
+      ai.start();
+    } catch (err) {
+      // Close the writer instead of leaving it open when the input cannot start
+      fail(err);
+    }
+  });
+}
+
+/**
+ * Endless driver for voice input: awaits one recording session at a time, so
+ * sessions never overlap, logs any failure, and pauses one second before the
+ * next session. Never returns.
+ */
+async function continuousLoop() {
+  const pause = (ms) => new Promise((done) => setTimeout(done, ms));
+  for (;;) {
+    try {
+      await recordAndTranscribeOnce();
+    } catch (sessionError) {
+      console.error("[TTS Error]", sessionError);
+    }
+    // short gap
+    await pause(1000);
+  }
+}
+
+// Set once the recording loop has been started, so it is started only once.
+let ttsStarted = false;
+
+/**
+ * Starts the continuous record-and-transcribe loop when
+ * settings.tts_transcription is truthy; otherwise does nothing.
+ *
+ * Only the first call starts a loop; later calls return immediately, so two
+ * loops never record from the microphone at the same time.
+ *
+ * initTTS() is deliberately not invoked when this module is imported:
+ * main.js imports it and calls it explicitly after the agents have been
+ * started, and an additional import-time call would start a second loop.
+ */
+export function initTTS() {
+  if (!settings.tts_transcription) return;
+  if (ttsStarted) return;
+  ttsStarted = true;
+  continuousLoop().catch((err) => {
+    // Do not swallow the failure silently: report why the loop stopped.
+    console.error("[TTS Error] Recording loop stopped:", err);
+  });
+}
--- a/src/server/mind_server.js
+++ b/src/server/mind_server.js
@ -161,3 +161,7 @@ function stopAllAgents() {
 export const getIO = () => io;
 export const getServer = () => server;
 export const getConnectedAgents = () => connectedAgents; 
+/**
+ * Lists the keys of the inGameAgents registry.
+ *
+ * @returns {string[]} The own enumerable keys of inGameAgents.
+ */
+export function getAllInGameAgentNames() {
+    const names = Object.keys(inGameAgents);
+    return names;
+}
+