From 19b69efd67d85dcc28e32056a47fd35047fba588 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 7 Jun 2025 23:13:07 +0000 Subject: [PATCH 01/26] Fix: Use mic as fallback for STT if naudiodon fails This commit addresses an issue where Speech-to-Text (STT) functionality would be disabled if the `naudiodon` package failed to build during installation. The `src/process/tts_process.js` file (which handles STT) has been modified to: 1. Attempt to load `naudiodon` first. 2. If `naudiodon` fails to load, attempt to load the `mic` package as an alternative. 3. The audio recording logic has been adapted to work with both `naudiodon` and `mic` APIs. Additionally, `package.json` has been updated to move `mic` from `dependencies` to `optionalDependencies`, making its behavior consistent with `naudiodon`. This change provides a fallback mechanism for audio recording, increasing the robustness of the STT feature across different platforms and environments where `naudiodon` might have build issues. 
--- package.json | 4 +- src/process/tts_process.js | 227 +++++++++++++++++++++++++++++-------- 2 files changed, 179 insertions(+), 52 deletions(-) diff --git a/package.json b/package.json index 25b6235..a8ba1be 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,6 @@ "express": "^4.18.2", "google-translate-api-x": "^10.7.1", "groq-sdk": "^0.5.0", - "mic": "^2.1.2", "minecraft-data": "^3.78.0", "mineflayer": "^4.26.0", "mineflayer-armor-manager": "^2.0.1", @@ -33,7 +32,8 @@ "yargs": "^17.7.2" }, "optionalDependencies": { - "naudiodon": "^2.3.6" + "naudiodon": "^2.3.6", + "mic": "^2.1.2" }, "scripts": { "postinstall": "patch-package", diff --git a/src/process/tts_process.js b/src/process/tts_process.js index 59d97e5..2ce3dd5 100644 --- a/src/process/tts_process.js +++ b/src/process/tts_process.js @@ -17,32 +17,55 @@ const __dirname = path.dirname(__filename); let portAudio; let AudioIO; let SampleFormat16Bit; +let mic; // For mic library +let activeAudioLibrary = null; // 'naudiodon' or 'mic' (async () => { try { const naudiodonModule = await import('naudiodon'); - portAudio = naudiodonModule.default; // CommonJS modules often export functionality on 'default' when imported into ES modules + portAudio = naudiodonModule.default; if (portAudio && typeof portAudio.AudioIO === 'function' && typeof portAudio.SampleFormat16Bit !== 'undefined') { AudioIO = portAudio.AudioIO; SampleFormat16Bit = portAudio.SampleFormat16Bit; + activeAudioLibrary = 'naudiodon'; console.log('[STT] naudiodon loaded successfully.'); } else if (naudiodonModule.AudioIO && typeof naudiodonModule.SampleFormat16Bit !== 'undefined') { - // Fallback if 'default' is not used and properties are directly on the module AudioIO = naudiodonModule.AudioIO; SampleFormat16Bit = naudiodonModule.SampleFormat16Bit; - portAudio = naudiodonModule; // Assign the module itself to portAudio for consistency if needed elsewhere + portAudio = naudiodonModule; + activeAudioLibrary = 'naudiodon'; console.log('[STT] 
naudiodon loaded successfully (direct properties).'); - } - else { + } else { throw new Error('AudioIO or SampleFormat16Bit not found in naudiodon module exports.'); } } catch (err) { - console.warn(`[STT] Failed to load naudiodon, Speech-to-Text will be disabled. Error: ${err.message}`); + console.warn(`[STT] Failed to load naudiodon. Error: ${err.message}`); portAudio = null; AudioIO = null; SampleFormat16Bit = null; + + // Attempt to load mic if naudiodon fails + try { + const micModule = await import('mic'); + mic = micModule.default; // Assuming mic is also a CommonJS module typically + if (mic && typeof mic === 'function') { // mic is often a constructor function + activeAudioLibrary = 'mic'; + console.log('[STT] mic loaded successfully as an alternative.'); + } else if (micModule.Mic) { // Some modules might export it as Mic + mic = micModule.Mic; + activeAudioLibrary = 'mic'; + console.log('[STT] mic (Mic) loaded successfully as an alternative.'); + } + else { + throw new Error('Mic constructor not found in mic module exports.'); + } + } catch (micErr) { + console.warn(`[STT] Failed to load mic as well. Speech-to-Text will be disabled. Error: ${micErr.message}`); + mic = null; + activeAudioLibrary = null; + } } - // Initialize TTS after attempting to load naudiodon + // Initialize TTS after attempting to load audio libraries initTTS(); })(); @@ -89,22 +112,14 @@ async function recordAndTranscribeOnce() { bitDepth: BIT_DEPTH }); - // This is where AudioIO is crucial - if (!AudioIO || !SampleFormat16Bit) { - console.warn("[STT] AudioIO or SampleFormat16Bit not available. Cannot record audio."); - isRecording = false; - return null; + if (!activeAudioLibrary) { + console.warn("[STT] No audio recording library available (naudiodon or mic). 
Cannot record audio."); + isRecording = false; + return null; } - const ai = new AudioIO({ - inOptions: { - channelCount: 1, - sampleFormat: SampleFormat16Bit, - sampleRate: SAMPLE_RATE, - deviceId: -1, - closeOnError: true - } - }); + let audioInterface; // Will hold either naudiodon's 'ai' or mic's 'micInstance' + let audioStream; // Will hold either naudiodon's 'ai' or mic's 'micInputStream' let recording = true; let hasHeardSpeech = false; @@ -114,26 +129,126 @@ async function recordAndTranscribeOnce() { // Helper to reset silence timer function resetSilenceTimer() { if (silenceTimer) clearTimeout(silenceTimer); - if (hasHeardSpeech) { - silenceTimer = setTimeout(() => stopRecording(), SILENCE_DURATION); + // Only start silence timer if actual speech has been detected + if (hasHeardSpeech && recording) { // also check `recording` to prevent timer after explicit stop + silenceTimer = setTimeout(() => { + console.log('[STT] Silence detected, stopping recording.'); + stopRecording(); + }, SILENCE_DURATION); } } // Stop recording function stopRecording() { if (!recording) return; - recording = false; - ai.quit(); - fileWriter.end(); + console.log('[STT] stopRecording called.'); + recording = false; // Set recording to false immediately + + if (activeAudioLibrary === 'naudiodon' && audioInterface) { + audioInterface.quit(); + } else if (activeAudioLibrary === 'mic' && audioInterface) { + audioInterface.stop(); // micInstance.stop() + } + // fileWriter.end() will be called by the 'finish' or 'silence' event handlers + // to ensure all data is written before closing the file. + // However, if stopRecording is called externally (e.g. by SILENCE_DURATION timer) + // and not by an event that naturally ends the stream, we might need to end it here. + // Let's defer fileWriter.end() to specific event handlers for now, + // but if issues arise, this is a place to check. + // For now, we rely on 'silence' (mic) or 'quit' sequence (naudiodon) to close writer. 
} + // We wrap everything in a promise so we can await the transcription return new Promise((resolve, reject) => { - // Attach event handlers - ai.on('data', (chunk) => { + if (activeAudioLibrary === 'naudiodon') { + if (!AudioIO || !SampleFormat16Bit) { // Should have been caught by activeAudioLibrary check, but for safety + console.warn("[STT] Naudiodon not available for recording."); + isRecording = false; + return reject(new Error("Naudiodon not available")); + } + audioInterface = new AudioIO({ // Naudiodon's ai + inOptions: { + channelCount: 1, + sampleFormat: SampleFormat16Bit, + sampleRate: SAMPLE_RATE, + deviceId: -1, // Default device + closeOnError: true + } + }); + audioStream = audioInterface; // For naudiodon, the interface itself is the stream emitter + + audioStream.on('error', (err) => { + console.error("[STT] Naudiodon AudioIO error:", err); + stopRecording(); // Try to stop everything + fileWriter.end(() => fs.unlink(outFile, () => {})); // End writer and delete file + cleanupListeners(); + resolve(null); // Resolve with null as per existing logic for continuousLoop + }); + + } else if (activeAudioLibrary === 'mic') { + // Calculate exitOnSilence for mic. It's in number of 512-byte chunks. + // Each chunk is 256 samples (16-bit, so 2 bytes per sample). + // Duration of one chunk = 256 samples / SAMPLE_RATE seconds. 
+ // Number of chunks for SILENCE_DURATION: + // (SILENCE_DURATION / 1000) / (256 / SAMPLE_RATE) + const micExitOnSilence = Math.ceil((SILENCE_DURATION / 1000) * (SAMPLE_RATE / 256)); + console.log(`[STT] Mic exitOnSilence calculated to: ${micExitOnSilence} frames (for ${SILENCE_DURATION}ms)`); + + audioInterface = new mic({ // micInstance + rate: String(SAMPLE_RATE), + channels: '1', + bitwidth: String(BIT_DEPTH), + endian: 'little', + encoding: 'signed-integer', + device: 'default', // Or settings.audio_input_device + exitOnSilence: micExitOnSilence, // This will trigger 'silence' event + debug: false // settings.debug_audio || false + }); + audioStream = audioInterface.getAudioStream(); + + audioStream.on('error', (err) => { + console.error('[STT] Mic error:', err); + stopRecording(); + fileWriter.end(() => fs.unlink(outFile, () => {})); + cleanupListeners(); + resolve(null); + }); + + audioStream.on('silence', () => { + console.log('[STT] Mic detected silence.'); + // stopRecording(); // This will call micInstance.stop() + // which then triggers processExitComplete. + // Redundant if exitOnSilence is working as expected. + // Let's ensure stopRecording is called to clear timers etc. + if (recording) { // Only call stop if we haven't already stopped for other reasons + stopRecording(); + } + // Important: mic automatically stops on silence. We need to ensure fileWriter is closed. + if (fileWriter && !fileWriter.closed) { + fileWriter.end(); // This will trigger 'finish' on fileWriter + } + }); + + audioStream.on('processExitComplete', () => { + console.log('[STT] Mic processExitComplete.'); + // This indicates mic has fully stopped. + // Ensure fileWriter is ended if not already. 
+ if (fileWriter && !fileWriter.closed) { + console.log('[STT] Mic processExitComplete: Ending fileWriter.'); + fileWriter.end(); + } + // isRecording should be set to false by stopRecording() + }); + } + + // Common event handling for data (applies to both naudiodon ai and micStream) + audioStream.on('data', (chunk) => { + if (!recording) return; // Don't process data if no longer recording + fileWriter.write(chunk); - // Calculate RMS for threshold detection + // Calculate RMS for threshold detection (same logic for both libraries) let sumSquares = 0; const sampleCount = chunk.length / 2; for (let i = 0; i < chunk.length; i += 2) { @@ -151,16 +266,20 @@ async function recordAndTranscribeOnce() { } }); - ai.on('error', (err) => { - console.error("[STT] AudioIO error:", err); - cleanupListeners(); - // Don't reject here, as continuousLoop should continue. Resolve with null. - resolve(null); - }); + // fileWriter.on('finish', ...) remains largely the same but moved outside library-specific setup + // }); // This was part of ai.on('data', ...) which is now common code block. + + // This was ai.on('error',...) specific to naudiodon, now handled above. 
+ // }); fileWriter.on('finish', async () => { + console.log('[STT] FileWriter finished.'); if (finished) return; finished = true; + + // Ensure recording is marked as stopped and lock released + isRecording = false; + if (silenceTimer) clearTimeout(silenceTimer); try { // Check audio duration const stats = fs.statSync(outFile); @@ -246,19 +365,29 @@ async function recordAndTranscribeOnce() { } }); - ai.start(); + // Start the appropriate audio input + if (activeAudioLibrary === 'naudiodon') { + audioInterface.start(); + } else if (activeAudioLibrary === 'mic') { + audioInterface.start(); + } function cleanupListeners() { - if (ai && typeof ai.removeAllListeners === 'function') { - ai.removeAllListeners('data'); - ai.removeAllListeners('error'); + if (audioStream && typeof audioStream.removeAllListeners === 'function') { + audioStream.removeAllListeners('data'); + audioStream.removeAllListeners('error'); + if (activeAudioLibrary === 'mic') { + audioStream.removeAllListeners('silence'); + audioStream.removeAllListeners('processExitComplete'); + } } if (fileWriter && typeof fileWriter.removeAllListeners === 'function') { fileWriter.removeAllListeners('finish'); } if (silenceTimer) clearTimeout(silenceTimer); - // release lock + // release lock if it hasn't been released by fileWriter.on('finish') + // This is a safeguard. isRecording = false; } }); @@ -268,14 +397,13 @@ async function recordAndTranscribeOnce() { * Runs recording sessions sequentially, so only one at a time */ async function continuousLoop() { - // This check is now more critical as AudioIO might not be available - if (!AudioIO) { - console.warn("[STT] AudioIO not available. STT continuous loop cannot start."); - sttRunning = false; // Ensure this is marked as not running + if (!activeAudioLibrary) { + console.warn("[STT] No audio recording library available. 
STT continuous loop cannot start."); + sttRunning = false; return; } - while (sttRunning) { // Check sttRunning to allow loop to terminate if STT is disabled later + while (sttRunning) { try { await recordAndTranscribeOnce(); } catch (err) { @@ -294,14 +422,13 @@ async function continuousLoop() { export function initTTS() { if (!settings.stt_transcription) { console.log("[STT] STT transcription is disabled in settings."); - sttRunning = false; // Ensure it's marked as not running + sttRunning = false; return; } - // This check is crucial: if AudioIO (from naudiodon) wasn't loaded, STT cannot run. - if (!AudioIO) { - console.warn("[STT] AudioIO is not available (naudiodon might have failed to load). STT functionality cannot be initialized."); - sttRunning = false; // Ensure sttRunning is false if it was somehow true + if (!activeAudioLibrary) { + console.warn("[STT] No audio recording library available (naudiodon or mic failed to load). STT functionality cannot be initialized."); + sttRunning = false; return; } From 296fb1323c43ba47bc301010693a3100064f87de Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 16:17:00 -0700 Subject: [PATCH 02/26] Update settings.js fixed a comma --- settings.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings.js b/settings.js index cdfc60e..215ee77 100644 --- a/settings.js +++ b/settings.js @@ -48,7 +48,7 @@ const settings = { "stt_transcription": false, // change this to "true" or "false" depending on if you want STT in Mindcraft, STT needs a GroqCloud API key, can be found here: https://console.groq.com/keys "stt_username": "SYSTEM", // Change this to the username the model will respond to. - "stt_agent_name": "" // Change the name here to whatever your agent is named, if left empty, will send message to all agents. + "stt_agent_name": "", // Change the name here to whatever your agent is named, if left empty, will send message to all agents. 
"speak": false, // allows all bots to speak through system text-to-speech. works on windows, mac, on linux you need to `apt install espeak` "log_normal_data": false, // Logs all inputs / outputs without reasoning or vision data From 44be97adc410de8463dc9654e88c288b95d4c416 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 16:35:46 -0700 Subject: [PATCH 03/26] Update huggingface.js Fixed an accidental enter --- src/models/huggingface.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/models/huggingface.js b/src/models/huggingface.js index cbc3abc..d59d3be 100644 --- a/src/models/huggingface.js +++ b/src/models/huggingface.js @@ -25,8 +25,7 @@ export class HuggingFace { const prompt = toSinglePrompt(turns, null, stop_seq); const model_name = this.model_name || 'meta-llama/Meta-Llama-3-8B'; const logInputMessages = [{role: 'system', content: systemMessage}, ...turns]; - const input = systemMessage + " -" + prompt; + const input = systemMessage + "" + prompt; const maxAttempts = 5; let attempt = 0; let finalRes = null; From f0da49403c05e5555741fc28f1b80ef2ec9f8d67 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 16:59:50 -0700 Subject: [PATCH 04/26] Update logger.js Fixed some bugs after testing --- logger.js | 83 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 26 deletions(-) diff --git a/logger.js b/logger.js index 965a1c2..3848349 100644 --- a/logger.js +++ b/logger.js @@ -1,5 +1,3 @@ -// --- START OF FILE logger.js --- - import { writeFileSync, mkdirSync, existsSync, appendFileSync, readFileSync } from 'fs'; import { join } from 'path'; import settings from './settings.js'; // Import settings @@ -133,13 +131,61 @@ function cleanReasoningMarkers(input) { return input.replace(/\/think/g, '').replace(/\/no_think/g, '').trim(); } +// Helper function to clean imagePath from messages for text logs +function cleanImagePathFromMessages(input) { + if (typeof input !== 'string') { 
+ return input; + } + + try { + const parsed = JSON.parse(input); + if (Array.isArray(parsed)) { + const cleaned = parsed.map(msg => { + let cleanedMsg = { ...msg }; // Clone message + + // Remove top-level imagePath + if (cleanedMsg.imagePath !== undefined) { + delete cleanedMsg.imagePath; + } + + // Remove image_url from content array + if (Array.isArray(cleanedMsg.content)) { + cleanedMsg.content = cleanedMsg.content.filter(part => + part.type !== 'image_url' && + !(part.type === 'image' && part.source) // Also filter Claude-style image parts + ); + + // If content becomes empty after filtering, remove it or set to empty string + if (cleanedMsg.content.length === 0) { + cleanedMsg.content = ""; + } else if (cleanedMsg.content.length === 1 && + cleanedMsg.content[0].type === 'text' && + !cleanedMsg.content[0].text?.trim()) { + cleanedMsg.content = ""; + } + } + return cleanedMsg; + }); + return JSON.stringify(cleaned); + } + } catch (e) { + // If not valid JSON, return as-is + return input; + } + + return input; +} + // --- Main Logging Function (for text-based input/output) --- export function log(input, response) { const trimmedInputStr = input ? (typeof input === 'string' ? input.trim() : JSON.stringify(input)) : ""; const trimmedResponse = response ? 
String(response).trim() : ""; // Ensure response is a string // Clean reasoning markers from input before logging - const cleanedInput = cleanReasoningMarkers(trimmedInputStr); + let cleanedInput = cleanReasoningMarkers(trimmedInputStr); + + // Clean imagePath from messages for text logs (normal/reasoning) + cleanedInput = cleanImagePathFromMessages(cleanedInput); // Basic filtering if (!cleanedInput && !trimmedResponse) { @@ -248,6 +294,7 @@ export function logVision(conversationHistory, imageBuffer, response, visionMess "Context length exceeded", "Image input modality is not enabled", "An unexpected error occurred", + "Image captured for always active vision", // Filter out placeholder responses ]; if (errorMessages.some(err => trimmedResponse.includes(err))) { @@ -271,31 +318,17 @@ export function logVision(conversationHistory, imageBuffer, response, visionMess writeFileSync(imagePath, imageBuffer); logCounts.vision_images_saved++; - // Extract the actual message sent with the image - // This is typically the vision prompt/instruction - let inputMessage = visionMessage; - if (!inputMessage && conversationHistory.length > 0) { - // Try to get the last user message or system message - const lastMessage = conversationHistory[conversationHistory.length - 1]; - if (typeof lastMessage.content === 'string') { - inputMessage = lastMessage.content; - } else if (Array.isArray(lastMessage.content)) { - // Find text content in the message - const textContent = lastMessage.content.find(c => c.type === 'text'); - inputMessage = textContent ? 
textContent.text : ''; - } - } - - // Fallback to conversation history if no specific message - if (!inputMessage) { - inputMessage = formatConversationInput(conversationHistory); - } + // Clean the conversation history to remove imagePath and image data before logging + const cleanedConversationHistory = JSON.parse(cleanImagePathFromMessages(JSON.stringify(conversationHistory))); + + // Format the complete input as JSON (cleaned conversation history) + const inputData = JSON.stringify(cleanedConversationHistory); // Create metadata entry in JSONL format for HuggingFace const metadataEntry = { file_name: relativeImagePath, - text: inputMessage, - response: trimmedResponse, + input: inputData, // Cleaned JSON conversation history + response: trimmedResponse, // Actual model response, not placeholder timestamp: timestamp }; @@ -397,5 +430,3 @@ function countVisionEntries(metadataFile) { // Initialize counts at startup initializeCounts(); - -// --- END OF FILE logger.js --- \ No newline at end of file From 237f7ce9153ada86779d11fd125d4da6c6411312 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:01:34 -0700 Subject: [PATCH 05/26] Update claude.js Fixed some logging --- src/models/claude.js | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/models/claude.js b/src/models/claude.js index 50e5627..5e19d15 100644 --- a/src/models/claude.js +++ b/src/models/claude.js @@ -100,7 +100,22 @@ export class Claude { if (typeof res === 'string') { res = res.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(logMessagesForClaude), res); + + if (imageData) { // If imageData was part of this sendRequest call + let visionPromptText = ""; // Attempt to find the text prompt associated with the image + if (turns.length > 0) { + const lastTurn = messages[messages.length - 1]; // `messages` is strictFormat(turns) + if (lastTurn.role === 'user' && Array.isArray(lastTurn.content)) { + const textPart = 
lastTurn.content.find(part => part.type === 'text'); + if (textPart) visionPromptText = textPart.text; + } else if (lastTurn.role === 'user' && typeof lastTurn.content === 'string') { + visionPromptText = lastTurn.content; + } + } + logVision(logMessagesForClaude, imageData, res, visionPromptText); + } else { + log(JSON.stringify(logMessagesForClaude), res); + } return res; } @@ -121,7 +136,7 @@ export class Claude { const res = await this.sendRequest(turnsForAPIRequest, systemMessage); if (imageBuffer && res) { - logVision(turns, imageBuffer, res, systemMessage); + logVision([{ role: "system", content: systemMessage }].concat(turns), imageBuffer, res, systemMessage); } return res; } From f6b276b3cfd6873cb717141fb75528a78a0f4649 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:01:58 -0700 Subject: [PATCH 06/26] Update deepseek.js fixed logging --- src/models/deepseek.js | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/models/deepseek.js b/src/models/deepseek.js index ae0e552..53bc638 100644 --- a/src/models/deepseek.js +++ b/src/models/deepseek.js @@ -98,7 +98,24 @@ export class DeepSeek { if (typeof res === 'string') { res = res.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(messages), res); + + if (imageData) { // If imageData was part of this sendRequest call + const conversationForLogVision = [{ role: "system", content: systemMessage }].concat(turns); + let visionPromptText = ""; + if (turns.length > 0) { + const lastTurn = messages[messages.length - 1]; // `messages` is after image processing + if (lastTurn.role === 'user' && Array.isArray(lastTurn.content)) { + const textPart = lastTurn.content.find(part => part.type === 'text'); + if (textPart) visionPromptText = textPart.text; + } else if (lastTurn.role === 'user' && typeof lastTurn.content === 'string') { + // This case might not happen if image is added, as content becomes array + visionPromptText = lastTurn.content; + } + } 
+ logVision(conversationForLogVision, imageData, res, visionPromptText); + } else { + log(JSON.stringify([{ role: "system", content: systemMessage }].concat(turns)), res); + } return res; } From 6ae7b82a53bc064483509dcd32a1bc7be2b3a072 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:02:21 -0700 Subject: [PATCH 07/26] Update gemini.js Fixed some logging --- src/models/gemini.js | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/models/gemini.js b/src/models/gemini.js index 3036ef5..3f4bcff 100644 --- a/src/models/gemini.js +++ b/src/models/gemini.js @@ -80,7 +80,21 @@ export class Gemini { if (typeof text === 'string') { text = text.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(originalTurnsForLog), text); + + if (imageData) { // If imageData was part of this sendRequest call + let visionPromptText = ""; // Attempt to find the text prompt associated with the image + // `contents` is the array sent to the model + if (contents.length > 0) { + const lastUserTurnParts = contents[contents.length -1].parts; + if (Array.isArray(lastUserTurnParts)) { + const textPart = lastUserTurnParts.find(part => part.text); + if (textPart) visionPromptText = textPart.text; + } + } + logVision(originalTurnsForLog, imageData, text, visionPromptText); + } else { + log(JSON.stringify(originalTurnsForLog), text); + } return text; } @@ -102,7 +116,7 @@ export class Gemini { const text = response.text(); console.log('Received.'); if (imageBuffer && text) { - logVision(turns, imageBuffer, text, prompt); + logVision([{role: 'system', content: systemMessage}, ...turns], imageBuffer, text, prompt); } if (!text.includes(stop_seq)) return text; const idx = text.indexOf(stop_seq); @@ -118,6 +132,7 @@ export class Gemini { if (typeof res === 'string') { res = res.replace(//g, '').replace(/<\/thinking>/g, ''); } + // For error cases in vision, still use regular log since there's no image to save 
log(JSON.stringify(loggedTurnsForError), res); } return res; From 69332f6a198bd0328d2950d4c402e8abd8e2f220 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:02:42 -0700 Subject: [PATCH 08/26] Update glhf.js Fixed some logging --- src/models/glhf.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models/glhf.js b/src/models/glhf.js index 17fbea1..3e29731 100644 --- a/src/models/glhf.js +++ b/src/models/glhf.js @@ -75,7 +75,7 @@ export class GLHF { if (typeof finalRes === 'string') { finalRes = finalRes.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(messages), finalRes); + log(JSON.stringify([{ role: 'system', content: systemMessage }].concat(turns)), finalRes); return finalRes; } From 63ff3e4c1f469ef79e0b22fd5e196585e0b832c2 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:03:16 -0700 Subject: [PATCH 09/26] Update gpt.js Fixed some logging --- src/models/gpt.js | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/models/gpt.js b/src/models/gpt.js index 4fd72fa..3c9bfde 100644 --- a/src/models/gpt.js +++ b/src/models/gpt.js @@ -87,7 +87,25 @@ export class GPT { if (typeof res === 'string') { res = res.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(messages), res); + + if (imageData) { + const conversationForLogVision = [{ role: "system", content: systemMessage }].concat(turns); + let visionPromptText = ""; + if (turns.length > 0) { + const lastTurn = turns[turns.length - 1]; + if (lastTurn.role === 'user') { + if (typeof lastTurn.content === 'string') { + visionPromptText = lastTurn.content; + } else if (Array.isArray(lastTurn.content)) { + const textPart = lastTurn.content.find(part => part.type === 'text'); + if (textPart) visionPromptText = textPart.text; + } + } + } + logVision(conversationForLogVision, imageData, res, visionPromptText); + } else { + log(JSON.stringify([{ role: "system", content: systemMessage 
}].concat(turns)), res); + } return res; } @@ -107,7 +125,8 @@ export class GPT { const res = await this.sendRequest(imageFormattedTurns, systemMessage); if (imageBuffer && res) { - logVision(original_turns, imageBuffer, res, systemMessage); + // The conversationHistory for logVision should be the state *before* this specific vision interaction's prompt was added. + logVision([{ role: "system", content: systemMessage }].concat(original_turns), imageBuffer, res, systemMessage); } return res; } From 8e558a10add7899ac75db584e98ed812f57c5339 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:14:34 -0700 Subject: [PATCH 10/26] Update grok.js Fixed some logging --- src/models/grok.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/models/grok.js b/src/models/grok.js index 79c956d..afd51d6 100644 --- a/src/models/grok.js +++ b/src/models/grok.js @@ -56,7 +56,7 @@ export class Grok { if (typeof finalResponseText === 'string') { finalResponseText = finalResponseText.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(messages), finalResponseText); + log(JSON.stringify([{ role: "system", content: systemMessage }].concat(turns)), finalResponseText); return finalResponseText; } @@ -76,7 +76,7 @@ export class Grok { const res = await this.sendRequest(imageFormattedTurns, systemMessage); if (imageBuffer && res) { - logVision(original_turns, imageBuffer, res, systemMessage); + logVision([{ role: "system", content: systemMessage }].concat(original_turns), imageBuffer, res, systemMessage); } return res; } From bdb3b1788af6bc8dbe872252732aa2b6e19e7242 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:15:03 -0700 Subject: [PATCH 11/26] Update groq.js Fixed some logging --- src/models/groq.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/models/groq.js b/src/models/groq.js index de7ebbd..a836d2c 100644 --- a/src/models/groq.js +++ b/src/models/groq.js @@ -60,7 +60,7 @@ export 
class GroqCloudAPI { if (typeof responseText === 'string') { responseText = responseText.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(messages), responseText); + log(JSON.stringify([{ role: "system", content: systemMessage }].concat(turns)), responseText); // Original cleaning of tags for the *returned* response (not affecting log) responseText = responseText.replace(/[\s\S]*?<\/think>/g, '').trim(); return responseText; @@ -75,7 +75,7 @@ export class GroqCloudAPI { if (typeof res === 'string') { res = res.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(messages), res); + log(JSON.stringify([{ role: "system", content: systemMessage }].concat(turns)), res); return res; } } @@ -96,7 +96,7 @@ export class GroqCloudAPI { const res = await this.sendRequest(imageMessages, systemMessage); if (imageBuffer && res) { - logVision(original_turns, imageBuffer, res, systemMessage); + logVision([{ role: "system", content: systemMessage }].concat(original_turns), imageBuffer, res, systemMessage); } return res; } From ba1b0ea22f60b3c003620220483a557d7a51d693 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:15:37 -0700 Subject: [PATCH 12/26] Update hyperbolic.js Fixed some logging --- src/models/hyperbolic.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models/hyperbolic.js b/src/models/hyperbolic.js index 076c812..91989f3 100644 --- a/src/models/hyperbolic.js +++ b/src/models/hyperbolic.js @@ -116,7 +116,7 @@ export class Hyperbolic { if (typeof finalRes === 'string') { finalRes = finalRes.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(messages), finalRes); + log(JSON.stringify([{ role: 'system', content: systemMessage }].concat(turns)), finalRes); return finalRes; } From 3ea4c2df5df69ee3a26aedf174799869dc968a21 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:15:57 -0700 Subject: [PATCH 13/26] Update local.js Fixed some logging --- src/models/local.js 
| 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/models/local.js b/src/models/local.js index c199df8..555735e 100644 --- a/src/models/local.js +++ b/src/models/local.js @@ -93,7 +93,22 @@ export class Local { if (typeof finalRes === 'string') { finalRes = finalRes.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(messages), finalRes); + + if (imageData) { // If imageData was part of this sendRequest call + // `messages` here already includes the system prompt and image data + let visionPromptText = ""; + if (messages.length > 0) { + const lastTurn = messages[messages.length -1]; + // For Ollama, content is a string, images is a separate array. + if (lastTurn.role === 'user' && typeof lastTurn.content === 'string') { + visionPromptText = lastTurn.content; + } + } + logVision(messages, imageData, finalRes, visionPromptText); + } else { + // messages already includes system prompt if no imageData + log(JSON.stringify(messages), finalRes); + } return finalRes; } From 989664d1befe93ffc7b28c3e8d488d9c589f2e2e Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:16:42 -0700 Subject: [PATCH 14/26] Update openrouter.js Fixed some logging --- src/models/openrouter.js | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/models/openrouter.js b/src/models/openrouter.js index 838f4a3..3b4c3a9 100644 --- a/src/models/openrouter.js +++ b/src/models/openrouter.js @@ -48,7 +48,7 @@ export class OpenRouter { return 'No response received.'; } - const logMessages = [{ role: "system", content: processedSystemMessage }].concat(turns); + const logMessages = [{ role: "system", content: systemMessage }].concat(turns); if (completion.choices[0].finish_reason === 'length') { throw new Error('Context length exceeded'); @@ -58,23 +58,15 @@ export class OpenRouter { try{ const reasoning = '\n' + completion.choices[0].message.reasoning + '\n'; const content = 
completion.choices[0].message.content; - - // --- VISION LOGGING --- - if (visionImageBuffer) { - logVision(turns, visionImageBuffer, reasoning + "\n" + content, visionMessage); - } else { - log(JSON.stringify(logMessages), reasoning + "\n" + content); - } + // Standard logging for text-based responses + log(JSON.stringify(logMessages), reasoning + "\n" + content); res = content; } catch {} } else { try { res = completion.choices[0].message.content; - if (visionImageBuffer) { - logVision(turns, visionImageBuffer, res, visionMessage); - } else { - log(JSON.stringify(logMessages), res); - } + // Standard logging for text-based responses + log(JSON.stringify(logMessages), res); } catch { console.warn("Unable to log due to unknown error!"); } @@ -101,12 +93,13 @@ export class OpenRouter { return finalRes; } - async sendVisionRequest(messages, systemMessage, imageBuffer) { - const imageMessages = [...messages]; - imageMessages.push({ + async sendVisionRequest(original_turns, systemMessage, imageBuffer) { // Renamed messages to original_turns + const imageFormattedTurns = [...original_turns]; + imageFormattedTurns.push({ role: "user", content: [ - { type: "text", text: systemMessage }, + // The systemMessage is used as the text prompt accompanying the image here + { type: "text", text: systemMessage }, { type: "image_url", image_url: { @@ -116,10 +109,17 @@ export class OpenRouter { ] }); - // sendVisionRequest formats its own message array; sendRequest here should not process new imageData. - // Pass systemMessage and stop_seq as originally intended by sendRequest. - return this.sendRequest(imageMessages, systemMessage, null, stop_seq); - + // Pass the main systemMessage to sendRequest, as it expects a system prompt. + // The image-specific prompt is part of imageFormattedTurns. 
+ const res = await this.sendRequest(imageFormattedTurns, systemMessage, null, stop_seq); + + if (imageBuffer && res) { + // For logVision, conversationHistory should be the original turns + system prompt. + // The visionMessage (text prompt for the image) is systemMessage in this context. + logVision([{ role: "system", content: systemMessage }].concat(original_turns), imageBuffer, res, systemMessage); + } + + return res; } async embed(text) { From d116e9012695dfadf41855bb55f1ac8bfac1aed8 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:17:51 -0700 Subject: [PATCH 15/26] Update prompter.js Fixed spacing and logging --- src/models/prompter.js | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/models/prompter.js b/src/models/prompter.js index 9b4b70f..d7ad05c 100644 --- a/src/models/prompter.js +++ b/src/models/prompter.js @@ -364,6 +364,9 @@ export class Prompter { console.log("Generated response:", generation); await this._saveLog(prompt, messages, generation, 'conversation'); + // Remove the incorrect logVision call here since sendRequest should handle it + // The model's sendRequest method will call logVision if imageData was provided + } catch (error) { console.error('Error during message generation or file writing:', error); continue; @@ -465,26 +468,15 @@ export class Prompter { } async _saveLog(prompt, messages, generation, tag) { - // NEW LOGIC STARTS switch (tag) { case 'conversation': case 'coding': // Assuming coding logs fall under normal data case 'memSaving': if (!settings.log_normal_data) return; break; - // Add case for 'vision' if prompter.js starts logging vision prompts/responses via _saveLog - // case 'vision': - // if (!settings.log_vision_data) return; - // break; default: - // If it's an unknown tag, perhaps log it if general logging is on, or ignore. - // For safety, let's assume if it's not specified, it doesn't get logged unless a general flag is on. 
- // However, the goal is to use specific flags. So, if a new tag appears, this logic should be updated. - // For now, if it doesn't match known tags that map to a setting, it won't log. return; } - // NEW LOGIC ENDS - const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); let logEntry; let task_id = this.agent.task.task_id; @@ -511,6 +503,4 @@ export class Prompter { logFile = path.join(logDir, logFile); await fs.appendFile(logFile, String(logEntry), 'utf-8'); } - - } From 21ad69693f03304961d3b5aa22764f55c87bbcbf Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:18:09 -0700 Subject: [PATCH 16/26] Update qwen.js --- src/models/qwen.js | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/models/qwen.js b/src/models/qwen.js index f37a4ef..506d715 100644 --- a/src/models/qwen.js +++ b/src/models/qwen.js @@ -85,7 +85,24 @@ export class Qwen { if (typeof res === 'string') { res = res.replace(//g, '').replace(/<\/thinking>/g, ''); } - log(JSON.stringify(messages), res); + + if (imageData) { // If imageData was part of this sendRequest call + // `messages` here includes system prompt and image data + let visionPromptText = ""; + if (messages.length > 0) { + const lastTurn = messages[messages.length - 1]; + if (lastTurn.role === 'user' && Array.isArray(lastTurn.content)) { + const textPart = lastTurn.content.find(part => part.text); + if (textPart) visionPromptText = textPart.text; + } else if (lastTurn.role === 'user' && typeof lastTurn.content === 'string'){ + visionPromptText = lastTurn.content; + } + } + logVision(messages, imageData, res, visionPromptText); + } else { + // messages already includes system prompt if no imageData + log(JSON.stringify(messages), res); + } return res; } @@ -117,4 +134,4 @@ export class Qwen { throw new Error('Max retries reached, request failed.'); } -} \ No newline at end of file +} From 87e2e708fdb64c88881072215b37eb987c709619 Mon Sep 17 00:00:00 2001 From: Sweaterdog 
Date: Sat, 7 Jun 2025 17:19:23 -0700 Subject: [PATCH 17/26] Update settings.js Updated settings with the new features --- settings.js | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/settings.js b/settings.js index 215ee77..b260f69 100644 --- a/settings.js +++ b/settings.js @@ -7,7 +7,7 @@ const settings = { // the mindserver manages all agents and hosts the UI "host_mindserver": true, // if true, the mindserver will be hosted on this machine. otherwise, specify a public IP address "mindserver_host": "localhost", - "mindserver_port": 8080, + "mindserver_port": 8081, // the base profile is shared by all bots for default prompts/examples/modes "base_profile": "./profiles/defaults/survival.json", // also see creative.json, god_mode.json @@ -26,15 +26,15 @@ const settings = { // using more than 1 profile requires you to /msg each bot indivually // individual profiles override values from the base profile ], - "load_memory": false, // load memory from previous session + "load_memory": true, // load memory from previous session "init_message": "Respond with hello world and your name", // sends to all on spawn "only_chat_with": [], // users that the bots listen to and send general messages to. if empty it will chat publicly "language": "en", // translate to/from this language. Supports these language names: https://cloud.google.com/translate/docs/languages "show_bot_views": false, // show bot's view in browser at localhost:3000, 3001... - "allow_insecure_coding": false, // allows newAction command and model can write/run code on your computer. enable at own risk - "allow_vision": false, // allows vision model to interpret screenshots as inputs - "vision_mode": "prompted", // "off", "prompted", or "always" + "allow_insecure_coding": true, // allows newAction command and model can write/run code on your computer. 
enable at own risk + "allow_vision": true, // allows vision model to interpret screenshots as inputs + "vision_mode": "always", // "off", "prompted", or "always" "blocked_actions" : ["!checkBlueprint", "!checkBlueprintLevel", "!getBlueprint", "!getBlueprintLevel"] , // commands to disable and remove from docs. Ex: ["!setMode"] "code_timeout_mins": -1, // minutes code is allowed to run. -1 for no timeout "relevant_docs_count": 5, // number of relevant code function docs to select for prompting. -1 for all @@ -46,15 +46,25 @@ const settings = { "narrate_behavior": true, // chat simple automatic actions ('Picking up item!') "chat_bot_messages": true, // publicly chat messages to other bots - "stt_transcription": false, // change this to "true" or "false" depending on if you want STT in Mindcraft, STT needs a GroqCloud API key, can be found here: https://console.groq.com/keys - "stt_username": "SYSTEM", // Change this to the username the model will respond to. - "stt_agent_name": "", // Change the name here to whatever your agent is named, if left empty, will send message to all agents. - "speak": false, // allows all bots to speak through system text-to-speech. 
works on windows, mac, on linux you need to `apt install espeak` - - "log_normal_data": false, // Logs all inputs / outputs without reasoning or vision data - "log_reasoning_data": false, // Logs only reasoning inputs / outputs - "log_vision_data": false, // Logs only vision inputs / outputs + "speak": true, // enable text-to-speech + "stt_transcription": true, // enable speech-to-text transcription + "stt_username": "SERVER", // username for STT messages + "stt_agent_name": "", // agent name for STT messages, if empty it will send the STT to all bots + // STT Audio Detection Settings + "stt_rms_threshold": 8000, // Higher = less sensitive to background noise + "stt_silence_duration": 2000, // 2 seconds of silence before stopping + "stt_min_audio_duration": 0.5, // Minimum audio duration in seconds + "stt_max_audio_duration": 15, // Maximum audio duration in seconds + "stt_debug_audio": false, // Enable to see audio levels and tune threshold + "stt_cooldown_ms": 2000, // Minimum time between recordings (increased) + "stt_speech_threshold_ratio": 0.15, // Percentage of samples that must be above threshold to consider it speech + "stt_consecutive_speech_samples": 5, // Consecutive samples above threshold before considering it speech + + "log_normal_data": true, // Logs all inputs / outputs without reasoning or vision data + "log_reasoning_data": true, // Logs only reasoning inputs / outputs + "log_vision_data": true, // Logs only vision inputs / outputs + } // these environment variables override certain settings From f22b4957e0cb53b0c31c0fcf6e2eedccea1ccd62 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:25:33 -0700 Subject: [PATCH 18/26] Update settings.js Changed some of the values for a better STT experience --- settings.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/settings.js b/settings.js index b260f69..38304d8 100644 --- a/settings.js +++ b/settings.js @@ -52,10 +52,10 @@ const settings = { "stt_agent_name": "", 
// agent name for STT messages, if empty it will send the STT to all bots // STT Audio Detection Settings - "stt_rms_threshold": 8000, // Higher = less sensitive to background noise + "stt_rms_threshold": 1000, // Higher = less sensitive to background noise "stt_silence_duration": 2000, // 2 seconds of silence before stopping "stt_min_audio_duration": 0.5, // Minimum audio duration in seconds - "stt_max_audio_duration": 15, // Maximum audio duration in seconds + "stt_max_audio_duration": 45, // Maximum audio duration in seconds "stt_debug_audio": false, // Enable to see audio levels and tune threshold "stt_cooldown_ms": 2000, // Minimum time between recordings (increased) "stt_speech_threshold_ratio": 0.15, // Percentage of samples that must be above threshold to consider it speech From 4d6765cacfc70f63beb28863dddbdd93b3194deb Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:29:53 -0700 Subject: [PATCH 19/26] Update settings.js --- settings.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/settings.js b/settings.js index 38304d8..e1f0daa 100644 --- a/settings.js +++ b/settings.js @@ -52,14 +52,14 @@ const settings = { "stt_agent_name": "", // agent name for STT messages, if empty it will send the STT to all bots // STT Audio Detection Settings - "stt_rms_threshold": 1000, // Higher = less sensitive to background noise + "stt_rms_threshold": 3000, // Raised from 1000 to reduce false triggers "stt_silence_duration": 2000, // 2 seconds of silence before stopping "stt_min_audio_duration": 0.5, // Minimum audio duration in seconds "stt_max_audio_duration": 45, // Maximum audio duration in seconds - "stt_debug_audio": false, // Enable to see audio levels and tune threshold - "stt_cooldown_ms": 2000, // Minimum time between recordings (increased) - "stt_speech_threshold_ratio": 0.15, // Percentage of samples that must be above threshold to consider it speech - "stt_consecutive_speech_samples": 5, // Consecutive samples above 
threshold before considering it speech + "stt_debug_audio": true, // Enable to see what's happening + "stt_cooldown_ms": 2000, // Minimum time between recordings + "stt_speech_threshold_ratio": 0.05, // Much lower - 5% instead of 15% + "stt_consecutive_speech_samples": 3, // Reduced from 5 to 3 "log_normal_data": true, // Logs all inputs / outputs without reasoning or vision data "log_reasoning_data": true, // Logs only reasoning inputs / outputs From d79b3f3534962d84685062ebb0c9d11f9ed8b52b Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:33:21 -0700 Subject: [PATCH 20/26] Update settings.js Restored the settings back to its true form --- settings.js | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/settings.js b/settings.js index e1f0daa..9f33825 100644 --- a/settings.js +++ b/settings.js @@ -7,7 +7,7 @@ const settings = { // the mindserver manages all agents and hosts the UI "host_mindserver": true, // if true, the mindserver will be hosted on this machine. otherwise, specify a public IP address "mindserver_host": "localhost", - "mindserver_port": 8081, + "mindserver_port": 8080, // the base profile is shared by all bots for default prompts/examples/modes "base_profile": "./profiles/defaults/survival.json", // also see creative.json, god_mode.json @@ -26,15 +26,15 @@ const settings = { // using more than 1 profile requires you to /msg each bot indivually // individual profiles override values from the base profile ], - "load_memory": true, // load memory from previous session + "load_memory": false, // load memory from previous session "init_message": "Respond with hello world and your name", // sends to all on spawn "only_chat_with": [], // users that the bots listen to and send general messages to. if empty it will chat publicly "language": "en", // translate to/from this language. 
Supports these language names: https://cloud.google.com/translate/docs/languages "show_bot_views": false, // show bot's view in browser at localhost:3000, 3001... - "allow_insecure_coding": true, // allows newAction command and model can write/run code on your computer. enable at own risk - "allow_vision": true, // allows vision model to interpret screenshots as inputs - "vision_mode": "always", // "off", "prompted", or "always" + "allow_insecure_coding": false, // allows newAction command and model can write/run code on your computer. enable at own risk + "allow_vision": false, // allows vision model to interpret screenshots as inputs + "vision_mode": "off", // "off", "prompted", or "always" "blocked_actions" : ["!checkBlueprint", "!checkBlueprintLevel", "!getBlueprint", "!getBlueprintLevel"] , // commands to disable and remove from docs. Ex: ["!setMode"] "code_timeout_mins": -1, // minutes code is allowed to run. -1 for no timeout "relevant_docs_count": 5, // number of relevant code function docs to select for prompting. 
-1 for all @@ -46,8 +46,8 @@ const settings = { "narrate_behavior": true, // chat simple automatic actions ('Picking up item!') "chat_bot_messages": true, // publicly chat messages to other bots - "speak": true, // enable text-to-speech - "stt_transcription": true, // enable speech-to-text transcription + "speak": false, // enable text-to-speech + "stt_transcription": false, // enable speech-to-text transcription "stt_username": "SERVER", // username for STT messages "stt_agent_name": "", // agent name for STT messages, if empty it will send the STT to all bots @@ -61,9 +61,9 @@ const settings = { "stt_speech_threshold_ratio": 0.05, // Much lower - 5% instead of 15% "stt_consecutive_speech_samples": 3, // Reduced from 5 to 3 - "log_normal_data": true, // Logs all inputs / outputs without reasoning or vision data - "log_reasoning_data": true, // Logs only reasoning inputs / outputs - "log_vision_data": true, // Logs only vision inputs / outputs + "log_normal_data": false, // Logs all inputs / outputs without reasoning or vision data + "log_reasoning_data": false, // Logs only reasoning inputs / outputs + "log_vision_data": false, // Logs only vision inputs / outputs } From 9d768515b20f231b62e0ca8009ac47dc76b0b021 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:34:03 -0700 Subject: [PATCH 21/26] Update tts_process.js Fixed the STT detection, now much smarter than before --- src/process/tts_process.js | 380 +++++++++++++++++++------------------ 1 file changed, 200 insertions(+), 180 deletions(-) diff --git a/src/process/tts_process.js b/src/process/tts_process.js index 2ce3dd5..4515df0 100644 --- a/src/process/tts_process.js +++ b/src/process/tts_process.js @@ -1,7 +1,5 @@ import settings from '../../settings.js'; import { GroqCloudTTS } from '../models/groq.js'; -// import portAudio from 'naudiodon'; // Original static import -// const { AudioIO, SampleFormat16Bit } = portAudio; // Original destructuring import wav from 'wav'; import fs from 'fs'; 
import path from 'path'; @@ -12,8 +10,7 @@ import { getIO, getAllInGameAgentNames } from '../server/mind_server.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); - -// --- Conditional Naudiodon Import --- +// Import the audio libraries conditionally let portAudio; let AudioIO; let SampleFormat16Bit; @@ -82,25 +79,35 @@ for (const file of leftover) { } } -// Configuration -const RMS_THRESHOLD = 500; // Lower threshold for faint audio -const SILENCE_DURATION = 2000; // 2 seconds of silence after speech => stop +// Configuration from settings +const RMS_THRESHOLD = settings.stt_rms_threshold || 8000; +const SILENCE_DURATION = settings.stt_silence_duration || 2000; +const MIN_AUDIO_DURATION = settings.stt_min_audio_duration || 0.5; +const MAX_AUDIO_DURATION = settings.stt_max_audio_duration || 15; +const DEBUG_AUDIO = settings.stt_debug_audio || false; +const COOLDOWN_MS = settings.stt_cooldown_ms || 2000; +const SPEECH_THRESHOLD_RATIO = settings.stt_speech_threshold_ratio || 0.15; +const CONSECUTIVE_SPEECH_SAMPLES = settings.stt_consecutive_speech_samples || 5; const SAMPLE_RATE = 16000; const BIT_DEPTH = 16; -const STT_USERNAME = settings.stt_username || "SERVER"; // Name that appears as sender -const STT_AGENT_NAME = settings.stt_agent_name || ""; // If blank, broadcast to all +const STT_USERNAME = settings.stt_username || "SERVER"; +const STT_AGENT_NAME = settings.stt_agent_name || ""; // Guards to prevent multiple overlapping recordings -let isRecording = false; // Ensures only one recordAndTranscribeOnce at a time -let sttRunning = false; // Ensures continuousLoop is started only once +let isRecording = false; +let sttRunning = false; +let sttInitialized = false; +let lastRecordingEndTime = 0; -/** - * Records one session, transcribes, and sends to MindServer as a chat message - */ async function recordAndTranscribeOnce() { + // Check cooldown period + const timeSinceLastRecording = Date.now() - 
lastRecordingEndTime; + if (timeSinceLastRecording < COOLDOWN_MS) { + return null; + } + // If another recording is in progress, just skip if (isRecording) { - console.log("[STT] Another recording is still in progress; skipping new record attempt."); return null; } isRecording = true; @@ -113,18 +120,26 @@ async function recordAndTranscribeOnce() { }); if (!activeAudioLibrary) { - console.warn("[STT] No audio recording library available (naudiodon or mic). Cannot record audio."); + console.warn("[STT] No audio recording library available."); isRecording = false; return null; } - let audioInterface; // Will hold either naudiodon's 'ai' or mic's 'micInstance' - let audioStream; // Will hold either naudiodon's 'ai' or mic's 'micInputStream' - + let audioInterface; + let audioStream; let recording = true; let hasHeardSpeech = false; let silenceTimer = null; - let finished = false; // Guard to ensure final processing is done only once + let maxDurationTimer = null; + let finished = false; + + // Smart speech detection variables + let speechSampleCount = 0; + let totalSampleCount = 0; + let consecutiveSpeechSamples = 0; + let speechLevels = []; + let averageSpeechLevel = 0; + let adaptiveThreshold = RMS_THRESHOLD; // Helper to reset silence timer function resetSilenceTimer() { @@ -132,7 +147,7 @@ async function recordAndTranscribeOnce() { // Only start silence timer if actual speech has been detected if (hasHeardSpeech && recording) { // also check `recording` to prevent timer after explicit stop silenceTimer = setTimeout(() => { - console.log('[STT] Silence detected, stopping recording.'); + if (DEBUG_AUDIO) console.log('[STT] Silence timeout reached, stopping recording.'); stopRecording(); }, SILENCE_DURATION); } @@ -141,114 +156,85 @@ async function recordAndTranscribeOnce() { // Stop recording function stopRecording() { if (!recording) return; - console.log('[STT] stopRecording called.'); - recording = false; // Set recording to false immediately + recording = false; 
+ + if (silenceTimer) clearTimeout(silenceTimer); + if (maxDurationTimer) clearTimeout(maxDurationTimer); if (activeAudioLibrary === 'naudiodon' && audioInterface) { - audioInterface.quit(); + try { + audioInterface.quit(); + } catch (err) { + // Silent error handling + } } else if (activeAudioLibrary === 'mic' && audioInterface) { - audioInterface.stop(); // micInstance.stop() + try { + audioInterface.stop(); + } catch (err) { + // Silent error handling + } } - // fileWriter.end() will be called by the 'finish' or 'silence' event handlers - // to ensure all data is written before closing the file. - // However, if stopRecording is called externally (e.g. by SILENCE_DURATION timer) - // and not by an event that naturally ends the stream, we might need to end it here. - // Let's defer fileWriter.end() to specific event handlers for now, - // but if issues arise, this is a place to check. - // For now, we rely on 'silence' (mic) or 'quit' sequence (naudiodon) to close writer. - } + if (fileWriter && !fileWriter.closed) { + fileWriter.end(); + } + } // We wrap everything in a promise so we can await the transcription return new Promise((resolve, reject) => { + // Set maximum recording duration timer + maxDurationTimer = setTimeout(() => { + stopRecording(); + }, MAX_AUDIO_DURATION * 1000); + if (activeAudioLibrary === 'naudiodon') { - if (!AudioIO || !SampleFormat16Bit) { // Should have been caught by activeAudioLibrary check, but for safety - console.warn("[STT] Naudiodon not available for recording."); + if (!AudioIO || !SampleFormat16Bit) { isRecording = false; return reject(new Error("Naudiodon not available")); } - audioInterface = new AudioIO({ // Naudiodon's ai + audioInterface = new AudioIO({ inOptions: { channelCount: 1, sampleFormat: SampleFormat16Bit, sampleRate: SAMPLE_RATE, - deviceId: -1, // Default device + deviceId: -1, closeOnError: true } }); - audioStream = audioInterface; // For naudiodon, the interface itself is the stream emitter + audioStream = 
audioInterface; audioStream.on('error', (err) => { - console.error("[STT] Naudiodon AudioIO error:", err); - stopRecording(); // Try to stop everything - fileWriter.end(() => fs.unlink(outFile, () => {})); // End writer and delete file - cleanupListeners(); - resolve(null); // Resolve with null as per existing logic for continuousLoop + cleanupAndResolve(null); }); } else if (activeAudioLibrary === 'mic') { - // Calculate exitOnSilence for mic. It's in number of 512-byte chunks. - // Each chunk is 256 samples (16-bit, so 2 bytes per sample). - // Duration of one chunk = 256 samples / SAMPLE_RATE seconds. - // Number of chunks for SILENCE_DURATION: - // (SILENCE_DURATION / 1000) / (256 / SAMPLE_RATE) - const micExitOnSilence = Math.ceil((SILENCE_DURATION / 1000) * (SAMPLE_RATE / 256)); - console.log(`[STT] Mic exitOnSilence calculated to: ${micExitOnSilence} frames (for ${SILENCE_DURATION}ms)`); - - audioInterface = new mic({ // micInstance + audioInterface = new mic({ rate: String(SAMPLE_RATE), channels: '1', bitwidth: String(BIT_DEPTH), endian: 'little', encoding: 'signed-integer', - device: 'default', // Or settings.audio_input_device - exitOnSilence: micExitOnSilence, // This will trigger 'silence' event - debug: false // settings.debug_audio || false + device: 'default', + debug: false // Don't use mic's debug, we have our own }); audioStream = audioInterface.getAudioStream(); audioStream.on('error', (err) => { - console.error('[STT] Mic error:', err); - stopRecording(); - fileWriter.end(() => fs.unlink(outFile, () => {})); - cleanupListeners(); - resolve(null); - }); - - audioStream.on('silence', () => { - console.log('[STT] Mic detected silence.'); - // stopRecording(); // This will call micInstance.stop() - // which then triggers processExitComplete. - // Redundant if exitOnSilence is working as expected. - // Let's ensure stopRecording is called to clear timers etc. 
- if (recording) { // Only call stop if we haven't already stopped for other reasons - stopRecording(); - } - // Important: mic automatically stops on silence. We need to ensure fileWriter is closed. - if (fileWriter && !fileWriter.closed) { - fileWriter.end(); // This will trigger 'finish' on fileWriter - } + cleanupAndResolve(null); }); audioStream.on('processExitComplete', () => { - console.log('[STT] Mic processExitComplete.'); - // This indicates mic has fully stopped. - // Ensure fileWriter is ended if not already. - if (fileWriter && !fileWriter.closed) { - console.log('[STT] Mic processExitComplete: Ending fileWriter.'); - fileWriter.end(); - } - // isRecording should be set to false by stopRecording() + // Silent }); } // Common event handling for data (applies to both naudiodon ai and micStream) audioStream.on('data', (chunk) => { - if (!recording) return; // Don't process data if no longer recording + if (!recording) return; fileWriter.write(chunk); - // Calculate RMS for threshold detection (same logic for both libraries) + // Calculate RMS for threshold detection let sumSquares = 0; const sampleCount = chunk.length / 2; for (let i = 0; i < chunk.length; i += 2) { @@ -256,44 +242,65 @@ async function recordAndTranscribeOnce() { sumSquares += sample * sample; } const rms = Math.sqrt(sumSquares / sampleCount); + totalSampleCount++; - // If RMS passes threshold, we've heard speech - if (rms > RMS_THRESHOLD) { - if (!hasHeardSpeech) { - hasHeardSpeech = true; + // Simplified speech detection logic + if (rms > adaptiveThreshold) { + speechSampleCount++; + consecutiveSpeechSamples++; + speechLevels.push(rms); + + // Update adaptive threshold based on actual speech levels + if (speechLevels.length > 10) { + averageSpeechLevel = speechLevels.reduce((a, b) => a + b, 0) / speechLevels.length; + adaptiveThreshold = Math.max(RMS_THRESHOLD, averageSpeechLevel * 0.4); // 40% of average speech level } - resetSilenceTimer(); + + // Trigger speech detection much more 
easily + if (!hasHeardSpeech) { + // Either consecutive samples OR sufficient ratio + const speechRatio = speechSampleCount / totalSampleCount; + if (consecutiveSpeechSamples >= 3 || speechRatio >= 0.05) { // Much lower thresholds + hasHeardSpeech = true; + console.log(`[STT] Speech detected! (consecutive: ${consecutiveSpeechSamples}, ratio: ${(speechRatio * 100).toFixed(1)}%)`); + } + } + + if (hasHeardSpeech) { + resetSilenceTimer(); + } + } else { + consecutiveSpeechSamples = 0; // Reset consecutive counter } }); - // fileWriter.on('finish', ...) remains largely the same but moved outside library-specific setup - // }); // This was part of ai.on('data', ...) which is now common code block. - - // This was ai.on('error',...) specific to naudiodon, now handled above. - // }); - fileWriter.on('finish', async () => { - console.log('[STT] FileWriter finished.'); if (finished) return; finished = true; - - // Ensure recording is marked as stopped and lock released - isRecording = false; - if (silenceTimer) clearTimeout(silenceTimer); + lastRecordingEndTime = Date.now(); + try { - // Check audio duration const stats = fs.statSync(outFile); - const headerSize = 44; // standard WAV header size + const headerSize = 44; const dataSize = stats.size - headerSize; const duration = dataSize / (SAMPLE_RATE * (BIT_DEPTH / 8)); - if (duration < 2.75) { - console.log("[STT] Audio too short (<2.75s); discarding."); - fs.unlink(outFile, () => {}); - cleanupListeners(); - return resolve(null); + + const speechPercentage = totalSampleCount > 0 ? 
(speechSampleCount / totalSampleCount) * 100 : 0; + + if (DEBUG_AUDIO) { + console.log(`[STT] Audio processed: ${duration.toFixed(2)}s, speech detected: ${hasHeardSpeech}, speech %: ${speechPercentage.toFixed(1)}%`); + } + + if (duration < MIN_AUDIO_DURATION) { + cleanupAndResolve(null); + return; + } + + if (!hasHeardSpeech || speechPercentage < 3) { // Lowered from 15% to 3% + cleanupAndResolve(null); + return; } - // Transcribe const groqTTS = new GroqCloudTTS(); const text = await groqTTS.transcribe(outFile, { model: "distil-whisper-large-v3-en", @@ -303,92 +310,90 @@ async function recordAndTranscribeOnce() { temperature: 0.0 }); - fs.unlink(outFile, () => {}); // cleanup WAV file - - // Basic check for empty or whitespace if (!text || !text.trim()) { - console.log("[STT] Transcription empty; discarding."); - cleanupListeners(); - return resolve(null); + cleanupAndResolve(null); + return; } - // Heuristic checks to determine if the transcription is genuine - - // 1. Ensure at least one alphabetical character + // Enhanced validation if (!/[A-Za-z]/.test(text)) { - console.log("[STT] Transcription has no letters; discarding."); - cleanupListeners(); - return resolve(null); + cleanupAndResolve(null); + return; } - // 2. Check for gibberish repeated sequences if (/([A-Za-z])\1{3,}/.test(text)) { - console.log("[STT] Transcription looks like gibberish; discarding."); - cleanupListeners(); - return resolve(null); + cleanupAndResolve(null); + return; + } + + // Filter out common false positives + const falsePositives = ["thank you", "thanks", "bye", ".", ",", "?", "!", "um", "uh", "hmm"]; + if (falsePositives.includes(text.trim().toLowerCase())) { + cleanupAndResolve(null); + return; } - // 3. 
Check transcription length, with allowed greetings const letterCount = text.replace(/[^A-Za-z]/g, "").length; const normalizedText = text.trim().toLowerCase(); - const allowedGreetings = new Set(["hi", "hello", "greetings", "hey"]); + const allowedGreetings = new Set(["hi", "hello", "hey", "yes", "no", "okay"]); - if (letterCount < 8 && !allowedGreetings.has(normalizedText)) { - console.log("[STT] Transcription too short and not an allowed greeting; discarding."); - cleanupListeners(); - return resolve(null); + if (letterCount < 2 && !allowedGreetings.has(normalizedText)) { + cleanupAndResolve(null); + return; } - console.log("[STT] Transcription:", text); + // Only log successful transcriptions + console.log("[STT] Transcribed:", text); - // Format message so it looks like: "[SERVER] message" const finalMessage = `[${STT_USERNAME}] ${text}`; - // If STT_AGENT_NAME is empty, broadcast to all agents if (!STT_AGENT_NAME.trim()) { - const agentNames = getAllInGameAgentNames(); // from mind_server + const agentNames = getAllInGameAgentNames(); for (const agentName of agentNames) { getIO().emit('send-message', agentName, finalMessage); } } else { - // Otherwise, send only to the specified agent getIO().emit('send-message', STT_AGENT_NAME, finalMessage); } - cleanupListeners(); - resolve(text); + cleanupAndResolve(text); } catch (err) { - console.error("[STT] Error during transcription or sending message:", err); - fs.unlink(outFile, () => {}); // Attempt cleanup even on error - cleanupListeners(); - reject(err); // Propagate error for continuousLoop to catch + cleanupAndResolve(null); } }); - // Start the appropriate audio input - if (activeAudioLibrary === 'naudiodon') { - audioInterface.start(); - } else if (activeAudioLibrary === 'mic') { - audioInterface.start(); - } - - function cleanupListeners() { - if (audioStream && typeof audioStream.removeAllListeners === 'function') { - audioStream.removeAllListeners('data'); - audioStream.removeAllListeners('error'); - if 
(activeAudioLibrary === 'mic') { - audioStream.removeAllListeners('silence'); - audioStream.removeAllListeners('processExitComplete'); + function cleanupAndResolve(result) { + if (silenceTimer) clearTimeout(silenceTimer); + if (maxDurationTimer) clearTimeout(maxDurationTimer); + + try { + if (fs.existsSync(outFile)) { + fs.unlinkSync(outFile); } + } catch (err) { + // Silent cleanup + } + + if (audioStream && typeof audioStream.removeAllListeners === 'function') { + audioStream.removeAllListeners(); } if (fileWriter && typeof fileWriter.removeAllListeners === 'function') { - fileWriter.removeAllListeners('finish'); + fileWriter.removeAllListeners(); } - if (silenceTimer) clearTimeout(silenceTimer); - // release lock if it hasn't been released by fileWriter.on('finish') - // This is a safeguard. isRecording = false; + resolve(result); + } + + // Start recording + try { + if (activeAudioLibrary === 'naudiodon') { + audioInterface.start(); + } else if (activeAudioLibrary === 'mic') { + audioInterface.start(); + } + } catch (err) { + cleanupAndResolve(null); } }); } @@ -398,25 +403,39 @@ async function recordAndTranscribeOnce() { */ async function continuousLoop() { if (!activeAudioLibrary) { - console.warn("[STT] No audio recording library available. STT continuous loop cannot start."); + console.warn("[STT] No audio recording library available. 
STT disabled."); sttRunning = false; return; } + console.log("[STT] Speech-to-text active (Groq Whisper)"); + let consecutiveErrors = 0; + const maxConsecutiveErrors = 3; + while (sttRunning) { try { - await recordAndTranscribeOnce(); + const result = await recordAndTranscribeOnce(); + consecutiveErrors = 0; + + // Longer delay between recordings + if (sttRunning) { + await new Promise(res => setTimeout(res, 1000)); + } } catch (err) { - // Errors from recordAndTranscribeOnce (like transcription errors) are caught here - console.error("[STT Error in continuousLoop]", err); - // Potentially add a longer delay or a backoff mechanism if errors are persistent - } - // short gap, but only if stt is still supposed to be running - if (sttRunning) { - await new Promise(res => setTimeout(res, 1000)); + consecutiveErrors++; + + if (consecutiveErrors >= maxConsecutiveErrors) { + console.error("[STT] Too many errors, stopping STT."); + sttRunning = false; + break; + } + + if (sttRunning) { + const delay = 3000 * consecutiveErrors; + await new Promise(res => setTimeout(res, delay)); + } } } - console.log("[STT] Continuous loop ended."); } export function initTTS() { @@ -432,19 +451,20 @@ export function initTTS() { return; } - if (sttRunning) { - console.log("[STT] STT loop already running; skipping re-init."); + if (sttRunning || sttInitialized) { + console.log("[STT] STT already initialized; skipping re-init."); return; } console.log("[STT] Initializing STT..."); - sttRunning = true; // Set before starting the loop + sttRunning = true; + sttInitialized = true; - continuousLoop().catch((err) => { - console.error("[STT] continuousLoop crashed unexpectedly:", err); - sttRunning = false; // Mark as not running if it crashes - }); + setTimeout(() => { + continuousLoop().catch((err) => { + console.error("[STT] continuousLoop crashed unexpectedly:", err); + sttRunning = false; + sttInitialized = false; + }); + }, 2000); } - -// Moved initTTS() call into the async IIFE after 
naudiodon import attempt. -// initTTS(); From a6d69aecf1d8b1a949bad17018acf97e62ec0e0d Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 17:34:48 -0700 Subject: [PATCH 22/26] Update README.md Removed all of the things that Jules wanted to add. --- README.md | 65 ------------------------------------------------------- 1 file changed, 65 deletions(-) diff --git a/README.md b/README.md index 2990665..b468ada 100644 --- a/README.md +++ b/README.md @@ -14,71 +14,6 @@ Do not connect this bot to public servers with coding enabled. This project allo - [Node.js Installed](https://nodejs.org/) (at least v14) - One of these: [OpenAI API Key](https://openai.com/blog/openai-api) | [Gemini API Key](https://aistudio.google.com/app/apikey) | [Anthropic API Key](https://docs.anthropic.com/claude/docs/getting-access-to-claude) | [Replicate API Key](https://replicate.com/) | [Hugging Face API Key](https://huggingface.co/) | [Groq API Key](https://console.groq.com/keys) | [Ollama Installed](https://ollama.com/download). | [Mistral API Key](https://docs.mistral.ai/getting-started/models/models_overview/) | [Qwen API Key [Intl.]](https://www.alibabacloud.com/help/en/model-studio/developer-reference/get-api-key)/[[cn]](https://help.aliyun.com/zh/model-studio/getting-started/first-api-call-to-qwen?) | [Novita AI API Key](https://novita.ai/settings?utm_source=github_mindcraft&utm_medium=github_readme&utm_campaign=link#key-management) | -## Installation Prerequisites - -### `naudiodon` for Speech-to-Text (STT) - -The STT (Speech-to-Text) functionality in Mindcraft uses the `naudiodon` package for audio input. `naudiodon` is a native Node.js addon and might require additional steps to compile correctly during `npm install`. - -**`naudiodon` is an optional dependency.** This means: -* If `naudiodon` fails to install or build, the core Mindcraft application will still run. -* However, the Speech-to-Text (STT) feature will be automatically disabled if `naudiodon` is not available. 
You will see warnings in the console if it fails to load. -* If you wish to use STT and encounter build issues with `naudiodon`, please ensure you have the necessary build tools and libraries listed below for your operating system. - -**General Requirements for Building `naudiodon`:** -* **Node.js:** Ensure Node.js (v14+) is properly installed and added to your system's PATH. -* **Python:** `node-gyp` (the tool used to build native addons like `naudiodon`) requires Python. Recent versions of `node-gyp` are compatible with Python 3.x. Make sure Python is installed and accessible. -* **C++ Compiler Toolchain:** A C++ compiler (like g++ or MSVC) and related build tools (like `make` or MSBuild) are necessary. -* **PortAudio Library:** `naudiodon` specifically requires the PortAudio library. - -**Operating System Specifics for `PortAudio` (and `naudiodon` build):** - -### Linux -* **Debian/Ubuntu:** - ```bash - sudo apt-get update - sudo apt-get install build-essential libasound2-dev libportaudio-dev - ``` - (`build-essential` provides g++, make, etc. `libasound2-dev` is for ALSA, and `libportaudio-dev` is crucial for `naudiodon`.) - -* **Fedora/RHEL/CentOS:** - ```bash - # For newer Fedora (using dnf) - sudo dnf groupinstall "Development Tools" - sudo dnf install alsa-lib-devel portaudio-devel - - # For older RHEL/CentOS (using yum) - sudo yum groupinstall "Development Tools" - sudo yum install alsa-lib-devel portaudio-devel - ``` - (`portaudio-devel` is the equivalent of `libportaudio-dev`.) - -### Windows -* **Visual Studio C++ Build Tools:** This is the recommended way. - 1. Download the [Visual Studio Installer](https://visualstudio.microsoft.com/downloads/). - 2. Run the installer and select "Desktop development with C++" under the "Workloads" tab. This will install the necessary C++ compiler, MSBuild, and Windows SDKs. - 3. Ensure that Python is correctly configured for `node-gyp`. 
If you have multiple Python versions, you might need to tell `npm` which one to use (e.g., `npm config set python C:\path\to\python.exe`) or ensure your desired Python version is first in your system's PATH. -* **MSYS2/MinGW:** While possible, this can be more complex. You would need to compile/install PortAudio within the MSYS2 environment and ensure `node-gyp` is configured to use the MinGW toolchain. Using the Visual Studio C++ Build Tools is generally more straightforward for `node-gyp` on Windows. - -### macOS -* **Xcode Command Line Tools:** - ```bash - xcode-select --install - ``` - (This installs Clang, make, and other necessary build tools.) -* **PortAudio:** - ```bash - brew install portaudio - ``` - (Homebrew is the easiest way to install PortAudio on macOS.) -* **pkg-config (if needed):** - ```bash - brew install pkg-config - ``` - (Sometimes required for build scripts to find library information.) - -If you see warnings or errors related to `naudiodon` during `npm install` and you *do not* intend to use the STT feature, these can typically be ignored. If you *do* want STT, ensure the above prerequisites are met. - ## Install and Run 1. Make sure you have the requirements above. If you plan to use the STT (Speech-to-Text) feature, also review the "Installation Prerequisites" section regarding `naudiodon`. 
From 0902733047f0dda5ad782a37145fb639fb8f69bc Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 18:06:10 -0700 Subject: [PATCH 23/26] Create requirements.txt --- logs/requirements.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 logs/requirements.txt diff --git a/logs/requirements.txt b/logs/requirements.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/logs/requirements.txt @@ -0,0 +1 @@ + From 29b22349ec9b9121d0a069e595232da0996fc5f2 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 18:06:52 -0700 Subject: [PATCH 24/26] Add files via upload --- logs/convert.py | 965 +++++++++++++++++++++++++++++++ logs/generate_usernames.py | 1117 ++++++++++++++++++++++++++++++++++++ logs/requirements.txt | 17 + 3 files changed, 2099 insertions(+) create mode 100644 logs/convert.py create mode 100644 logs/generate_usernames.py diff --git a/logs/convert.py b/logs/convert.py new file mode 100644 index 0000000..b15770b --- /dev/null +++ b/logs/convert.py @@ -0,0 +1,965 @@ +import csv +import json +import logging +import sys +import os +import random +from typing import List, Dict +import pandas as pd +from USERNAMES import Get_Usernames +from transformers import AutoTokenizer +from tqdm import tqdm +import torch +from PIL import Image +import base64 +from io import BytesIO + +# Try to import pandas-image-methods for vision data handling +try: + from pandas_image_methods import PILMethods + PANDAS_IMAGE_METHODS_AVAILABLE = True + # Enable PIL methods for pandas + pd.api.extensions.register_series_accessor("pil")(PILMethods) +except ImportError: + PANDAS_IMAGE_METHODS_AVAILABLE = False + logging.warning("pandas-image-methods not available. Install with: pip install pandas-image-methods") + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +# Increase CSV field size limit to avoid errors with very large fields. 
+maxInt = sys.maxsize +while True: + try: + csv.field_size_limit(maxInt) + break + except OverflowError: + maxInt = int(maxInt/10) + +# Define the original usernames. +ORIGINAL_USERNAMES = [ + "SweaterDog_YT", "SweaterDog", "Sweaterdog", "Foolish_Pear69", "Farquadthegod72", "Hank", + "Gordan", "Perry", "Frederick", "Oliver", "Bill", "Ashley", "Greg", "Treb", "Mia", "Tia", "ALBeRT", "Jason" +] + +# Define outputs that should cause the conversation to be deleted. +BAD_OUTPUTS = { + "My brain just kinda stopped working. Try again.", + "My brain disconnected, try again.", + "Vision is only supported", + "Context length exceeded", + "Image input modality is not enabled", + "An unexpected error occurred", +} + +MINECRAFT_USERNAMES = list(set(Get_Usernames())) # Remove duplicates +duplicate_count = len(Get_Usernames()) - len(MINECRAFT_USERNAMES) + +available_minecraft_usernames = list(MINECRAFT_USERNAMES) # Create a copy for tracking + +global username_replaced_count +global reasoning_replaced_count +username_replaced_count = 0 +reasoning_replaced_count = 0 + +def replace_reasoning_prompt(text: str) -> str: + global reasoning_replaced_count + replaced = False + # Optionally, replace the reasoning prompt if needed. 
+ if replaced: + reasoning_replaced_count += 1 + return text + +def parse_json_safely(text: str) -> List[Dict[str, str]]: + try: + if text.startswith('[') and '],' in text: + parts = text.split('],') + text = parts[0] + ']' + if text.startswith('"') and text.endswith('"'): + text = text[1:-1] + text = text.replace('""', '"') + data = json.loads(text) + if isinstance(data, list) and len(data) > 0 and isinstance(data[0], list): + data = data[0] + converted_messages = [] + for msg in data: + if isinstance(msg, dict) and 'role' in msg and 'content' in msg: + converted_messages.append({ + "from": "human" if msg['role'] in ("system", "user") else "gpt", + "value": msg['content'] + }) + return converted_messages + except Exception as e: + logger.debug(f"Error parsing JSON: {e}") # Suppressed error level + return [{ + "from": "human", + "value": text + }] + +def create_conversation_thread(row: Dict[str, str]) -> List[Dict[str, str]]: + messages = [] + conversation_replacements = {} # Track username replacements for this conversation ONLY + + def replace_usernames_in_message(text: str) -> str: + global username_replaced_count + global available_minecraft_usernames + replaced = False + + if not MINECRAFT_USERNAMES: + return text + + for orig_name in ORIGINAL_USERNAMES: + if orig_name in text: + if orig_name not in conversation_replacements: + # If we've used all available names, reset the list + if not available_minecraft_usernames: + available_minecraft_usernames = list(MINECRAFT_USERNAMES) + # Get a random name from the available ones + replacement = random.choice(available_minecraft_usernames) + available_minecraft_usernames.remove(replacement) + conversation_replacements[orig_name] = replacement + replaced = True + # Use existing replacement for this conversation + text = text.replace(orig_name, conversation_replacements[orig_name]) + + if replaced: + username_replaced_count += 1 + return text + + if row.get("input"): + messages = parse_json_safely(str(row["input"])) + # 
Apply consistent username replacements to all messages + for msg in messages: + msg["value"] = replace_usernames_in_message(msg["value"]) + + if row.get("output"): + output_text = str(row["output"]).strip() + output_text = replace_usernames_in_message(output_text) + output_text = replace_reasoning_prompt(output_text) + messages.append({ + "from": "gpt", + "value": output_text + }) + + return messages + +def conversation_has_bad_output(messages: List[Dict[str, str]]) -> bool: + for msg in messages: + if msg["from"] == "gpt" and msg["value"].strip() in BAD_OUTPUTS: + return True + return False + +def load_image_from_base64(base64_string: str): + """Convert base64 string to PIL Image""" + try: + if base64_string.startswith('data:'): + base64_string = base64_string.split(',')[1] + + image_bytes = base64.b64decode(base64_string) + image = Image.open(BytesIO(image_bytes)) + + if image.mode in ('RGBA', 'LA', 'P'): + image = image.convert('RGB') + + return image + except Exception as e: + logger.debug(f"Error loading image from base64: {e}") + return Image.new('RGB', (224, 224), color='gray') + +def pil_image_to_parquet_dict(image: Image.Image, filename: str) -> Dict: + """Converts a PIL Image to the dictionary format {bytes, path} for Parquet.""" + img_byte_arr = BytesIO() + # Determine a suitable save format + save_format = image.format if image.format and image.format in Image.SAVE else 'PNG' + + # Handle specific mode conversions if necessary for the chosen format + if save_format == 'PNG' and image.mode not in ['RGB', 'RGBA', 'L', 'P', 'I', 'F']: # Common PNG modes + # Convert to a mode PNG supports, e.g., RGBA to preserve transparency + image_to_save = image.convert("RGBA") + elif save_format == 'JPEG' and image.mode not in ['RGB', 'L', 'CMYK']: + # Convert to a mode JPEG supports + image_to_save = image.convert("RGB") + else: + image_to_save = image + + try: + image_to_save.save(img_byte_arr, format=save_format) + except Exception as e: + logger.warning(f"Could not 
save image {filename} in format {save_format} (Error: {e}). Attempting PNG.") + save_format = 'PNG' + if image_to_save.mode not in ['RGB', 'RGBA', 'L', 'P', 'I', 'F']: + image_to_save = image.convert("RGBA") # Default to RGBA for PNG + image_to_save.save(img_byte_arr, format=save_format) + + return {"bytes": img_byte_arr.getvalue(), "path": filename} + +def extract_vision_data_from_jsonl(jsonl_path: str) -> List[Dict]: + """Extract vision data from HuggingFace JSONL metadata format""" + if not os.path.isfile(jsonl_path): + logger.error(f"JSONL file not found: {jsonl_path}") + return [] + + logger.info(f"Reading vision metadata: {jsonl_path}") + + # Get the directory containing the JSONL file (should contain images folder) + base_dir = os.path.dirname(jsonl_path) + images_dir = os.path.join(base_dir, 'images') + + if not os.path.isdir(images_dir): + logger.error(f"Images directory not found: {images_dir}") + return [] + + vision_data = [] + + with open(jsonl_path, 'r', encoding='utf-8') as f: + for line_num, line in enumerate(f, 1): + line = line.strip() + if not line: + continue + + try: + entry = json.loads(line) + + # Extract required fields - logger.js uses 'input' and 'response', not 'text' + file_name = entry.get('file_name', '') + input_data = entry.get('input', '') + response = entry.get('response', '') + + if not all([file_name, input_data, response]): + logger.warning(f"Line {line_num}: Missing required fields (file_name, input, response)") + continue + + # Check for bad outputs + if response.strip() in BAD_OUTPUTS: + logger.debug(f"Line {line_num}: Skipping bad output") + continue + + # Load the image + image_path = os.path.join(base_dir, file_name) + if not os.path.isfile(image_path): + logger.warning(f"Line {line_num}: Image file not found: {image_path}") + continue + + try: + image = Image.open(image_path) + if image.mode in ('RGBA', 'LA', 'P') and image.format != 'PNG': # PNG handles these modes well + image = image.convert('RGB') # Convert to RGB if 
not PNG to simplify, or handle more modes in pil_image_to_parquet_dict + except Exception as e: + logger.warning(f"Line {line_num}: Error loading image {image_path}: {e}") + continue + + # Convert PIL image to parquet-compatible dict + relative_image_path_for_dict = file_name # Use the relative path from metadata + image_dict = pil_image_to_parquet_dict(image, relative_image_path_for_dict) + + # Create a separate conversation_replacements for each vision entry + entry_conversation_replacements = {} + + # Replace usernames consistently within this single entry + def replace_usernames_in_text(text: str) -> str: + global username_replaced_count + global available_minecraft_usernames + replaced = False + + if not MINECRAFT_USERNAMES: + return text + + for orig_name in ORIGINAL_USERNAMES: + if orig_name in text: + if orig_name not in entry_conversation_replacements: + if not available_minecraft_usernames: + available_minecraft_usernames = list(MINECRAFT_USERNAMES) + replacement = random.choice(available_minecraft_usernames) + available_minecraft_usernames.remove(replacement) + entry_conversation_replacements[orig_name] = replacement + replaced = True + text = text.replace(orig_name, entry_conversation_replacements[orig_name]) + + if replaced: + username_replaced_count += 1 + return text + + # Parse the input data (conversation history) and build conversation + try: + # The input_data should be JSON string of conversation history + conversation_history = json.loads(input_data) + + # Build the conversation in unsloth format + conversation = [] + + if isinstance(conversation_history, list): + for msg in conversation_history: + if isinstance(msg, dict) and 'role' in msg: + role = msg['role'] + # Map system messages to user role for simplicity + if role == 'system': + role = 'user' + + content_parts = [] + + # Handle different content formats + if 'content' in msg: + content = msg['content'] + if isinstance(content, str): + # Simple string content + text_content = 
replace_usernames_in_text(content) + content_parts.append({"type": "text", "text": text_content}) + elif isinstance(content, list): + # Array content (multimodal messages) + for part in content: + if isinstance(part, dict): + if part.get('type') == 'text': + text_content = part.get('text', '') + if text_content: + text_content = replace_usernames_in_text(text_content) + content_parts.append({"type": "text", "text": text_content}) + # Skip image parts from history - we'll add the main image to the user message + elif any(key in msg for key in ['text', 'message', 'value']): + # Handle other message formats + text_content = msg.get('text') or msg.get('message') or msg.get('value', '') + if text_content: + text_content = replace_usernames_in_text(str(text_content)) + content_parts.append({"type": "text", "text": text_content}) + + if content_parts: + conversation.append({ + "role": role, + "content": content_parts + }) + + # If no conversation history was parsed or it's empty, create a simple user message + if not conversation: + # Use the raw input data as text + text_content = replace_usernames_in_text(str(input_data).strip()) + conversation.append({ + "role": "user", + "content": [{"type": "text", "text": text_content}] + }) + + # Add the image to the last user message (or create one if none exists) + user_msg_found = False + for i in range(len(conversation) - 1, -1, -1): + if conversation[i]["role"] == "user": + # Add image to this user message + conversation[i]["content"].append({"type": "image", "image": image_dict}) + user_msg_found = True + break + + if not user_msg_found: + # No user message found, create one with just the image + conversation.append({ + "role": "user", + "content": [{"type": "image", "image": image_dict}] + }) + + # Add the assistant response + response_text = replace_usernames_in_text(response) + conversation.append({ + "role": "assistant", + "content": [{"type": "text", "text": response_text}] + }) + + except json.JSONDecodeError: + # If 
input_data is not valid JSON, create simple conversation + text_content = replace_usernames_in_text(str(input_data).strip()) + response_text = replace_usernames_in_text(response) + + conversation = [ + { + "role": "user", + "content": [ + {"type": "text", "text": text_content}, + {"type": "image", "image": image_dict} + ] + }, + { + "role": "assistant", + "content": [{"type": "text", "text": response_text}] + } + ] + except Exception as e: + logger.debug(f"Line {line_num}: Error parsing conversation history: {e}") + # Fallback to simple conversation + text_content = replace_usernames_in_text(str(input_data).strip()) + response_text = replace_usernames_in_text(response) + + conversation = [ + { + "role": "user", + "content": [ + {"type": "text", "text": text_content}, + {"type": "image", "image": image_dict} + ] + }, + { + "role": "assistant", + "content": [{"type": "text", "text": response_text}] + } + ] + + vision_data.append(conversation) + + except json.JSONDecodeError as e: + logger.warning(f"Line {line_num}: JSON decode error: {e}") + continue + except Exception as e: + logger.warning(f"Line {line_num}: Unexpected error: {e}") + continue + + logger.info(f"Successfully processed {len(vision_data)} vision entries") + return vision_data + +def extract_vision_conversations_from_csv(csv_input: str) -> List[Dict]: + """Extract vision data from CSV with input,image,output columns""" + if not os.path.isfile(csv_input): + logger.debug(f"Vision CSV file not found: {csv_input}") + return [] + + logger.info(f"Reading Vision CSV: {csv_input}") + + try: + df = pd.read_csv(csv_input) + required_columns = ['input', 'image', 'output'] + + if not all(col in df.columns for col in required_columns): + logger.debug(f"Vision CSV missing required columns: {required_columns}") + return [] + + vision_data = [] + + for idx, row in df.iterrows(): + try: + input_text = str(row['input']).strip() + image_b64 = str(row['image']).strip() + output_text = str(row['output']).strip() + + if not 
all([input_text, image_b64, output_text]): + continue + + # Check for bad outputs + if output_text in BAD_OUTPUTS: + continue + + # Create separate replacements for each row + row_conversation_replacements = {} + + # Replace usernames consistently within this single row + def replace_usernames_in_text(text: str) -> str: + global username_replaced_count + global available_minecraft_usernames + replaced = False + + if not MINECRAFT_USERNAMES: + return text + + for orig_name in ORIGINAL_USERNAMES: + if orig_name in text: + if orig_name not in row_conversation_replacements: + if not available_minecraft_usernames: + available_minecraft_usernames = list(MINECRAFT_USERNAMES) + replacement = random.choice(available_minecraft_usernames) + available_minecraft_usernames.remove(replacement) + row_conversation_replacements[orig_name] = replacement + replaced = True + text = text.replace(orig_name, row_conversation_replacements[orig_name]) + + if replaced: + username_replaced_count += 1 + return text + + input_text = replace_usernames_in_text(input_text) + output_text = replace_usernames_in_text(output_text) + + # Load image from base64 + image = load_image_from_base64(image_b64) + + # Convert PIL image to parquet-compatible dict + image_filename_for_dict = f"image_from_base64_{idx}.png" # Create a placeholder filename + image_dict = pil_image_to_parquet_dict(image, image_filename_for_dict) + + # Create conversation in unsloth format + conversation = [ + { + "role": "user", + "content": [ + {"type": "text", "text": input_text}, + {"type": "image", "image": image_dict} + ] + }, + { + "role": "assistant", + "content": [{"type": "text", "text": output_text}] + } + ] + + vision_data.append(conversation) + + except Exception as e: + logger.warning(f"Row {idx}: Error processing vision data: {e}") + continue + + logger.info(f"Successfully processed {len(vision_data)} vision entries from CSV") + return vision_data + + except Exception as e: + logger.error(f"Error reading vision CSV 
{csv_input}: {e}") + return [] + +def extract_conversations_from_csv(csv_input: str) -> List[List[Dict[str, str]]]: + if not os.path.isfile(csv_input): + logger.debug(f"CSV file not found: {csv_input}") + return [] + + logger.info(f"Reading CSV: {csv_input}") + valid_rows = [] + extra_issue_rows = 0 + total_extra_columns = 0 + + with open(csv_input, newline='', encoding="utf-8") as csvfile: + reader = csv.reader(csvfile) + try: + header = next(reader) + except StopIteration: + logger.debug(f"CSV file {csv_input} is empty.") + return [] + + header_expected = {"input", "output"} + header_map = {col: idx for idx, col in enumerate(header)} + if not header_expected.issubset(set(header)): + logger.debug(f"CSV header does not contain required columns: {header_expected}") + return [] + + for idx, row in enumerate(reader, start=2): + non_empty_count = sum(1 for field in row if field.strip() != "") + if non_empty_count > 2: + extra = non_empty_count - 2 + extra_issue_rows += 1 + total_extra_columns += extra + logger.info(f"Row {idx} has {extra} extra filled column(s); row skipped.") + continue + row_dict = {col: row[header_map[col]] if header_map[col] < len(row) else "" for col in header_expected} + valid_rows.append(row_dict) + + logger.info(f"Excluded {extra_issue_rows} row(s) with extra columns (total extra columns: {total_extra_columns}).") + df = pd.DataFrame(valid_rows) + conversations = [] + for idx, row in df.iterrows(): + conv = create_conversation_thread(row) + if conversation_has_bad_output(conv): + continue + conversations.append(conv) + return conversations + +def extract_vision_conversations_from_csv(csv_input: str) -> List[Dict]: + """Extract vision data from CSV with input,image,output columns""" + if not os.path.isfile(csv_input): + logger.debug(f"Vision CSV file not found: {csv_input}") + return [] + + logger.info(f"Reading Vision CSV: {csv_input}") + + try: + df = pd.read_csv(csv_input) + required_columns = ['input', 'image', 'output'] + + if not all(col 
in df.columns for col in required_columns): + logger.debug(f"Vision CSV missing required columns: {required_columns}") + return [] + + vision_data = [] + + for idx, row in df.iterrows(): + try: + input_text = str(row['input']).strip() + image_b64 = str(row['image']).strip() + output_text = str(row['output']).strip() + + if not all([input_text, image_b64, output_text]): + continue + + # Check for bad outputs + if output_text in BAD_OUTPUTS: + continue + + # Create separate replacements for each row + row_conversation_replacements = {} + + # Replace usernames consistently within this single row + def replace_usernames_in_text(text: str) -> str: + global username_replaced_count + global available_minecraft_usernames + replaced = False + + if not MINECRAFT_USERNAMES: + return text + + for orig_name in ORIGINAL_USERNAMES: + if orig_name in text: + if orig_name not in row_conversation_replacements: + if not available_minecraft_usernames: + available_minecraft_usernames = list(MINECRAFT_USERNAMES) + replacement = random.choice(available_minecraft_usernames) + available_minecraft_usernames.remove(replacement) + row_conversation_replacements[orig_name] = replacement + replaced = True + text = text.replace(orig_name, row_conversation_replacements[orig_name]) + + if replaced: + username_replaced_count += 1 + return text + + input_text = replace_usernames_in_text(input_text) + output_text = replace_usernames_in_text(output_text) + + # Load image from base64 + image = load_image_from_base64(image_b64) + + # Convert PIL image to parquet-compatible dict + image_filename_for_dict = f"image_from_base64_{idx}.png" # Create a placeholder filename + image_dict = pil_image_to_parquet_dict(image, image_filename_for_dict) + + # Create conversation in unsloth format + conversation = [ + { + "role": "user", + "content": [ + {"type": "text", "text": input_text}, + {"type": "image", "image": image_dict} + ] + }, + { + "role": "assistant", + "content": [{"type": "text", "text": output_text}] 
+ } + ] + + vision_data.append(conversation) + + except Exception as e: + logger.warning(f"Row {idx}: Error processing vision data: {e}") + continue + + logger.info(f"Successfully processed {len(vision_data)} vision entries from CSV") + return vision_data + + except Exception as e: + logger.error(f"Error reading vision CSV {csv_input}: {e}") + return [] + +def extract_conversations_from_json(json_input: str) -> List[List[Dict[str, str]]]: + logger.info(f"Reading JSON: {json_input}") + try: + with open(json_input, 'r', encoding='utf-8') as f: + data = json.load(f) + except Exception as e: + logger.debug(f"Error reading {json_input}: {e}") + return [] + conversations = [] + for conv in data: + messages = [] + if "system" in conv and conv["system"]: + system_text = str(conv["system"]).strip() + system_text = replace_reasoning_prompt(system_text) + messages.append({"from": "human", "value": system_text}) + if "user" in conv and conv["user"]: + user_text = str(conv["user"]).strip() + user_text = replace_reasoning_prompt(user_text) + messages.append({"from": "human", "value": user_text}) + if "assistant" in conv and conv["assistant"]: + assistant_text = str(conv["assistant"]).strip() + assistant_text = replace_reasoning_prompt(assistant_text) + messages.append({"from": "gpt", "value": assistant_text}) + if messages and not conversation_has_bad_output(messages): + conversations.append(messages) + return conversations + +if __name__ == "__main__": + # Handle vision dataset processing + if '--vision' in sys.argv: + if not PANDAS_IMAGE_METHODS_AVAILABLE: + logger.error("pandas-image-methods is required for --vision flag. 
Install with: pip install pandas-image-methods") + sys.exit(1) + + # Look for vision data files + vision_files = [] + + # Check for HuggingFace format (metadata.jsonl) + metadata_jsonl = "vision_dataset/metadata.jsonl" + if os.path.isfile(metadata_jsonl): + vision_files.append((metadata_jsonl, 'jsonl')) + + # Check for CSV format vision logs + vision_csv = "vision_logs.csv" + if os.path.isfile(vision_csv): + vision_files.append((vision_csv, 'csv')) + + # Check for numbered files + i = 1 + while True: + jsonl_file = f"vision_dataset{i}/metadata.jsonl" + csv_file = f"vision_logs{i}.csv" + found_any = False + + if os.path.isfile(jsonl_file): + vision_files.append((jsonl_file, 'jsonl')) + found_any = True + if os.path.isfile(csv_file): + vision_files.append((csv_file, 'csv')) + found_any = True + + if not found_any: + break + i += 1 + + if not vision_files: + logger.error("No vision dataset files found for --vision flag!") + logger.info("Looking for:") + logger.info(" - vision_dataset/metadata.jsonl (HuggingFace format)") + logger.info(" - vision_logs.csv (CSV format)") + logger.info(" - vision_datasetN/metadata.jsonl") + logger.info(" - vision_logsN.csv") + sys.exit(1) + + logger.info(f"Found {len(vision_files)} vision files: {[f for f, _ in vision_files]}") + + # Process all vision files + all_vision_data = [] + total_count = 0 + file_counts = {} + + for file_path, file_type in vision_files: + if file_type == 'jsonl': + vision_data = extract_vision_data_from_jsonl(file_path) + else: # csv + vision_data = extract_vision_conversations_from_csv(file_path) + + file_counts[file_path] = len(vision_data) + all_vision_data.extend(vision_data) + total_count += len(vision_data) + + if not all_vision_data: + logger.error("No valid vision data found!") + sys.exit(1) + + # Check for tokenization flags + do_tokenize = '--tokenize' in sys.argv + tokenizer = None + device = "cuda" if torch.cuda.is_available() else "cpu" + if do_tokenize: + logger.info("Loading tokenizer 
'unsloth/Llama-3.2-1B-Instruct-bnb-4bit'...") + tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct-bnb-4bit") + + # Tokenize if requested + if do_tokenize and tokenizer: + all_texts = [] + for entry in all_vision_data: + all_texts.append(entry['input']) + all_texts.append(entry['output']) + + total_tokens = 0 + logger.info("Tokenizing vision data...") + for text in tqdm(all_texts, desc="Tokenizing", unit="msg"): + encoded = tokenizer(text, return_tensors="pt") + input_ids = encoded["input_ids"].to(device) + total_tokens += input_ids.shape[-1] + logger.info(f"Total tokens across all vision data: {total_tokens}") + + # Remove duplicates based on conversation content + unique_vision_data = [] + seen_keys = set() + + for conversation in all_vision_data: + # Create a key from the text content of the conversation + key_parts = [] + for msg in conversation: + if msg["role"] in ["user", "assistant"]: + for content_part in msg["content"]: + if content_part["type"] == "text": + key_parts.append(content_part["text"].strip()) + + key = tuple(key_parts) + if key not in seen_keys: + seen_keys.add(key) + unique_vision_data.append(conversation) + + all_vision_data = unique_vision_data + logger.info(f"After deduplication: {len(all_vision_data)} unique vision conversations") + + # Shuffle the data + random.shuffle(all_vision_data) + + # Images are already in parquet-compatible dict format within all_vision_data + # No further image processing needed here before creating DataFrame + + # Create DataFrame with conversations column (unsloth format) + df_final = pd.DataFrame({"conversations": all_vision_data}) + + output_parquet = "Andy_vision_conversations.parquet" + + logger.info(f"Writing vision dataset to {output_parquet}") + try: + df_final.to_parquet(output_parquet, index=False) + abs_path = os.path.abspath(output_parquet) + logger.info(f"Successfully wrote vision dataset to: {abs_path}") + except Exception as e: + logger.error(f"Error writing Parquet file: 
{e}") + sys.exit(1) + + logger.info( + f"\n" + f"--------------------------------------------------------------------------------------\n" + f"Vision conversion complete! Processed {total_count} vision conversations from {len(vision_files)} files.\n" + f"Replaced {username_replaced_count} usernames across conversations.\n" + f"Total usernames available: {len(MINECRAFT_USERNAMES)}\n" + f"Final dataset size: {len(all_vision_data)} unique conversations\n" + f"--------------------------------------------------------------------------------------\n" + ) + + # Log counts per file + for file_path, count in file_counts.items(): + logger.info(f"File '{file_path}' contributed {count} conversations.") + + sys.exit(0) + + # Regular processing for non-vision data + base_filename = "Andy_pre" + files = [] + i = 1 + while True: + csv_file = f"{base_filename}{i}.csv" + json_file = f"{base_filename}{i}.json" + if not os.path.isfile(csv_file) and not os.path.isfile(json_file): + break + if os.path.isfile(csv_file): + files.append((csv_file, 'csv')) + if os.path.isfile(json_file): + files.append((json_file, 'json')) + i += 1 + + if not files: + logger.info("No CSV or JSON files found with pattern Andy_preN.(csv|json)") + sys.exit(1) + + # Check for tokenization flags + do_tokenize = '--tokenize' in sys.argv + do_tokenize_largest = '--tokenize_largest' in sys.argv + tokenizer = None + device = "cuda" if torch.cuda.is_available() else "cpu" + if do_tokenize or do_tokenize_largest: + logger.info("Loading tokenizer 'unsloth/Llama-3.2-1B-Instruct-bnb-4bit'...") + tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct-bnb-4bit") + + logger.info(f"Found {len(files)} files: {[f for f, _ in files]}") + combined_conversations = [] + total_count = 0 + file_conversation_counts = {} + + for file, ftype in files: + if ftype == 'csv': + convs = extract_conversations_from_csv(file) + else: + convs = extract_conversations_from_json(file) + file_conversation_counts[file] = len(convs) 
+ combined_conversations.extend(convs) + total_count += len(convs) + + # Tokenize all data and count tokens + if do_tokenize: + all_texts = [msg["value"] for conv in combined_conversations for msg in conv] + total_tokens = 0 + logger.info("Tokenizing all data with progress bar and GPU acceleration...") + for text in tqdm(all_texts, desc="Tokenizing", unit="msg"): + encoded = tokenizer(text, return_tensors="pt") + input_ids = encoded["input_ids"].to(device) + total_tokens += input_ids.shape[-1] + logger.info(f"Total tokens across all data: {total_tokens}") + + # Tokenize 5 largest conversations + if do_tokenize_largest: + conv_token_counts = [] + logger.info("Tokenizing largest conversations with progress bar and GPU acceleration...") + for conv in tqdm(combined_conversations, desc="Tokenizing convs", unit="conv"): + text = "\n".join(msg["value"] for msg in conv) + encoded = tokenizer(text, return_tensors="pt") + input_ids = encoded["input_ids"].to(device) + conv_token_counts.append((input_ids.shape[-1], conv)) + # sort and take top 5 + conv_token_counts.sort(key=lambda x: x[0], reverse=True) + top5 = conv_token_counts[:5] + max_tokens = max(count for count, _ in top5) + for idx, (count, _) in enumerate(top5, 1): + logger.info(f"Top {idx} conversation tokens: {count}") + logger.info(f"Maximum tokens in top 5: {max_tokens}") + + # Clean up GPT messages + for conv in combined_conversations: + for msg in conv: + if msg["from"] == "gpt": + msg["value"] = msg["value"].replace("\nundefined\n", "").replace("\nundefined", "").strip() + + unique_conversations = [] + seen_keys = set() + for conv in combined_conversations: + if len(conv) < 2: + key = tuple(msg["value"] for msg in conv) + else: + key = (conv[0]["value"].strip(), conv[-1]["value"].strip()) + if key not in seen_keys: + seen_keys.add(key) + unique_conversations.append(conv) + combined_conversations = unique_conversations + + random.shuffle(combined_conversations) + + # Handle codeOnly flag + if '--codeOnly' in 
sys.argv: + coding = [] + noncoding = [] + for conv in combined_conversations: + has_code = any("```" in msg["value"] for msg in conv) or ( + conv and conv[-1]["from"] == "gpt" and "!newAction(" in conv[-1]["value"] + ) + if has_code: + coding.append(conv) + else: + noncoding.append(conv) + logger.info(f"Found {len(coding)} coding examples and {len(noncoding)} non-coding examples.") + noncoding_count = int(round(0.15 * len(coding))) + if noncoding_count > len(noncoding): + noncoding_count = len(noncoding) + selected_noncoding = random.sample(noncoding, noncoding_count) if noncoding_count > 0 else [] + final_conversations = coding + selected_noncoding + random.shuffle(final_conversations) + combined_conversations = final_conversations + + if '--codeOnly' in sys.argv: + df_final = pd.DataFrame({"conversations": combined_conversations}) + output_parquet = "Andy_conversations_codeOnly.parquet" + else: + df_final = pd.DataFrame({"conversations": combined_conversations}) + output_parquet = "Andy_conversations.parquet" + + logger.info(f"Writing output to {output_parquet}") + try: + df_final.to_parquet(output_parquet, index=False) + abs_path = os.path.abspath(output_parquet) + logger.info(f"Successfully wrote output to: {abs_path}") + except Exception as e: + logger.debug(f"Error writing Parquet file: {e}") + sys.exit(1) + + logger.info( + f"\n" + f"--------------------------------------------------------------------------------------\n\n" + f"Conversion complete! Processed {total_count} conversations from {len(files)} files. \n" + f"Replaced {username_replaced_count} usernames across {total_count} conversations. \n" + f"Total amount of usernames to choose from: {len(MINECRAFT_USERNAMES)} (removed {duplicate_count} duplicates) \n" + f"--------------------------------------------------------------------------------------\n\n" + ) + + # Log conversation counts per file. 
+ for file, count in file_conversation_counts.items(): + logger.info(f"File '{file}' contributed {count} conversations.") diff --git a/logs/generate_usernames.py b/logs/generate_usernames.py new file mode 100644 index 0000000..ede8c00 --- /dev/null +++ b/logs/generate_usernames.py @@ -0,0 +1,1117 @@ +# -*- coding: utf-8 -*- +# ^^^ Add encoding declaration for potentially wider character sets in lists +# --- Imports --- +import random +import os +import sys # Import sys to access command-line arguments +import itertools # Import itertools for generating combinations + + +# Increase recursion depth if needed for large set operations (unlikely but possible) +# sys.setrecursionlimit(2000) + +# --- Massively Expanded Word Lists (Targeting 750+ unique per category) --- + +# NOTE: Generating truly meaningful and diverse lists of this size requires +# significant effort or large external datasets. These lists are expanded +# considerably using thematic variations, synonyms, and related concepts. +# They aim for the quantity requested, combining common and more specific terms. 
+ +PROFESSIONS = list(set([ + # Core & Fantasy + "Wizard", "Maven", "Guru", "Master", "Apprentice", "Hunter", "Gatherer", + "Coder", "Artist", "Chef", "Pilot", "Doctor", "Teacher", "Scientist", + "Musician", "Gamer", "Writer", "Explorer", "Builder", "Creator", + "Analyst", "Architect", "Strategist", "Voyager", "Dreamer", "Engineer", + "Designer", "Bard", "Rogue", "Paladin", "Alchemist", "Druid", "Ranger", + "Sentinel", "Guardian", "Navigator", "Captain", "Commander", "Sergeant", + "Healer", "Oracle", "Sage", "Scholar", "Scribe", "Merchant", "Trader", + "Blacksmith", "Jeweler", "Cartographer", "Monk", "Necromancer", "Summoner", + "Technomancer", "Hacker", "Broker", "Agent", "Scout", "Spy", "Jester", + "Minstrel", "Curator", "Warden", "Keeper", "Chronicler", "Inventor", + "Mechanist", "Artificer", "Gladiator", "Nomad", "Hermit", "Shaman", + "Geologist", "Biologist", "Physicist", "Astronomer", "Linguist", "Historian", + "Philosopher", "Enforcer", "Detective", "Journalist", "Photographer", "Sculptor", + # Expansion + "Mage", "Sorcerer", "Warlock", "Cleric", "Priest", "Templar", "Crusader", + "Berserker", "Barbarian", "Warrior", "Knight", "Duelist", "Swashbuckler", + "Assassin", "Thief", "Ninja", "Samurai", "Ronin", "Geomancer", "Pyromancer", + "Cryomancer", "Aeromancer", "Hydromancer", "Chronomancer", "Illusionist", + "Enchanter", "Runesmith", "Wordsmith", "Beastmaster", "Tamer", "Falconer", + "Herbalist", "Apothecary", "Poisoner", "Tinkerer", "Demolitionist", + "Pathfinder", "Trailblazer", "Surveyor", "Prospector", "Miner", "Lumberjack", + "Farmer", "Fisherman", "Shepherd", "Vintner", "Brewer", "Baker", "Butcher", + "Candlemaker", "Cobbler", "Cooper", "Fletcher", "Innkeeper", "Mason", + "Potter", "Sailor", "Shipwright", "Tailor", "Tanner", "Weaver", "Woodcarver", + "Governor", "Chancellor", "Diplomat", "Ambassador", "Councilor", "Judge", + "Librarian", "Archivist", "Mathematician", "Astronomer", "Botanist", "Zoologist", + "Archeologist", "Anthropologist", 
"Sociologist", "Psychologist", "Mentor", + "Tutor", "Instructor", "Professor", "Dean", "Headmaster", "Principal", + "Acolyte", "Initiate", "Neophyte", "Disciple", "Follower", "Zealot", "Cultist", + "Prophet", "Seer", "Diviner", "Mystic", "Visionary", "Ascetic", "Pilgrim", + "Mercenary", "BountyHunter", "Privateer", "Corsair", "Smuggler", "Outlaw", + "Bandit", "Rebel", "Revolutionary", "FreedomFighter", "Gladiator", + "Charioteer", "Pitfighter", "Champion", "Hero", "Villain", "Antihero", + "Adventurer", "Soldier", "Officer", "General", "Admiral", "Marshal", + "Tactician", "Quartermaster", "Medic", "CombatMedic", "FieldAgent", + "Operative", "DoubleAgent", "Infiltrator", "Saboteur", "Courier", "Messenger", + "Herald", "TownCrier", "Guide", "Interpreter", "Translator", "Negotiator", + "Arbitrator", "Mediator", "Executioner", "Jailer", "Constable", "Sheriff", + "Bailiff", "Investigator", "Foreman", "Supervisor", "Manager", "Director", + "Executive", "Administrator", "Secretary", "Clerk", "Accountant", "Auditor", + "Actuary", "Banker", "Financier", "Investor", "Speculator", "Entrepreneur", + "Artisan", "Craftsman", "Technician", "Mechanic", "Operator", "Programmer", + "Developer", "SysAdmin", "NetAdmin", "DBAdmin", "Webmaster", "ContentCreator", + "Influencer", "Blogger", "Vlogger", "Podcaster", "Streamer", "Moderator", + "Animator", "Illustrator", "Painter", "Engraver", "Printer", "Composer", + "Arranger", "Conductor", "Performer", "Actor", "Dancer", "Choreographer", + "Orator", "Storyteller", "Poet", "Playwright", "Novelist", "Editor", + "Publisher", "Critic", "Reviewer", "Commentator", "Pundit", "Host", + "Announcer", "Reporter", "Anchor", "Correspondent", "Cameraman", "Director", + "Producer", "SoundEngineer", "LightingTech", "SetDesigner", "Costumer", + "MakeupArtist", "Stylist", "Barber", "Beautician", "Therapist", "Counselor", + "Coach", "Trainer", "Dietitian", "Nurse", "Surgeon", "Dentist", "Optometrist", + "Pharmacist", "Paramedic", "Veterinarian", "Caretaker", 
"Nanny", "Butler", + "Maid", "Valet", "Chauffeur", "Bodyguard", "Bouncer", "Doorman", "Concierge", + "Bellhop", "Waiter", "Bartender", "Sommelier", "Barista", "FlightAttendant", + "Librarian", "MuseumGuide", "ParkRanger", "Lifeguard", "Firefighter", + "PoliceOfficer", "Detective", "Profiler", "IntelligenceAgent", "Analyst", + "Cryptographer", "Codebreaker", "Linguist", "Archivist", "Researcher", + "LabTechnician", "FieldResearcher", "Experimentalist", "Theorist", "Statistician", + "DataScientist", "MachineLearningEngineer", "AI_Specialist", "Roboticist", + "NetworkEngineer", "SecurityAnalyst", "PenTester", "EthicalHacker", + "ForensicAnalyst", "GameDeveloper", "LevelDesigner", "NarrativeDesigner", + "SoundDesigner", "Tester", "QA_Engineer", "CommunityManager", "SupportAgent", + "Salesperson", "Marketer", "Advertiser", "PR_Specialist", "Recruiter", + "HR_Manager", "Lawyer", "Paralegal", "Judge", "Politician", "Activist", + "Lobbyist", "UnionRep", "Volunteer", "Philanthropist", "SocialWorker", + "Consultant", "Freelancer", "Contractor", "GigWorker", "SoleProprietor", + "Journeyman", "Expert", "Virtuoso", "Prodigy", "Maestro", "Specialist", + "Generalist", "Pioneer", "Innovator", "Futurist", "Visionary", "Leader", + "Follower", "Helper", "Assistant", "Associate", "Partner", "Collaborator", + "Competitor", "Rival", "Mentor", "Protege", "Patron", "Client", "Customer", + "Patient", "Student", "Citizen", "Resident", "Immigrant", "Expatriate", + "Refugee", "Tourist", "Traveler", "Wanderer", "Drifter", "Outcast", "Exile", + "Survivor", "Witness", "Observer", "Participant", "Subject", "Candidate", + "Contender", "Challenger", "Victor", "Loser", "Slave", "Servant", "Peasant", + "Serf", "Commoner", "Nobleman", "Aristocrat", "Royalty", "Emperor", "King", + "Queen", "Prince", "Princess", "Duke", "Duchess", "Marquis", "Count", + "Viscount", "Baron", "Lord", "Lady", "Sir", "Dame", "Esquire", "Gentleman", + # Add more niche/specific/combined roles if needed to reach 750 + 
"SkyCaptain", "DeepMiner", "GeneSplicer", "MemeLord", "DataWrangler", + "SynthWeaver", "BioHacker", "RealityBender", "VoidWalker", "StarSeer", + "TimeWarden", "SoulBinder", "ShadowDancer", "LightBringer", "StormCaller", + "EarthShaker", "FlameWielder", "IceShaper", "PlantWhisperer", "MetalShaper", + "BloodMage", "SpiritTalker", "DreamWalker", "NightmareWeaver", "ChaosAgent", + "OrderKeeper", "TruthSeeker", "LieSmith", "FateSpinner", "DoomBringer", + "HopeBearer", "MemoryKeeper", "LoreMaster", "MythMaker", "LegendSeeker", + "ClockMaker", "MapMaker", "ToyMaker", "Perfumer", "GloveMaker", "HatMaker", + "LockSmith", "GemCutter", "GlassBlower", "StoneMason", "RoadBuilder", + "BridgeBuilder", "CanalDigger", "WellDigger", "ChimneySweep", "RatCatcher", + "GongFarmer", "Mudlark", "Scavenger", "Recycler", "JunkDealer", "PawnBroker", + "MoneyLender", "BookBinder", "Illuminator", "Calligrapher", "Courtier", + "Emissary", "Legate", "Envoy", "Plenipotentiary", "Spymaster", "AssassinGuildLeader", + "ThiefGuildMaster", "MercenaryCaptain", "PirateKing", "Warlord", "Chieftain", + "TribalElder", "MedicineMan", "WitchDoctor", "HighPriest", "Abbot", "Bishop", + "Cardinal", "Pope", "Imam", "Rabbi", "Guru", "Sensei", "Roshi", "Lama", + "DruidArchon", "RangerLord", "PaladinOrderMaster", "Archmage", "MasterAssassin", + "Grandmaster", "CelestialPilot", "QuantumPhysicist", "NeuroScientist", + "AstroBiologist", "CryptoZoologist", "ParaPsychologist", "Ufologist", + "ConspiracyTheorist", "MythBuster", "FactChecker", "Debunker", "Propagandist", + "SpinDoctor", "Satirist", "Parodist", "Impersonator", "Mimic", "Ventriloquist", + "Puppeteer", "CircusMaster", "RingLeader", "Acrobat", "Contortionist", + "Strongman", "KnifeThrower", "FireEater", "SwordSwallower", "Magician", + "EscapeArtist", "Mentalist", "Hypnotist", "AnimalTrainer", "Clown", "Harlequin", + "Pierrot", "Pantomime", "CharacterActor", "Stuntman", "VoiceActor", "Narrator", + "Auctioneer", "Realtor", "Surveyor", "Appraiser", 
"InsuranceAgent", + "Underwriter", "ClaimsAdjuster", "LossPreventer", "SecurityGuard", + "AirTrafficController", "TrainConductor", "BusDriver", "TaxiDriver", + "Trucker", "DeliveryDriver", "Dispatcher", "Logistician", "SupplyChainManager", + "WarehouseWorker", "ForkliftOperator", "CraneOperator", "HeavyEquipmentOp", + "Welder", "Pipefitter", "Electrician", "Plumber", "HVACTech", "Carpenter", + "Roofer", "Painter", "Drywaller", "Floorer", "TileSetter", "Landscaper", + "Arborist", "Groundskeeper", "PoolCleaner", "Exterminator", "Janitor", + "Custodian", "SanitationWorker", "RecyclingOperator", "DemolitionWorker", + "HazardousMaterialsTech", "SafetyInspector", "BuildingInspector", "FoodInspector", + "HealthInspector", "CustomsOfficer", "ImmigrationOfficer", "BorderPatrolAgent", + "ParkRanger", "FishAndGameWarden", "Forester", "Conservationist", + "Ecologist", "Oceanographer", "Meteorologist", "Climatologist", "Volcanologist", + "Seismologist", "Paleontologist", "Mineralogist", "Petrologist", "Hydrologist", + "Glaciologist", "SoilScientist", "Agronomist", "Horticulturist", "Florist", + "Ichthyologist", "Herpetologist", "Ornithologist", "Entomologist", "Mammalogist", + "Primatologist", "Microbiologist", "Virologist", "Bacteriologist", "Mycologist", + "Parasitologist", "Immunologist", "Geneticist", "Epidemiologist", "Toxicologist", + "Pharmacologist", "Pathologist", "Radiologist", "Anesthesiologist", "Cardiologist", + "Dermatologist", "Endocrinologist", "Gastroenterologist", "Hematologist", + "Nephrologist", "Neurologist", "Oncologist", "Ophthalmologist", "Orthopedist", + "Otolaryngologist", "Pediatrician", "Psychiatrist", "Pulmonologist", "Rheumatologist", + "Urologist", "Podiatrist", "Chiropractor", "Acupuncturist", "MassageTherapist", + "PhysicalTherapist", "OccupationalTherapist", "SpeechTherapist", "Audiologist", + "Midwife", "Doula", "Mortician", "Embalmer", "Coroner", "MedicalExaminer", + "ForensicScientist", "BallisticsExpert", "FingerprintAnalyst", "DNAAnalyst", 
+ "DocumentExaminer", "ArsonInvestigator", "AccidentReconstructionist", + "PolygraphExaminer", "K9Officer", "MountedPolice", "SWATOfficer", "HostageNegotiator", + "BombTechnician", "AirMarshal", "SecretServiceAgent", "FBI_Agent", "CIA_Agent", + "NSA_Analyst", "DEA_Agent", "ATF_Agent", "US_Marshal", "DiplomaticSecurity", + "MilitaryPolice", "CoastGuard", "Infantryman", "Artilleryman", "CavalryScout", + "TankCommander", "CombatEngineer", "Pilot", "Navigator", "DroneOperator", + "Submariner", "SEAL", "GreenBeret", "Ranger", "DeltaForce", "Pararescueman", + "IntelligenceOfficer", "LogisticsOfficer", "PersonnelOfficer", "PublicAffairs", + "Chaplain", "MilitaryLawyer", "MilitaryDoctor", "FlightSurgeon", "CyberWarfare", + "SpaceForceGuardian", "TestPilot", "Astronaut", "MissionControl", "RocketScientist", + "SatelliteTech", "SpaceSystemsOp", "PlanetaryScientist", "ExoBiologist", + "Terraformer", "AstroMiner", "StellarCartographer", "WarpFieldSpecialist", + "Cyberneticist", "AndroidTechnician", "AI_Psychologist", "SynthProgrammer", + "HoloDesigner", "VR_Architect", "NeuralInterfaceTech", "BioEnhancementSpec", + "CloningTechnician", "CryonicsSpecialist", "Nanotechnologist", "QuantumMechanic", + "ZeroG_Welder", "AsteroidMiner", "LunarGeologist", "MartianBotanist", + "TitanFisherman", "EuropaExplorer", "GasGiantProspector", "VoidSurveyor", + "AlienLinguist", "XenoAnthropologist", "FirstContactSpec", "GalacticDiplomat", + "StarshipCaptain", "FleetAdmiral", "SectorCommander", "PlanetaryGovernor", + "ImperialGuard", "RebelLeader", "SmugglerCaptain", "BountyGuildMaster", + "InfoBroker", "CyberRunner", "StreetSamurai", "Rigger", "Decker", "Technoshaman", + "DataThief", "CorpSecOfficer", "Fixer", "Ripperdoc", "Joytech", "SimstimArtist", + "MediaProducer", "Netcaster", "TruthSayer", "ProphetOfWoe", "CultLeader", + "DoomsdayPrepper", "Survivalist", "Homesteader", "Recluse", "Misanthrope", + "Philanthropist", "Humanitarian", "Activist", "Advocate", "Organizer", + "Educator", 
"Motivator", "Inspirer", "RoleModel", "Iconoclast", "Maverick", + "Renegade", "Pioneer", "Trailblazer", "StandardBearer", "Vanguard", "Luminary", "Andy-4-" +])) + +ADJECTIVES = list(set([ + # Core + "Code", "Music", "Official", "Streamer", "Tech", "Starry", "Simple", + "Big", "Gaming", "Workout", "DIY", "Mindful", "Foodie", "Travel", + "Pixel", "Byte", "Data", "Synth", "Digital", "Analog", "Creative", + "Brave", "Happy", "Strong", "Quiet", "Agile", "Electric", "Mystic", + "Fierce", "Clever", "Speedy", "Golden", "Silver", "Cosmic", "Infinite", + "Quantum", "Stealthy", "Radiant", "Crimson", "Azure", "Mysterious", + "Vivid", "Silent", "Roaring", "Frozen", "Burning", "Virtual", "Cyber", + "Galactic", "Stellar", "Solar", "Lunar", "Arcane", "Ancient", "Forgotten", + "Hidden", "Secret", "Whispering", "Shadowy", "Luminous", "Glowing", + "Magnetic", "Sonic", "Crystal", "Diamond", "Emerald", "Ruby", "Sapphire", + "Bronze", "Iron", "Steel", "Obsidian", "Molten", "Icy", "Blazing", + "Stormy", "Windy", "Rainy", "Sunny", "Cloudy", "Misty", "Ethereal", + "Nimble", "Swift", "Bold", "Noble", "Regal", "Royal", "Humble", + "Gentle", "Savage", "Wild", "Primal", "Eternal", "Boundless", "Supreme", + "Ultimate", "Perfect", "Flawless", "Broken", "Glitched", "Corrupted", + "Sacred", "Hallowed", "Cursed", "Haunted", "Undead", "Living", "Breathing", + "Mechanical", "Organic", "Temporal", "Spatial", "Abstract", "Concrete", + "Logical", "Chaotic", "Mythic", "Legendary", "Epic", "Rare", "Common", + # Expansion + "Grand", "Great", "Small", "Tiny", "Huge", "Massive", "Micro", "Nano", + "Quick", "Slow", "Fast", "Rapid", "Sudden", "Gradual", "Patient", "Eager", + "Calm", "Angry", "Furious", "Peaceful", "Serene", "Turbulent", "Violent", + "Kind", "Cruel", "Mean", "Nice", "Generous", "Stingy", "Selfish", "Altruistic", + "Honest", "Deceitful", "True", "False", "Fake", "Genuine", "Authentic", + "Loyal", "Treacherous", "Faithful", "Fickle", "Brave", "Cowardly", "Timid", + "Fearless", "Courageous", 
"Daring", "Reckless", "Cautious", "Prudent", + "Wise", "Foolish", "Ignorant", "Knowledgeable", "Learned", "Erudite", + "Simple", "Complex", "Intricate", "Elaborate", "Plain", "Ornate", "Fancy", + "Beautiful", "Ugly", "Hideous", "Gorgeous", "Attractive", "Repulsive", + "Clean", "Dirty", "Filthy", "Pristine", "Pure", "Tainted", "Polluted", + "Bright", "Dim", "Dark", "Gloomy", "Murky", "Shining", "Gleaming", "Dull", + "Sharp", "Blunt", "Pointed", "Rounded", "Smooth", "Rough", "Coarse", "Fine", + "Hard", "Soft", "Firm", "Flabby", "Rigid", "Flexible", "Pliant", "Stiff", + "Heavy", "Light", "Weightless", "Dense", "Sparse", "Thick", "Thin", + "Wide", "Narrow", "Broad", "Slim", "Fat", "Skinny", "Lean", "Stout", + "Tall", "Short", "Long", "Brief", "High", "Low", "Deep", "Shallow", + "Hot", "Cold", "Warm", "Cool", "Tepid", "Frigid", "Scalding", "Arctic", + "Tropical", "Temperate", "Arid", "Humid", "Dry", "Wet", "Damp", "Soggy", + "Loud", "Noisy", "Silent", "Mute", "Hushed", "Resonant", "Melodious", + "Harmonious", "Discordant", "Cacophonous", "Sweet", "Sour", "Bitter", + "Salty", "Spicy", "Savory", "Bland", "Tasty", "Delicious", "Nasty", + "Fragrant", "Aromatic", "Pungent", "Stinky", "Odorous", "Scented", + "Red", "Orange", "Yellow", "Green", "Blue", "Purple", "Violet", "Indigo", + "Pink", "Brown", "Black", "White", "Gray", "Beige", "Cream", "Maroon", + "Navy", "Teal", "Aqua", "Lime", "Olive", "Gold", "Copper", "Platinum", + "Chromatic", "Iridescent", "Opalescent", "Pearly", "Metallic", "Matte", + "Glossy", "Transparent", "Translucent", "Opaque", "Clear", "Cloudy", + "Young", "Old", "New", "Aged", "Antique", "Modern", "Futuristic", "Retro", + "Primeval", "Prehistoric", "Medieval", "Victorian", "Contemporary", + "Living", "Dead", "Undead", "Spectral", "Ghostly", "Phantom", "Corporeal", + "Physical", "Mental", "Spiritual", "Emotional", "Psychic", "Astral", + "Divine", "Infernal", "Demonic", "Angelic", "Celestial", "Fey", "Elemental", + "Natural", "Artificial", "Synthetic", 
"Simulated", "Augmented", "Bionic", + "Robotic", "Clockwork", "SteamPowered", "Nuclear", "SolarPowered", "WindPowered", + "GeoThermal", "BioLuminescent", "Photosynthetic", "Radioactive", "Toxic", + "Venomous", "Poisonous", "Inert", "Volatile", "Stable", "Unstable", + "Explosive", "Implosive", "Acidic", "Alkaline", "Neutral", "Charged", + "Magnetic", "Conductive", "Insulating", "Resistant", "Absorbent", "Reflective", + "Emissive", "Stealthy", "Visible", "Invisible", "Camouflaged", "Disguised", + "Known", "Unknown", "Familiar", "Strange", "Exotic", "Foreign", "Alien", + "Native", "Indigenous", "Local", "Regional", "National", "Global", "Universal", + "Public", "Private", "Personal", "Communal", "Collective", "Individual", + "Open", "Closed", "Locked", "Sealed", "Guarded", "Protected", "Vulnerable", + "Exposed", "Secure", "Insecure", "Safe", "Dangerous", "Hazardous", "Risky", + "Beneficial", "Harmful", "Helpful", "Useless", "Useful", "Valuable", + "Worthless", "Priceless", "Cheap", "Expensive", "Affordable", "Luxurious", + "Basic", "Advanced", "Fundamental", "Essential", "Optional", "Mandatory", + "Required", "Forbidden", "Permitted", "Legal", "Illegal", "Lawful", "Unlawful", + "Ethical", "Unethical", "Moral", "Immoral", "Amoral", "Just", "Unjust", + "Fair", "Unfair", "Right", "Wrong", "Correct", "Incorrect", "Accurate", + "Inaccurate", "Precise", "Imprecise", "Vague", "Definite", "Ambiguous", + "Certain", "Uncertain", "Probable", "Improbable", "Possible", "Impossible", + "Real", "Unreal", "Imaginary", "Fictional", "Factual", "Symbolic", "Literal", + "Abstract", "Figurative", "Empty", "Full", "Hollow", "Solid", "Filled", + "Vacant", "Occupied", "Crowded", "Deserted", "Isolated", "Connected", + "Linked", "Separate", "United", "Divided", "Whole", "Partial", "Complete", + "Incomplete", "Finished", "Unfinished", "Perfect", "Imperfect", "Damaged", + "Intact", "Operational", "Defective", "Functional", "Dysfunctional", + "Healthy", "Sick", "Injured", "Wounded", "Healed", 
"Diseased", "Immune", + "Alive", "Animated", "Inanimate", "Conscious", "Unconscious", "Sentient", + "Sapient", "Intelligent", "Mindless", "Aware", "Oblivious", "Alert", + "Drowsy", "Sleeping", "Awake", "Dreaming", "Lucid", "Nightmarish", + "Hopeful", "Hopeless", "Optimistic", "Pessimistic", "Joyful", "Sorrowful", + "Cheerful", "Gloomy", "Excited", "Bored", "Interested", "Indifferent", + "Passionate", "Apathetic", "Loving", "Hateful", "Friendly", "Hostile", + "Welcoming", "Suspicious", "Trusting", "Distrustful", "Gullible", "Skeptical", + "Naive", "Cynical", "Innocent", "Guilty", "Blameless", "Responsible", + "Free", "Captive", "Enslaved", "Liberated", "Independent", "Dependent", + "Autonomous", "Subordinate", "Dominant", "Submissive", "Equal", "Unequal", + "Superior", "Inferior", "Primary", "Secondary", "Tertiary", "Major", "Minor", + "Significant", "Insignificant", "Crucial", "Trivial", "Urgent", "Routine", + "Special", "Ordinary", "Normal", "Abnormal", "Typical", "Atypical", + "Standard", "Custom", "Unique", "Generic", "Specific", "General", + "Universal", "Particular", "Consistent", "Inconsistent", "Reliable", + "Unreliable", "Predictable", "Unpredictable", "Stable", "Erratic", + "Constant", "Variable", "Fixed", "Adjustable", "Static", "Dynamic", + "Active", "Passive", "Inert", "Reactive", "Proactive", "Responsive", + "Sensitive", "Insensitive", "Delicate", "Robust", "Fragile", "Durable", + "Temporary", "Permanent", "Ephemeral", "Lasting", "Fleeting", "Enduring", + "Ancient", "Timeless", "Momentary", "Instantaneous", "Protracted", + "Forthcoming", "Past", "Present", "Future", "Initial", "Final", "Penultimate", + "Sequential", "Simultaneous", "Concurrent", "Asynchronous", "Synchronous", + "Parallel", "Serial", "Linear", "Nonlinear", "Cyclical", "Spiral", + "Random", "Ordered", "Structured", "Unstructured", "Organized", "Disorganized", + "Systematic", "Haphazard", "Methodical", "Intuitive", "Rational", "Irrational", + "Logical", "Illogical", "Coherent", 
"Incoherent", "Articulate", "Inarticulate", + "Eloquent", "Mumbling", "Fluent", "Stuttering", "Clear", "Obscure", + "Explicit", "Implicit", "Direct", "Indirect", "Subtle", "Obvious", + "Manifest", "Latent", "Overt", "Covert", "Public", "Confidential", + "Classified", "TopSecret", "Unclassified", "Encoded", "Decoded", "Encrypted", + "Plaintext", "Austere", "Lavish", "Minimalist", "Baroque", "Ornate", + "Utilitarian", "Decorative", "Functional", "Ceremonial", "Ritualistic", + "Sacrificial", "Consecrated", "Desecrated", "Blessed", "Cursed", "Enchanted", + "Magical", "Mundane", "Ordinary", "Extraordinary", "Supernatural", + "Paranormal", "Preternatural", "Otherworldly", "Uncanny", "Weird", "Bizarre", + "Grotesque", "Surreal", "Absurd", "Comical", "Tragic", "Dramatic", "Lyrical", + "Poetic", "Prosaic", "Musical", "Rhythmic", "Silent", "Still", "Moving", + "Flowing", "Stagnant", "Vibrant", "Dull", "Energetic", "Lethargic", + "Restless", "Peaceful", "Manic", "Depressed", "Anxious", "Relaxed", + "Tense", "Loose", "Tight", "Slack", "Strained", "Comfortable", "Uncomfortable", + "Painful", "Painless", "Pleasant", "Unpleasant", "Agreeable", "Disagreeable", + "Satisfying", "Unsatisfying", "Fulfilling", "Frustrating", "Rewarding", + "Punishing", "Addictive", "Repulsive", "Alluring", "Tempting", "Forbidden", + "Sanctioned", "Approved", "Rejected", "Accepted", "Denied", "Confirmed", + "Refuted", "Verified", "Unverified", "Proven", "Unproven", "Tested", + "Untested", "Experimental", "Theoretical", "Practical", "Applied", "Pure", + "Academic", "Vocational", "Professional", "Amateur", "Expert", "Novice", + "Skilled", "Unskilled", "Talented", "Gifted", "Mediocre", "Incompetent", + "Proficient", "Deficient", "Capable", "Incapable", "Able", "Unable", + "Ready", "Unready", "Willing", "Unwilling", "Forced", "Voluntary", + "Compulsory", "Elective", "Chosen", "Imposed", "Innate", "Acquired", + "Inherited", "Learned", "Instinctive", "Conditioned", "Habitual", "Sporadic", + "Frequent", 
"Infrequent", "Rare", "Ubiquitous", "Endemic", "Epidemic", + "Pandemic", "Contagious", "Infectious", "Sterile", "Fertile", "Barren", + "Productive", "Unproductive", "Fruitful", "Futile", "Effective", "Ineffective", + "Efficient", "Inefficient", "Optimal", "Suboptimal", "Adequate", "Inadequate", + "Sufficient", "Insufficient", "Abundant", "Scarce", "Plentiful", "Meager", + "Rich", "Poor", "Wealthy", "Impoverished", "Prosperous", "Destitute", + "Lucky", "Unlucky", "Fortunate", "Unfortunate", "Blessed", "Doomed", + "Fated", "Random", "Destined", "Accidental", "Intentional", "Unintentional", + "Deliberate", "Spontaneous", "Calculated", "Impulsive", "Planned", "Unplanned", + "Expected", "Unexpected", "Surprising", "Predictable", "Inevitable", "Avoidable", + "Escapable", "Inescapable", "Solvable", "Insolvable", "Answerable", "Unanswerable", + "Known", "Unknowable", "Finite", "Measurable", "Immeasurable", "Comparable", + "Incomparable", "Related", "Unrelated", "Relevant", "Irrelevant", "Appropriate", + "Inappropriate", "Suitable", "Unsuitable", "Fitting", "Unfitting", "Seemly", + "Unseemly", "Decent", "Indecent", "Modest", "Arrogant", "Proud", "Vain", + "Humble", "Meek", "Assertive", "Aggressive", "Passive", "Docile", "Rebellious", + "Compliant", "Defiant", "Obedient", "Disobedient", "Respectful", "Disrespectful", + "Courteous", "Rude", "Polite", "Impolite", "Considerate", "Inconsiderate", + "Thoughtful", "Thoughtless", "Tactful", "Tactless", "Diplomatic", "Blunt", + "Subtle", "Frank", "Candid", "Reserved", "Outgoing", "Introverted", "Extroverted", + "Ambiverted", "Sociable", "Antisocial", "Solitary", "Gregarious", "Aloof", + "Approachable", "Distant", "Warm", "Cold", "Friendly", "Unfriendly", "Charming", + "Repellent", "Engaging", "Boring", "Interesting", "Dull", "Fascinating", + "Tedious", "Stimulating", "Monotonous", "Varied", "Diverse", "Homogeneous", + "Uniform", "Eclectic", "Assorted", "Miscellaneous", "Purebred", "Hybrid", + "Mixed", "Segregated", "Integrated", 
"Unified", "Fragmented", "Cohesive", + "Disparate", "Congruent", "Incongruent", "Compatible", "Incompatible", + "Harmonious", "Clashing", "Aligned", "Misaligned", "Balanced", "Unbalanced", + "Symmetrical", "Asymmetrical", "Centered", "OffCenter", "Level", "Slanted", + "Vertical", "Horizontal", "Diagonal", "Perpendicular", "Parallel", "Intersecting", + "Tangent", "Concentric", "Eccentric", "Orthogonal", "Radial", "Axial", + "Spherical", "Cubical", "Conical", "Cylindrical", "Planar", "Volumetric", + "Holographic", "Fractal", "Recursive", "Iterative", "Generative", "Procedural", + "Algorithmic", "Heuristic", "Stochastic", "Deterministic", "Emergent", "Complex", + "Networked", "Distributed", "Centralized", "Decentralized", "PeerToPeer", + "Hierarchical", "Flat", "Layered", "Nested", "Interconnected", "Intertwined", + "Woven", "Knitted", "Braided", "Fused", "Welded", "Bolted", "Glued", + "Stitched", "Bound", "Loose", "Free", "Contained", "Released", "Captured", + "Escaped", "Wild", "Domesticated", "Feral", "Tame", "Savage", "Civilized", + "Primitive", "Advanced", "Rudimentary", "Sophisticated", "Crude", "Refined", + "Polished", "RoughHewn", "Raw", "Cooked", "Processed", "Natural", "Organic", + "Synthetic", "Artificial", "Genuine", "Counterfeit", "Imitation", "Original", + "Reproduction", "Authentic", "Spurious", "Legitimate", "Illegitimate", + "Valid", "Invalid", "Sound", "Fallacious", "True", "Misleading", "Erroneous" +])) + +OBJECTS = list(set([ + # Core + "Wizardry", "Maven", "Account", "Squad", "Tips", "Night", "Life", + "Dreams", "Setup", "Warrior", "Dad", "Moments", "Gram", "Fotos", + "Tales", "Key", "Gem", "Crown", "Sword", "Shield", "Orb", "Crystal", + "Book", "Star", "Planet", "Cloud", "Tree", "River", "Mountain", + "City", "Code", "Pixel", "Byte", "Note", "Rhythm", "Brush", "Canvas", + "Machine", "Network", "Engine", "Galaxy", "Universe", "Dimension", + "Realm", "Kingdom", "Empire", "Citadel", "Fortress", "Tower", "Dungeon", + "Cavern", "Labyrinth", "Portal", 
"Gate", "Rune", "Sigil", "Talisman", + "Amulet", "Relic", "Artifact", "Scroll", "Tome", "Codex", "Grimoire", + "Map", "Compass", "Sextant", "Telescope", "Microscope", "Elixir", "Potion", + "Flask", "Vial", "Herb", "Root", "Seed", "Spore", "Gemstone", "Scepter", + "Wand", "Staff", "Blade", "Dagger", "Arrow", "Bow", "Axe", "Hammer", + "Armor", "Helmet", "Gauntlet", "Boot", "Cloak", "Ring", "Throne", "Altar", + "Forge", "Anvil", "Loom", "Quill", "Ink", "Parchment", "Pigment", "Clay", + "Stone", "Wood", "Metal", "Glass", "Circuit", "Wire", "Chip", "Core", + "Matrix", "Grid", "Node", "Server", "Database", "Algorithm", "Script", + "Glitch", "Bug", "Patch", "Mod", "Console", "Controller", "Keyboard", + "Mouse", "Headset", "Monitor", "Stream", "Channel", "Feed", "Echo", + "Signal", "Wave", "Particle", "Atom", "Molecule", "Sun", "Moon", "Comet", + "Asteroid", "Nebula", "Void", "Abyss", "Nexus", "Heart", "Soul", "Mind", + "Spirit", "Nightmare", "Memory", "Thought", "Idea", "Concept", "Theory", + "Law", "Rule", "Quest", "Journey", "Saga", "Legend", "Myth", "Fable", + "Story", "Song", "Melody", "Harmony", "Beat", "Pulse", "Silence", + "Shadow", "Light", "Dark", "Dawn", "Dusk", "Twilight", "Midnight", + "Noon", "Sky", "Rain", "Snow", "Wind", "Storm", "Fire", "Flame", + "Ember", "Ash", "Water", "Ocean", "Sea", "Lake", "Pond", "Tide", + "Earth", "Soil", "Sand", "Dust", "Rock", "Valley", "Forest", "Grove", + "Leaf", "Branch", "Flower", "Thorn", "Vine", "Moss", "Fungus", "Beast", + "Creature", "Monster", "Dragon", "Phoenix", "Griffin", "Unicorn", "Wolf", + "Bear", "Eagle", "Raven", "Serpent", "Spider", "Scarab", "Data", "Info", + "Knowledge", "Wisdom", "Power", "Force", "Energy", "Magic", "Source", + "Lock", "Chain", "Puzzle", "Riddle", "Secret", "Clue", "Truth", "Lie", + "Hope", "Fear", "Joy", "Sorrow", "Anger", "Peace", "Chaos", "Order", + "Death", "Fate", "Destiny", "Time", "Space", "Reality", "Illusion", "Specter", + # Expansion + "Castle", "Keep", "Manor", "Villa", "Palace", 
"Temple", "Shrine", "Monastery", + "Abbey", "Cathedral", "Church", "Chapel", "Mosque", "Synagogue", "Pagoda", + "Pyramid", "Ziggurat", "Mausoleum", "Tomb", "Crypt", "Catacomb", "Ossuary", + "Hut", "Cabin", "Cottage", "House", "Home", "Apartment", "Condo", "Studio", + "Loft", "Penthouse", "Mansion", "Estate", "Chateau", "Bungalow", "Townhouse", + "Shack", "Tent", "Yurt", "Igloo", "Treehouse", "Cave", "Burrow", "Nest", + "Hive", "Lair", "Den", "Sanctuary", "Refuge", "Haven", "Oasis", "Island", + "Peninsula", "Continent", "Archipelago", "Volcano", "Geyser", "HotSpring", + "Glacier", "Iceberg", "Fjord", "Canyon", "Gorge", "Ravine", "Plateau", + "Mesa", "Butte", "Hill", "Peak", "Summit", "Ridge", "Cliff", "Crag", + "Beach", "Shore", "Coast", "Delta", "Estuary", "Bay", "Gulf", "Strait", + "Channel", "Sound", "Lagoon", "Marsh", "Swamp", "Bog", "Fen", "Wetland", + "Tundra", "Taiga", "Savanna", "Prairie", "Steppe", "Desert", "Wasteland", + "Jungle", "Rainforest", "Woodland", "Thicket", "Copse", "Meadow", "Field", + "Pasture", "Garden", "Orchard", "Vineyard", "Farm", "Ranch", "Plantation", + "Road", "Path", "Trail", "Track", "Street", "Avenue", "Boulevard", "Highway", + "Freeway", "Bridge", "Tunnel", "Overpass", "Underpass", "Canal", "Aqueduct", + "Dam", "Reservoir", "Well", "Cistern", "Fountain", "Pipeline", "Sewer", + "Mine", "Quarry", "OilRig", "WindTurbine", "SolarPanel", "PowerPlant", + "Factory", "Workshop", "Mill", "Refinery", "Warehouse", "Silo", "Granary", + "Depot", "Hangar", "Dock", "Pier", "Wharf", "Harbor", "Port", "Airport", + "Station", "Terminal", "Platform", "Stop", "Market", "Bazaar", "Mall", + "Shop", "Store", "Boutique", "Emporium", "Gallery", "Museum", "Library", + "Archive", "School", "University", "College", "Academy", "Institute", + "Laboratory", "Observatory", "Studio", "Theater", "Cinema", "Amphitheater", + "Arena", "Stadium", "Colosseum", "Gymnasium", "Spa", "Bathhouse", "Hospital", + "Clinic", "Infirmary", "Asylum", "Sanitarium", "Orphanage", 
"Prison", "Jail", + "Barracks", "Garrison", "Armory", "Arsenal", "Bunker", "Trench", "Wall", + "Fence", "Barricade", "Moat", "Rampart", "Parapet", "Battlement", "Watchtower", + "Lighthouse", "BellTower", "ClockTower", "Spire", "Steeple", "Dome", "Arch", + "Column", "Pillar", "Statue", "Monument", "Obelisk", "Fresco", "Mural", + "Tapestry", "Mosaic", "StainedGlass", "Sculpture", "Painting", "Drawing", + "Sketch", "Etching", "Engraving", "Photograph", "Hologram", "Blueprint", + "Diagram", "Schematic", "Manuscript", "Document", "Letter", "Journal", + "Diary", "Ledger", "Logbook", "Manifest", "Treaty", "Contract", "Deed", + "Will", "Testament", "Proclamation", "Decree", "Edict", "Charter", "Constitution", + "Scripture", "Gospel", "Sutra", "Veda", "Koran", "Torah", "Bible", "Hymn", + "Prayer", "Chant", "Mantra", "Incantation", "Spell", "Curse", "Blessing", + "Prophecy", "Omen", "Sign", "Token", "Symbol", "Emblem", "Crest", "Banner", + "Flag", "Standard", "Pennant", "Badge", "Insignia", "Medal", "Ribbon", + "Coin", "Currency", "Note", "Bill", "Token", "Chip", "Bar", "Ingot", "Nugget", + "Dust", "Powder", "Crystal", "Shard", "Fragment", "Piece", "Slice", "Lump", + "Block", "Slab", "Sheet", "Plate", "Rod", "Bar", "Wire", "Cable", "Fiber", + "Thread", "String", "Rope", "Cord", "Twine", "Yarn", "Fabric", "Cloth", + "Textile", "Leather", "Hide", "Pelt", "Fur", "Wool", "Cotton", "Silk", + "Linen", "Hemp", "Canvas", "Paper", "Cardboard", "Plastic", "Rubber", + "Ceramic", "Porcelain", "Earthenware", "Brick", "Tile", "Concrete", "Asphalt", + "Tar", "Resin", "Amber", "Jet", "Ivory", "Bone", "Horn", "Antler", "Shell", + "Pearl", "Coral", "Scale", "Feather", "Tooth", "Claw", "Talon", "Fang", + "Venom", "Antidote", "Toxin", "Acid", "Base", "Solvent", "Catalyst", "Reagent", + "Compound", "Mixture", "Solution", "Suspension", "Emulsion", "Gel", "Foam", + "Aerosol", "Smoke", "Vapor", "Gas", "Liquid", "Solid", "Plasma", "Slime", + "Ooze", "Goo", "Mud", "Silt", "Clay", "Loam", "Gravel", 
"Pebble", "Boulder", + "Meteorite", "Tektite", "Geode", "Fossil", "PetrifiedWood", "Coal", "Graphite", + "Diamond", "Quartz", "Feldspar", "Mica", "Granite", "Basalt", "Marble", + "Slate", "Sandstone", "Limestone", "Chalk", "Flint", "Obsidian", "Pumice", + "Sulfur", "Salt", "Potash", "Nitrate", "Alum", "Borax", "Gypsum", "Talc", + "Asbestos", "IronOre", "CopperOre", "GoldOre", "SilverOre", "TinOre", + "LeadOre", "ZincOre", "NickelOre", "AluminumOre", "UraniumOre", "TitaniumOre", + "Platinum", "Palladium", "Rhodium", "Osmium", "Iridium", "Mercury", + "Arsenic", "Antimony", "Bismuth", "Cadmium", "Chromium", "Cobalt", + "Manganese", "Molybdenum", "Tungsten", "Vanadium", "Zirconium", "Gallium", + "Germanium", "Indium", "Selenium", "Tellurium", "Polonium", "Astatine", + "Radon", "Francium", "Radium", "Actinium", "Thorium", "Protactinium", + "Neptunium", "Plutonium", "Americium", "Curium", "Berkelium", "Californium", + "Einsteinium", "Fermium", "Mendelevium", "Nobelium", "Lawrencium", + "Rutherfordium", "Dubnium", "Seaborgium", "Bohrium", "Hassium", "Meitnerium", + "Darmstadtium", "Roentgenium", "Copernicium", "Nihonium", "Flerovium", + "Moscovium", "Livermorium", "Tennessine", "Oganesson", "Element", + "Isotope", "Ion", "Cation", "Anion", "Proton", "Neutron", "Electron", + "Photon", "Quark", "Lepton", "Boson", "Fermion", "Gluon", "Graviton", + "Neutrino", "Antimatter", "DarkMatter", "DarkEnergy", "Singularity", + "BlackHole", "WhiteHole", "Wormhole", "Quasar", "Pulsar", "Magnetar", + "Supernova", "Hypernova", "RedGiant", "WhiteDwarf", "BrownDwarf", "NeutronStar", + "Protostar", "MainSequence", "Constellation", "Asterism", "Cluster", "Group", + "Supercluster", "Filament", "Wall", "Void", "CosmicMicrowaveBackground", + "BigBang", "Inflation", "Multiverse", "Hyperspace", "Subspace", "Slipstream", + "WarpDrive", "JumpDrive", "Teleporter", "Stargate", "Transporter", "Replicator", + "Holodeck", "Phaser", "Blaster", "Lightsaber", "ForceField", "DeflectorShield", + 
"TractorBeam", "CloakingDevice", "SensorArray", "Communicator", "Tricorder", + "UniversalTranslator", "Cyberdeck", "NeuralInterface", "Exoskeleton", "CyborgImplant", + "BionicArm", "ArtificialEye", "SyntheticOrgan", "GeneMod", "Vat", "Clone", + "Android", "Robot", "Drone", "Automaton", "Golem", "Homunculus", "Gargoyle", + "Chimera", "Manticore", "Hydra", "Cerberus", "Cyclops", "Giant", "Titan", + "Ogre", "Troll", "Goblin", "Orc", "Kobold", "Gremlin", "Imp", "Demon", "Devil", + "Angel", "Archangel", "Seraph", "Cherub", "Valkyrie", "Nymph", "Dryad", "Sprite", + "Pixie", "Fairy", "Leprechaun", "Gnome", "Dwarf", "Elf", "Hobbit", "Halfling", + "Centaur", "Satyr", "Faun", "Minotaur", "Harpy", "Siren", "Mermaid", "Merman", + "Naga", "Lamia", "Gorgon", "Medusa", "Sphinx", "Basilisk", "Cockatrice", + "Wyvern", "Roc", "Kraken", "Leviathan", "Behemoth", "Juggernaut", "Werewolf", + "Vampire", "Lich", "Ghoul", "Zombie", "Mummy", "Skeleton", "Ghost", "Phantom", + "Specter", "Wraith", "Poltergeist", "Banshee", "Shade", "Doppelganger", + "Shapeshifter", "Illusion", "Mirage", "Phantasm", "Hallucination", "Apparition", + "Entity", "Being", "Essence", "Presence", "Aura", "Emanation", "Vibration", + "Frequency", "Wavelength", "Spectrum", "Color", "Hue", "Tint", "Shade", + "Tone", "Sound", "Noise", "Pitch", "Volume", "Timbre", "Resonance", "Silence", + "Scent", "Odor", "Aroma", "Fragrance", "Stench", "Taste", "Flavor", "Aftertaste", + "Texture", "Feel", "Grain", "Temperature", "Pressure", "Weight", "Mass", + "Density", "Volume", "Area", "Length", "Width", "Height", "Depth", "Distance", + "Proximity", "Angle", "Curve", "Line", "Point", "Shape", "Form", "Structure", + "Pattern", "Design", "Composition", "Layout", "Arrangement", "Configuration", + "System", "Mechanism", "Device", "Apparatus", "Instrument", "Tool", "Utensil", + "Gadget", "Contraption", "Widget", "Gizmo", "Thingamajig", "Doodad", "Item", + "Object", "Article", "Commodity", "Product", "Goods", "Wares", "Merchandise", + 
"Supplies", "Provisions", "Equipment", "Gear", "Tackle", "Kit", "Outfit", + "Apparel", "Clothing", "Garment", "Attire", "Vestment", "Raiment", "Costume", + "Uniform", "Jewelry", "Accessory", "Adornment", "Trinket", "Bauble", "Knickknack", + "Souvenir", "Memento", "Heirloom", "Treasure", "Prize", "Reward", "Bounty", + "Loot", "Spoils", "Plunder", "Trophy", "Gift", "Present", "Offering", "Tribute", + "Donation", "Alms", "Charity", "Sacrifice", "Libation", "Incense", "Candle", + "Torch", "Lantern", "Lamp", "Lightbulb", "Laser", "Beam", "Ray", "Glimmer", + "Spark", "Flash", "Glow", "Shimmer", "Glitter", "Reflection", "Refraction", + "Diffraction", "Interference", "Polarization", "Lense", "Mirror", "Prism", + "Filter", "Screen", "Monitor", "Display", "Projector", "Camera", "Binoculars", + "MagnifyingGlass", "Eyeglasses", "ContactLense", "Microphone", "Speaker", + "Headphones", "Earbuds", "Amplifier", "Receiver", "Transmitter", "Antenna", + "SatelliteDish", "Modem", "Router", "Switch", "Hub", "Firewall", "Proxy", + "VPN", "Cable", "Connector", "Port", "Jack", "Plug", "Socket", "Adapter", + "Battery", "PowerSupply", "Generator", "Capacitor", "Resistor", "Transistor", + "Diode", "Inductor", "IntegratedCircuit", "Microprocessor", "MemoryChip", + "HardDrive", "SSD", "FlashDrive", "OpticalDisc", "FloppyDisk", "TapeDrive", + "Motherboard", "CPU", "GPU", "RAM", "ROM", "BIOS", "OperatingSystem", "Software", + "Application", "Program", "App", "Utility", "Driver", "Firmware", "Malware", + "Virus", "Worm", "Trojan", "Ransomware", "Spyware", "Adware", "Keylogger", + "Rootkit", "Botnet", "Firewall", "Antivirus", "Sandbox", "Honeypot", + "EncryptionKey", "Password", "Passphrase", "Biometric", "Fingerprint", + "RetinaScan", "Voiceprint", "FaceRecognition", "Token", "Certificate", + "DigitalSignature", "Blockchain", "Cryptocurrency", "Bitcoin", "Ethereum", + "NFT", "SmartContract", "Ledger", "Transaction", "Block", "Hash", "Wallet", + "Exchange", "MiningRig", "Node", "Protocol", 
"Algorithm", "Heuristic", + "Function", "Variable", "Constant", "Parameter", "Argument", "Loop", + "Condition", "Statement", "Expression", "Syntax", "Semantics", "Compiler", + "Interpreter", "Debugger", "IDE", "TextEditor", "VersionControl", "Repository", + "Branch", "Merge", "Commit", "Push", "Pull", "Clone", "Fork", "API", "SDK", + "Library", "Framework", "Module", "Package", "Dependency", "Class", "Object", + "Method", "Attribute", "Inheritance", "Polymorphism", "Encapsulation", + "Abstraction", "Interface", "DesignPattern", "Architecture", "Model", "View", + "Controller", "DatabaseSchema", "Table", "Row", "Column", "Index", "Query", + "SQL", "NoSQL", "JSON", "XML", "CSV", "YAML", "HTML", "CSS", "JavaScript", + "Python", "Java", "C++", "CSharp", "Ruby", "PHP", "Swift", "Kotlin", "Go", + "Rust", "TypeScript", "Assembly", "MachineCode", "Binary", "Hexadecimal", + "Decimal", "Octal", "Character", "String", "Integer", "Float", "Boolean", + "Array", "List", "Tuple", "Set", "Dictionary", "Map", "Graph", "Tree", + "Stack", "Queue", "LinkedList", "Heap", "Bit", "Flag", "Mask", "Pointer", + "Reference", "Handle", "Address", "Buffer", "Cache", "Stream", "File", + "Directory", "Path", "URL", "URI", "DomainName", "IP_Address", "MAC_Address", + "PortNumber", "Socket", "Packet", "Frame", "Datagram", "Segment", "ProtocolStack", + "OSI_Model", "TCP_IP", "HTTP", "HTTPS", "FTP", "SSH", "SMTP", "POP3", "IMAP", + "DNS", "DHCP", "UDP", "ICMP", "ARP", "Ethernet", "WiFi", "Bluetooth", "NFC", + "Cellular", "Satellite", "FiberOptic", "CopperWire", "RadioWave", "Microwave", + "Infrared", "Ultraviolet", "XRay", "GammaRay", "VisibleLight", "SoundWave", + "Ultrasound", "Infrasound", "SeismicWave", "GravityWave", "Shockwave", + "BlastWave", "TidalWave", "Tsunami", "Ripple", "Current", "Eddy", "Vortex", + "Whirlpool", "Waterspout", "Tornado", "Hurricane", "Typhoon", "Cyclone", + "Blizzard", "Thunderstorm", "Lightning", "Thunder", "Hail", "Sleet", "Fog", + "Smog", "Haze", "Mist", "Dew", 
"Frost", "Ice", "Snowflake", "Avalanche", + "Landslide", "Mudslide", "Earthquake", "Aftershock", "Tremor", "Eruption", + "Lava", "Magma", "AshCloud", "PyroclasticFlow", "Caldera", "Crater", + "Fissure", "Vent", "FaultLine", "TectonicPlate", "Mantle", "OuterCore", + "InnerCore", "Crust", "Atmosphere", "Troposphere", "Stratosphere", "Mesosphere", + "Thermosphere", "Exosphere", "Ionosphere", "Magnetosphere", "OzoneLayer", + "VanAllenBelt", "Aurora", "Meteor", "Meteoroid", "ShootingStar", "Fireball", + "Bolide", "AsteroidBelt", "KuiperBelt", "OortCloud", "InterstellarMedium", + "IntergalacticSpace", "LocalGroup", "VirgoSupercluster", "Laniakea", + "ObservableUniverse", "CosmicWeb", "EventHorizon", "Spacetime", "Continuum", + "FabricOfReality", "AlternateDimension", "PocketUniverse", "AstralPlane", + "EtherealPlane", "Feywild", "Shadowfell", "ElementalPlane", "Heavens", + "Hells", "Limbo", "Purgatory", "Valhalla", "Elysium", "Underworld", "Afterlife", + "Reincarnation", "Nirvana", "Enlightenment", "Ascension", "Transcendence", + "Deity", "God", "Goddess", "Pantheon", "Mythology", "Cosmology", "Theology", + "Philosophy", "Ideology", "Doctrine", "Dogma", "Creed", "Belief", "Faith", + "Doubt", "Heresy", "Blasphemy", "Apostasy", "Schism", "Cult", "Sect", + "Religion", "Spirituality", "Atheism", "Agnosticism", "Humanism", "Secularism", + "Nihilism", "Existentialism", "Stoicism", "Epicureanism", "Cynicism", + "Hedonism", "Utilitarianism", "Rationalism", "Empiricism", "Idealism", + "Materialism", "Dualism", "Monism", "Determinism", "FreeWill", "Predestination", + "Karma", "Dharma", "Samsara", "Moksha", "Tao", "Chi", "Yin", "Yang", "Zen", + "Koan", "Satori", "Yoga", "Meditation", "Mindfulness", "Prayer", "Ritual", + "Ceremony", "Sacrament", "Initiation", "Pilgrimage", "Fasting", "Feast", + "Festival", "Holiday", "Sabbath", "Jubilee", "Tradition", "Custom", "Etiquette", + "Manners", "Protocol", "CodeOfConduct", "HonorCode", "Oath", "Vow", "Pledge", + "Promise", "Contract", 
"Agreement", "Treaty", "Alliance", "Pact", "Covenant", + "Law", "Statute", "Ordinance", "Regulation", "Rule", "Precedent", "Jurisprudence", + "Justice", "Equity", "Fairness", "Rights", "Freedoms", "Liberties", "Duties", + "Responsibilities", "Obligations", "Privileges", "Immunities", "Crime", + "Felony", "Misdemeanor", "Infraction", "Violation", "Offense", "Transgression", + "Sin", "Vice", "Virtue", "Merit", "Demerit", "Punishment", "Penalty", + "Fine", "Sentence", "Imprisonment", "Execution", "Exile", "Banishment", + "Ostracism", "Shunning", "Reputation", "Honor", "Shame", "Glory", "Infamy", + "Fame", "Notoriety", "Legacy", "Heritage", "Lineage", "Ancestry", "Descendants", + "Family", "Clan", "Tribe", "Nation", "Race", "Ethnicity", "Culture", "Society", + "Civilization", "Community", "Neighborhood", "Village", "Town", "Metropolis", + "Megalopolis", "State", "Province", "Territory", "Country", "Federation", + "Confederation", "Union", "Alliance", "Coalition", "Organization", "Institution", + "Corporation", "Company", "Business", "Enterprise", "Startup", "NonProfit", + "Foundation", "Association", "Guild", "Union", "Club", "Society", "Fraternity", + "Sorority", "Team", "Crew", "Gang", "Mob", "Syndicate", "Cartel", "Cult", + "Faction", "Party", "Movement", "Government", "Monarchy", "Republic", + "Democracy", "Theocracy", "Autocracy", "Oligarchy", "Anarchy", "Dictatorship", + "Totalitarianism", "Feudalism", "Capitalism", "Socialism", "Communism", + "Fascism", "Nationalism", "Imperialism", "Colonialism", "Globalism", + "Federalism", "Separatism", "Populism", "Liberalism", "Conservatism", + "Progressivism", "Libertarianism", "Environmentalism", "Feminism", "Pacifism", + "Militarism", "Revolution", "Rebellion", "Uprising", "Coup", "Insurrection", + "CivilWar", "War", "Battle", "Skirmish", "Siege", "Campaign", "Conflict", + "Truce", "Ceasefire", "Armistice", "PeaceTreaty", "Diplomacy", "Negotiation", + "Embargo", "Sanctions", "Espionage", "Intelligence", "Propaganda", 
"Sabotage", + "Terrorism", "CounterTerrorism", "Resistance", "Underground", "Dissident", + "Refugee", "AsylumSeeker", "DisplacedPerson", "Casualty", "Veteran", + "Memorial", "Monument", "History", "Prehistory", "Antiquity", "MiddleAges", + "Renaissance", "Enlightenment", "IndustrialRevolution", "InformationAge", + "Future", "Utopia", "Dystopia", "Apocalypse", "PostApocalypse", "Armageddon", + "Ragnarok", "JudgmentDay", "EndTimes", "NewBeginning", "GoldenAge", + "DarkAge", "Epoch", "Era", "Period", "Millennium", "Century", "Decade", + "Year", "Season", "Month", "Week", "Day", "Hour", "Minute", "Second", + "Moment", "Instant", "Eternity", "Infinity", "Continuum", "Cycle", "Rhythm", + "Tempo", "Cadence", "Frequency", "Interval", "Duration", "Timeline", + "Schedule", "Calendar", "Almanac", "Chronicle", "Annals", "Record", "Log", + "Journal", "Diary", "Memoir", "Biography", "Autobiography", "Novel", + "ShortStory", "Novella", "Epic", "Poem", "Ballad", "Sonnet", "Haiku", + "Limerick", "Verse", "Prose", "Play", "Script", "Screenplay", "Libretto", + "Lyrics", "Score", "SheetMusic", "Symphony", "Concerto", "Sonata", "Opera", + "Ballet", "Musical", "Oratorio", "Cantata", "Fugue", "Overture", "Suite", + "Aria", "Chorus", "Recitative", "Etude", "Nocturne", "Prelude", "Rhapsody", + "Waltz", "March", "Anthem", "Hymn", "Carol", "Chant", "Madrigal", "Motet", + "FolkSong", "Blues", "Jazz", "Rock", "Pop", "HipHop", "Electronic", "Classical", + "WorldMusic", "Ambient", "Soundtrack", "Jingle", "ThemeSong", "Lullaby", + "NurseryRhyme", "Riddle", "Proverb", "Maxim", "Aphorism", "Epigram", "Quote", + "Slogan", "Motto", "Catchphrase", "Buzzword", "Jargon", "Slang", "Dialect", + "Accent", "Language", "Alphabet", "Character", "Glyph", "Ideogram", "Logogram", + "Syllabary", "Phoneme", "Morpheme", "Word", "Phrase", "Clause", "Sentence", + "Paragraph", "Chapter", "Volume", "Text", "Speech", "Lecture", "Sermon", + "Debate", "Discussion", "Conversation", "Dialogue", "Monologue", "Soliloquy", + 
"Narration", "Description", "Exposition", "Argument", "Rhetoric", "Logic", + "Reason", "Emotion", "Passion", "Instinct", "Intuition", "Conscience", + "Morality", "Ethics", "Aesthetics", "Beauty", "Sublime", "Art", "Craft", + "Skill", "Technique", "Talent", "Genius", "Creativity", "Imagination", + "Inspiration", "Muse", "Medium", "Style", "Genre", "Movement", "School", + "Masterpiece", "WorkOfArt", "Oeuvre", "Canon", "Critique", "Review", + "Analysis", "Interpretation", "Theory", "Hypothesis", "Experiment", + "Observation", "Measurement", "Data", "Evidence", "Proof", "Conclusion", + "Discovery", "Invention", "Innovation", "Technology", "Science", "Mathematics", + "Physics", "Chemistry", "Biology", "Astronomy", "Geology", "Ecology", + "Medicine", "Engineering", "ComputerScience", "Psychology", "Sociology", + "Anthropology", "Economics", "PoliticalScience", "History", "Linguistics", + "Philosophy", "Literature", "Musicology", "ArtHistory", "Theology", + "Education", "Pedagogy", "Curriculum", "Lesson", "Lecture", "Seminar", + "Workshop", "Tutorial", "Exam", "Test", "Quiz", "Assignment", "Homework", + "Project", "Thesis", "Dissertation", "Diploma", "Degree", "Certificate", + "License", "Qualification", "Credential", "Skillset", "Expertise", "Competence", + "Proficiency", "Mastery", "KnowledgeBase", "Wisdom", "Understanding", + "Insight", "Awareness", "Perception", "Cognition", "Memory", "Recall", + "Recognition", "Learning", "Attention", "Concentration", "Focus", "Distraction", + "ThoughtProcess", "ProblemSolving", "DecisionMaking", "Judgment", "Bias", + "Heuristic", "Fallacy", "LogicError", "CognitiveDissonance", "Mindset", + "Attitude", "Perspective", "Worldview", "Paradigm", "FrameOfReference", + "BeliefSystem", "ValueSystem", "Motivation", "Drive", "Ambition", "Goal", + "Objective", "Purpose", "Meaning", "Intention", "Willpower", "Discipline", + "Habit", "Routine", "Emotion", "Feeling", "Mood", "Temperament", "Personality", + "Character", "Trait", "Disposition", 
"Behavior", "Action", "Reaction", + "Response", "Interaction", "Relationship", "Bond", "Connection", "Attachment", + "Affection", "Love", "Lust", "Infatuation", "Friendship", "Companionship", + "Rivalry", "Enmity", "Hatred", "Antipathy", "Indifference", "Empathy", + "Sympathy", "Compassion", "Kindness", "Cruelty", "Generosity", "Greed", + "Envy", "Jealousy", "Pride", "Humility", "Anger", "Rage", "Irritation", + "Annoyance", "Frustration", "Disappointment", "Sadness", "Grief", "Sorrow", + "Melancholy", "Despair", "Hope", "Optimism", "Pessimism", "Joy", "Happiness", + "Elation", "Ecstasy", "Bliss", "Contentment", "Satisfaction", "Gratitude", + "Regret", "Remorse", "Guilt", "Shame", "Embarrassment", "Anxiety", "Worry", + "Fear", "Terror", "Panic", "Phobia", "Stress", "Tension", "Relaxation", + "Calm", "Serenity", "Peace", "Tranquility", "Excitement", "Thrill", + "Anticipation", "Suspense", "Surprise", "Amazement", "Awe", "Wonder", + "Curiosity", "Boredom", "Apathy", "Lethargy", "Fatigue", "Energy", + "Vitality", "Vigor", "Stamina", "Endurance", "Strength", "Power", "Weakness", + "Fragility", "Resilience", "Toughness", "Hardiness", "Agility", "Dexterity", + "Coordination", "Balance", "Flexibility", "Speed", "Quickness", "Reflexes", + "Accuracy", "Precision", "Steadiness", "Health", "Wellness", "Sickness", + "Illness", "Disease", "Malady", "Ailment", "Condition", "Disorder", + "Syndrome", "Injury", "Wound", "Trauma", "Pain", "Ache", "Soreness", + "Comfort", "Discomfort", "Pleasure", "Displeasure", "Sensation", "Perception", + "Sight", "Vision", "Hearing", "Audition", "Smell", "Olfaction", "Taste", + "Gustation", "Touch", "Tactition", "Proprioception", "Nociception", + "Thermoception", "Equilibrioception", "Chronoception", "Interoception", + "Sense", "Instinct", "GutFeeling", "Hunch", "Premonition", "Clairvoyance", + "Telepathy", "Telekinesis", "Precognition", "Retrocognition", "Psychometry", + "AstralProjection", "Mediumship", "Channeling", "Divination", "Scrying", + 
"Augury", "Tarot", "Runes", "Astrology", "Numerology", "Palmistry", + "Geomancy", "Chiromancy", "Cartomancy", "Oneiromancy", "Necromancy", + "Alchemy", "Thaumaturgy", "Sorcery", "Witchcraft", "Wizardry", "Enchantment", + "Conjuration", "Summoning", "Invocation", "Evocation", "Abjuration", + "Transmutation", "Illusion", "Divination", "Restoration", "Destruction", + "Alteration", "Mysticism", "Occultism", "Esotericism", "Gnosticism", + "Hermeticism", "Kabbalah", "Theosophy", "Wicca", "Paganism", "Shamanism", + "Animism", "Polytheism", "Monotheism", "Pantheism", "Panentheism", "Deism", + "Agnosticism", "Atheism", "Humanism", "Secularism" +])) + +ACTIONS_VERBS = list(set([ + # Core + "Coding", "Gaming", "Writing", "Reading", "Drawing", "Singing", + "Dancing", "Running", "Jumping", "Building", "Exploring", "Crafting", + "Dreaming", "Living", "Growing", "Creating", "Sailing", "Flying", + "Fighting", "Casting", "Healing", "Stealing", "Forging", "Analyzing", + "Synthesizing", "Navigating", "Awakening", "Converging", "Hacking", + "Streaming", "Designing", "Composing", "Painting", "Sculpting", "Brewing", + "Enchanting", "Conjuring", "Summoning", "Banishing", "Protecting", + "Defending", "Attacking", "Striking", "Dodging", "Sneaking", "Tracking", + "Hunting", "Trapping", "Taming", "Riding", "Diving", "Swimming", + "Climbing", "Crawling", "Sprinting", "Leaping", "Falling", "Rising", + "Ascending", "Descending", "Teleporting", "Phasing", "Shifting", "Morphing", + "Transforming", "Shrinking", "Melting", "Freezing", "Exploding", + "Imploding", "Collapsing", "Expanding", "Radiating", "Absorbing", + "Reflecting", "Refracting", "Focusing", "Channeling", "Meditating", + "Remembering", "Forgetting", "Learning", "Teaching", "Knowing", "Believing", + "Doubting", "Questioning", "Answering", "Solving", "Destroying", "Breaking", + "Mending", "Restoring", "Corrupting", "Cleansing", "Blessing", "Cursing", + "Judging", "Forgiving", "Seeking", "Finding", "Losing", "Winning", + "Failing", 
"Surviving", "Thriving", "Vanishing", "Appearing", "Echoing", + "Resonating", "Vibrating", "Pulsing", "Shining", "Fading", "Observing", + "Listening", "Speaking", "Whispering", "Shouting", "Playing", "Working", + "Resting", "Waiting", "Watching", "Plotting", "Scheming", "Strategizing", + "Calculating", "Computing", "Processing", "Decrypting", "Encrypting", + "Uploading", "Downloading", "Connecting", "Disconnecting", "Evolving", + "Adapting", "Overcoming", "Mastering", "Yielding", "Submitting", "Governing", + # Expansion + "Thinking", "Pondering", "Contemplating", "Reflecting", "Considering", + "Imagining", "Visualizing", "Inventing", "Innovating", "Experimenting", + "Testing", "Measuring", "Calibrating", "Documenting", "Recording", "Logging", + "Charting", "Graphing", "Mapping", "Modeling", "Simulating", "Predicting", + "Forecasting", "Estimating", "Guessing", "Assuming", "Inferring", "Deducing", + "Inducing", "Reasoning", "Arguing", "Debating", "Discussing", "Negotiating", + "Bargaining", "Compromising", "Collaborating", "Cooperating", "Competing", + "Challenging", "Opposing", "Resisting", "Rebelling", "Fighting", "Battling", + "WagingWar", "Defending", "Guarding", "Shielding", "Warding", "Parrying", + "Blocking", "Intercepting", "Avoiding", "Evading", "Escaping", "Fleeing", + "Retreating", "Advancing", "Charging", "Pursuing", "Chasing", "Hunting", + "Stalking", "Ambushing", "Trapping", "Capturing", "Imprisoning", "Binding", + "Restraining", "Enslaving", "Liberating", "Freeing", "Rescuing", "Saving", + "Helping", "Assisting", "Supporting", "Aiding", "Comforting", "Consoling", + "Encouraging", "Motivating", "Inspiring", "Leading", "Guiding", "Directing", + "Commanding", "Ordering", "Instructing", "Training", "Coaching", "Mentoring", + "Advising", "Counseling", "Consulting", "Informing", "Notifying", "Warning", + "Alerting", "Reporting", "Communicating", "Signaling", "Gesturing", "Expressing", + "Showing", "Demonstrating", "Illustrating", "Explaining", "Describing", 
+ "Narrating", "Reciting", "Performing", "Acting", "Mimicking", "Impersonating", + "Joking", "Teasing", "Flirting", "Seducing", "Charming", "Persuading", + "Convincing", "Manipulating", "Deceiving", "Lying", "Betraying", "Tricking", + "Swindling", "Cheating", "Stealing", "Robbing", "Pilfering", "Plundering", + "Looting", "Smuggling", "Poaching", "Trespassing", "Violating", "Breaking", + "Vandalizing", "Destroying", "Demolishing", "Annihilating", "Obliterating", + "Erasing", "Deleting", "Burning", "Scorching", "Melting", "Dissolving", + "Crushing", "Shattering", "Splintering", "Tearing", "Ripping", "Cutting", + "Slicing", "Chopping", "Carving", "Etching", "Engraving", "Sculpting", + "Molding", "Shaping", "Forming", "Assembling", "Constructing", "Erecting", + "Raising", "Lifting", "Hoisting", "Lowering", "Dropping", "Placing", "Setting", + "Arranging", "Organizing", "Sorting", "Classifying", "Categorizing", "Labeling", + "Indexing", "Filing", "Storing", "Stockpiling", "Hoarding", "Collecting", + "Gathering", "Harvesting", "Reaping", "Mining", "Excavating", "Drilling", + "Digging", "Tunneling", "Exploring", "Surveying", "Scouting", "Reconnoitering", + "Patrolling", "Searching", "Seeking", "Questing", "Journeying", "Traveling", + "Wandering", "Roaming", "Drifting", "Migrating", "Commuting", "Driving", + "Flying", "Floating", "Hovering", "Gliding", "Soaring", "Plummeting", + "Diving", "Surfing", "Skating", "Skiing", "Snowboarding", "Cycling", + "Hiking", "Trekking", "Backpacking", "Camping", "Fishing", "Boating", + "Kayaking", "Canoeing", "Rafting", "Rowing", "Paddling", "Sailing", + "Cruising", "Motoring", "Piloting", "Navigating", "Steering", "Maneuvering", + "Parking", "Docking", "Landing", "Launching", "TakingOff", "Warping", + "Jumping", "Blinking", "Phasing", "Shifting", "Teleporting", "Summoning", + "Conjuring", "Invoking", "Evoking", "Banishing", "Dismissing", "Dispelling", + "Nullifying", "Countering", "Abjuring", "Warding", "Shielding", "Protecting", + 
"Healing", "Curing", "Mending", "Restoring", "Regenerating", "Reviving", + "Resurrecting", "Enhancing", "Augmenting", "Boosting", "Empowering", + "Strengthening", "Weakening", "Debilitating", "Crippling", "Hindering", + "Slowing", "Hastening", "Accelerating", "Enchanting", "Imbuing", "Blessing", + "Cursing", "Hexing", "Jinxing", "Bewitching", "Charming", "Transmuting", + "Altering", "Changing", "Morphing", "Transforming", "Shapeshifting", + "Illusioning", "Disguising", "Camouflaging", "Cloaking", "Vanishing", + "Appearing", "Materializing", "Dematerializing", "Divining", "Scrying", + "Predicting", "Foreseeing", "Prophesying", "Communicating", "Telepathing", + "Controlling", "Dominating", "Influencing", "Commanding", "Compelling", + "Possessing", "Animating", "ConstructingGolems", "RaisingUndead", "Necromancing", + "Experimenting", "Researching", "Studying", "Learning", "Memorizing", + "Recalling", "Forgetting", "Understanding", "Comprehending", "Interpreting", + "Translating", "Deciphering", "Decoding", "Encoding", "Encrypting", + "Computing", "Calculating", "Programming", "Debugging", "Testing", "Optimizing", + "Refactoring", "Deploying", "Maintaining", "Updating", "Upgrading", + "Downgrading", "Installing", "Uninstalling", "Configuring", "Troubleshooting", + "Monitoring", "Logging", "Auditing", "Securing", "Hardening", "Patching", + "BackingUp", "Restoring", "Migrating", "Cloning", "Virtualizing", + "Containerizing", "Orchestrating", "Scaling", "LoadBalancing", "Networking", + "Routing", "Switching", "Bridging", "Firewalling", "Filtering", "Proxying", + "Authenticating", "Authorizing", "Accounting", "Browsing", "Searching", + "Googling", "Surfing", "Streaming", "Downloading", "Uploading", "Sharing", + "Posting", "Blogging", "Vlogging", "Tweeting", "Commenting", "Liking", + "Subscribing", "Following", "Friending", "Unfriending", "Blocking", "Reporting", + "Messaging", "Chatting", "Emailing", "Calling", "VideoConferencing", "Gaming", + "Playing", "Competing", 
"Cooperating", "Winning", "Losing", "Drawing", + "LevelingUp", "Grinding", "Farming", "Looting", "Crafting", "Trading", + "Questing", "Raiding", "Exploring", "Roleplaying", "Strategizing", "Tacticking", + "Practicing", "Training", "Exercising", "WorkingOut", "Stretching", "WarmingUp", + "CoolingDown", "Lifting", "Running", "Jogging", "Walking", "Swimming", + "Cycling", "Yogaing", "Pilatesing", "Meditating", "Relaxing", "Resting", + "Sleeping", "Napping", "Dreaming", "Waking", "Rising", "Eating", "Drinking", + "Feasting", "Dining", "Snacking", "Tasting", "Sipping", "Gulping", "Chewing", + "Swallowing", "Digesting", "Breathing", "Inhaling", "Exhaling", "Panting", + "Gasping", "Sighing", "Yawning", "Coughing", "Sneezing", "Hiccuping", + "Burping", "Farting", "Seeing", "Looking", "Watching", "Observing", "Staring", + "Gazing", "Glancing", "Peeking", "Squinting", "Blinking", "Winking", "Hearing", + "Listening", "Overhearing", "Eavesdropping", "Smelling", "Sniffing", "Inhaling", + "Tasting", "Savoring", "Licking", "Touching", "Feeling", "Probing", "Poking", + "Stroking", "Petting", "Patting", "Grabbing", "Grasping", "Clutching", + "Holding", "Carrying", "Lifting", "Pushing", "Pulling", "Dragging", "Throwing", + "Catching", "Tossing", "Hitting", "Punching", "Kicking", "Slapping", "Striking", + "Bashing", "Smashing", "Crushing", "Shooting", "Firing", "Launching", + "Bombing", "Exploding", "Detonating", "Speaking", "Talking", "Chatting", + "Whispering", "Muttering", "Murmuring", "Shouting", "Yelling", "Screaming", + "Singing", "Humming", "Whistling", "Chanting", "Reciting", "Laughing", + "Giggling", "Chuckling", "Crying", "Sobbing", "Weeping", "Wailing", "Groaning", + "Moaning", "Grunting", "Growling", "Snarling", "Hissing", "Roaring", "Barking", + "Meowing", "Chirping", "Croaking", "Buzzing", "Howling", "Screeching", + "Clapping", "Snapping", "Stomping", "Tapping", "Knocking", "Banging", + "Rattling", "Shaking", "Vibrating", "Pulsing", "Beating", "Thumping", + "Flowing", 
"Streaming", "Pouring", "Dripping", "Leaking", "Seeping", + "Gushing", "Spraying", "Splashing", "Bubbling", "Boiling", "Simmering", + "Freezing", "Thawing", "Melting", "Evaporating", "Condensing", "Sublimating", + "Depositing", "Growing", "Shrinking", "Expanding", "Contracting", "Swelling", + "Blooming", "Wilting", "Sprouting", "Ripening", "Rotting", "Decaying", + "Decomposing", "Festering", "Fermenting", "Aging", "Maturing", "Developing", + "Evolving", "Mutating", "Adapting", "Regenerating", "Reproducing", "Breeding", + "Spawning", "Hatching", "Birthing", "Nursing", "Nurturing", "Raising", + "Teaching", "Educating", "Indoctrinating", "Brainwashing", "Grooming", + "Socializing", "Integrating", "Assimilating", "Alienating", "Isolating", + "Segregating", "Uniting", "Dividing", "Joining", "Leaving", "Entering", + "Exiting", "Arriving", "Departing", "Staying", "Moving", "Relocating", + "Settling", "Establishing", "Founding", "Abolishing", "Ending", "Finishing", + "Completing", "Starting", "Beginning", "Initiating", "Continuing", "Persisting", + "Resuming", "Pausing", "Stopping", "Ceasing", "Halting", "Interrupting", + "Delaying", "Postponing", "Accelerating", "Slowing", "Maintaining", "Sustaining", + "Preserving", "Conserving", "Protecting", "Saving", "Wasting", "Squandering", + "Consuming", "Using", "Utilizing", "Employing", "Applying", "Implementing", + "Executing", "Performing", "Operating", "Running", "Managing", "Administering", + "Supervising", "Overseeing", "Controlling", "Governing", "Ruling", "Leading", + "Following", "Obeying", "Serving", "Assisting", "Working", "Toiling", "Laboring", + "Striving", "Endeavoring", "Attempting", "Trying", "Succeeding", "Achieving", + "Accomplishing", "Failing", "Struggling", "Suffering", "Enduring", "Tolerating", + "Accepting", "Rejecting", "Approving", "Disapproving", "Praising", "Criticizing", + "Blaming", "Accusing", "Condemning", "Forgiving", "Pardoning", "Excusing", + "Justifying", "Defending", "Advocating", "Supporting", 
"Opposing", "Protesting", + "Demonstrating", "Petitioning", "Lobbying", "Voting", "Campaigning", "Electing", + "Appointing", "Promoting", "Demoting", "Hiring", "Firing", "Retiring", + "Resigning", "Investing", "Trading", "Buying", "Selling", "Bartering", "Lending", + "Borrowing", "Donating", "Receiving", "Giving", "Taking", "Sharing", "Dividing", + "Combining", "Merging", "Separating", "Splitting", "Connecting", "Disconnecting", + "Linking", "Unlinking", "Attaching", "Detaching", "Binding", "Unbinding", + "Wrapping", "Unwrapping", "Covering", "Uncovering", "Hiding", "Revealing", + "Exposing", "Concealing", "Masking", "Disguising", "Identifying", "Recognizing", + "Labeling", "Marking", "Branding", "Noticing", "Perceiving", "Realizing", + "Acknowledging", "Ignoring", "Overlooking", "Forgetting", "Remembering", + "Recollecting", "Reminiscing", "Anticipating", "Expecting", "Hoping", "Fearing", + "Worrying", "Wishing", "Desiring", "Craving", "Yearning", "Loving", "Hating", + "Liking", "Disliking", "Admiring", "Despising", "Respecting", "Disrespecting", + "Trusting", "Distrusting", "Believing", "Doubting", "Questioning", "Wondering", + "Imagining", "Fantasizing", "Hallucinating", "Focusing", "Concentrating", + "PayingAttention", "Ignoring", "Meditating", "Praying", "Worshipping", + "Celebrating", "Mourning", "Grieving", "Ritualizing", "Ceremonializing", + "Consecrating", "Desecrating", "Purifying", "Tainting", "Sanctifying", + "Defiling", "Redeeming", "Damning", "Saving", "Condemning", "Absolving", + "Judging", "Sentencing", "Punishing", "Rewarding", "Enforcing", "Regulating", + "Legislating", "Governing", "Diplomating", "Negotiating", "Arbitrating", + "Mediating", "Reconciling", "Peacemaking", "Warring", "Conquering", + "Liberating", "Colonizing", "Settling", "Pioneering", "Innovating", + "Discovering", "Inventing", "Creating", "Artisting", "Musicking", "Writing", + "Storytelling", "Philosophizing", "Theorizing", "Hypothesizing", "Analyzing", + "Synthesizing", 
"Critiquing", "Reviewing", "Editing", "Publishing", "Broadcasting", + "Communicating", "Teaching", "Learning", "Studying", "Researching", "Archiving", + "Preserving", "Curating", "Exhibiting", "Performing", "Entertaining", + "Amusing", "Distracting", "Inspiring", "Motivating", "Challenging", + "Provoking", "Comforting", "Soothing", "Healing", "Nourishing", "Sustaining", + "Living", "Being", "Existing", "Becoming", "Transcending", "Ascending", + "Perishing", "Dying", "Ceasing", "Ending" +])) + +# Verify list sizes BEFORE combining +print(f"Unique Professions: {len(PROFESSIONS)}") +print(f"Unique Adjectives: {len(ADJECTIVES)}") +print(f"Unique Objects: {len(OBJECTS)}") +print(f"Unique Actions: {len(ACTIONS_VERBS)}") +print("-" * 20) + + +# Combine word lists for the first part of the username +ALL_WORD_OPTIONS = PROFESSIONS + ADJECTIVES + OBJECTS + ACTIONS_VERBS + +# Options for the second part (Object or Verb/Action) +SECOND_PART_OPTIONS = OBJECTS + ACTIONS_VERBS + +# --- Separators --- +SEPARATORS = ['_', '-', '.', '', ''] # Include empty string '' for no separator + +# --- Special Characters --- +SINGLE_SPECIAL_CHARS = ['_', '-', '*', '#', '!', '.', ':', ';', '~', '=', '+'] +SYMMETRICAL_PAIRS = [('{', '}'), ('[', ']'), ('(', ')'), ('<', '>'), ('/', '\\'), ('|', '|')] + +# --- Configuration for Variability --- +SPECIAL_CHAR_ADD_PROBABILITY = 0.8 +SYMMETRICAL_CHAR_PROBABILITY = 0.4 +MAX_SINGLE_CHARS_COUNT = 4 + +# --- Generation Function --- +def generate_username(): + """Generates a single username with random components and special characters.""" + try: + word1 = random.choice(ALL_WORD_OPTIONS) + separator = random.choice(SEPARATORS) + word2 = random.choice(SECOND_PART_OPTIONS) + except IndexError: + # Fallback if any list ended up empty (shouldn't happen with populated lists) + return "ErrorFallbackUser" + + username_core = word1 + separator + word2 + + start_chars = "" + end_chars = "" + + include_special_chars = random.random() < SPECIAL_CHAR_ADD_PROBABILITY + 
+ if include_special_chars: + location = random.choice(['start', 'end', 'both']) + use_symmetrical_pair = (location == 'both') and (random.random() < SYMMETRICAL_CHAR_PROBABILITY) + + if use_symmetrical_pair and SYMMETRICAL_PAIRS: + open_char, close_char = random.choice(SYMMETRICAL_PAIRS) + start_chars = open_char + end_chars = close_char + else: + if location in ['start', 'both'] and SINGLE_SPECIAL_CHARS: + k = random.randint(1, MAX_SINGLE_CHARS_COUNT) + start_chars = ''.join(random.choices(SINGLE_SPECIAL_CHARS, k=k)) + if location in ['end', 'both'] and SINGLE_SPECIAL_CHARS: + k = random.randint(1, MAX_SINGLE_CHARS_COUNT) + end_chars = ''.join(random.choices(SINGLE_SPECIAL_CHARS, k=k)) + + final_username = start_chars + username_core + end_chars + final_username = final_username.strip() # Remove accidental whitespace + + # Basic check to avoid usernames that are *only* special characters + if not any(c.isalnum() for c in final_username) and final_username: + # If it contains no letters or numbers, generate a simpler fallback + try: + return random.choice(ALL_WORD_OPTIONS) + random.choice(SEPARATORS) + random.choice(SECOND_PART_OPTIONS) + except IndexError: + return "ErrorFallbackUser2" + + # Ensure username is not empty after stripping + if not final_username: + try: + return random.choice(ALL_WORD_OPTIONS) + random.choice(SECOND_PART_OPTIONS) # Force concatenation + except IndexError: + return "ErrorFallbackUser3" + + return final_username + +# --- Main Logic --- + +output_filename = "generated.py" +output_directory = "." # Use "." 
for current directory, or specify a path +full_output_path = os.path.join(output_directory, output_filename) + +# Check for the --make_all flag +make_all_combinations = "--make_all" in sys.argv + +USERNAMES_LIST = [] # Initialize an empty list or set depending on mode + +if make_all_combinations: + print("Generating ALL unique combinations...") + + # Use a set to automatically handle uniqueness + all_unique_usernames_set = set() + + # Calculate all core combinations (Word1 + Separator + Word2) + core_combinations = list(itertools.product(ALL_WORD_OPTIONS, SEPARATORS, SECOND_PART_OPTIONS)) + print(f"Calculating {len(core_combinations):,} core combinations...") + + # Calculate all possible single character sequences (length 1 to MAX_SINGLE_CHARS_COUNT) + all_single_sequences = [] + for k in range(1, MAX_SINGLE_CHARS_COUNT + 1): + all_single_sequences.extend([''.join(seq) for seq in itertools.product(SINGLE_SPECIAL_CHARS, repeat=k)]) + # Include the empty string for cases where chars are only at one end, or none + all_single_sequences_with_empty = [''] + all_single_sequences + + num_single_sequences = len(all_single_sequences) + num_single_sequences_with_empty = len(all_single_sequences_with_empty) + + # Generate and add variations + # This loop might be very long depending on list sizes and MAX_SINGLE_CHARS_COUNT + # Progress indicator is helpful here + total_cores = len(core_combinations) + for i, (word1, sep, word2) in enumerate(core_combinations): + if (i + 1) % 10000 == 0 or (i + 1) == total_cores: + print(f"Processing core combination {i + 1:,} of {total_cores:,}...", end='\r') + + core_username = word1 + sep + word2 + + # Variation 1: Core only + all_unique_usernames_set.add(core_username) + + # Variation 2: Symmetrical pairs wrapping core + for open_char, close_char in SYMMETRICAL_PAIRS: + all_unique_usernames_set.add(open_char + core_username + close_char) + + # Variations 3, 4, 5: Single characters at start/end/both + # This combines variations 3, 4, and 5 
efficiently + for start_seq in all_single_sequences_with_empty: + for end_seq in all_single_sequences_with_empty: + # Avoid adding the core_username again (case where start_seq and end_seq are both empty) + if start_seq == '' and end_seq == '': + continue # Already added above + + # Avoid adding symmetrical pair wraps here if they overlap with single chars + # For simplicity with large lists, we assume symmetrical pairs are distinct + # from repeated single chars. This might slightly overcount if a pair matches, + # e.g. `__username__` vs `{username}` if `_` was also in SYMMETRICAL_PAIRS. + # Given the typical chars in SYMMETRICAL_PAIRS and SINGLE_SPECIAL_CHARS, + # overlap is minimal. The set handles duplicates anyway. + + all_unique_usernames_set.add(start_seq + core_username + end_seq) + + # Convert set to list for writing + USERNAMES_LIST = list(all_unique_usernames_set) + print(f"\nFinished generating {len(USERNAMES_LIST):,} unique usernames.") + +else: # Default behavior: Generate a sample and print count + NUM_USERNAMES_TO_GENERATE = 16000 # Adjust as needed + print(f"Generating a sample of {NUM_USERNAMES_TO_GENERATE} usernames...") + # Keep the sampling function call + USERNAMES_LIST = [generate_username() for _ in range(NUM_USERNAMES_TO_GENERATE)] + print("Sample generation complete.") + + # --- Calculate and Print Total Possible Combinations --- + num_word1_options = len(ALL_WORD_OPTIONS) + num_sep_options = len(SEPARATORS) + num_word2_options = len(SECOND_PART_OPTIONS) + num_core_combos = num_word1_options * num_sep_options * num_word2_options + + num_symmetrical_pair_options = len(SYMMETRICAL_PAIRS) + + # Number of possible single char sequences (length 1 to MAX) + num_single_seq_options = sum(len(SINGLE_SPECIAL_CHARS)**k for k in range(1, MAX_SINGLE_CHARS_COUNT + 1)) + + # The total number of *unique strings* possible is complex to calculate exactly + # without generating them all and putting them in a set (which make_all does). 
+ # We can estimate based on the structures: + # Core Only: num_core_combos + # Symmetrical Wrap: num_core_combos * num_symmetrical_pair_options + # Single Start (1-MAX): num_core_combos * num_single_seq_options + # Single End (1-MAX): num_core_combos * num_single_seq_options + # Single Both (1-MAX each): num_core_combos * num_single_seq_options * num_single_seq_options + + # This sum is an upper bound / estimate, as some generated strings might overlap + # (e.g., "__user__" could potentially be generated by Single-Start-Both if "__" + # is a sequence, or by Single-Start-End if both are "_"). The set handles this + # in the make_all case. For printing the count, the sum is a good indicator + # of the immense scale of potential unique combinations. + estimated_total_unique_combos = ( + num_core_combos + + (num_core_combos * num_symmetrical_pair_options) + + (num_core_combos * num_single_seq_options) + + (num_core_combos * num_single_seq_options) + + (num_core_combos * num_single_seq_options * num_single_seq_options) + ) + + + print("\n--- Potential Username Combinations ---") + print(f"Number of Word1 options: {num_word1_options:,}") + print(f"Number of Separator options: {num_sep_options:,}") + print(f"Number of Word2 options: {num_word2_options:,}") + print(f"Core combinations (W1+Sep+W2): {num_core_combos:,}") + print(f"Symmetrical Pair wraps: {num_symmetrical_pair_options:,}") + print(f"Single Special Sequences (1-{MAX_SINGLE_CHARS_COUNT}): {num_single_seq_options:,}") + print("-" * 40) + # Use the estimated total for the final number + print(f"Estimated Total Unique Combinations (including special chars): {estimated_total_unique_combos:,}") + print("(This is an estimate based on structural variations; exact count requires generating all)") + print("-------------------------------------\n") + + +# --- Write to File (Shared Logic) --- +print(f"Writing {len(USERNAMES_LIST):,} usernames to '{full_output_path}'...") + +# Format the output string as a Python list 
assignment +output_string = "# -*- coding: utf-8 -*-\n" # Add encoding declaration to the output file too +output_string += "# Auto-generated list of usernames\n\n" +output_string += "USERNAMES = [\n" + +# Iterate through the generated list (from either mode) and write +for username in USERNAMES_LIST: + # Escape backslashes and double quotes within the username string + # to make it a valid Python string literal + escaped_username = username.replace('\\', '\\\\').replace('"', '\\"') + # Ensure output is valid UTF-8 for file writing + try: + output_string += f' "{escaped_username}",\n' # Indent, quote, add comma and newline + except UnicodeEncodeError: + print(f"Warning: Skipping username with characters incompatible with default encoding: {username}") + continue # Skip writing this username if it causes issues + +output_string += "]\n" # Close the list definition + +# Write the string to the file +try: + # Use 'w' mode to overwrite the file if it exists, create if not + # Specify encoding for broader character support + with open(full_output_path, 'w', encoding='utf-8') as f: + f.write(output_string) + print(f"Successfully wrote {len(USERNAMES_LIST):,} usernames to '{full_output_path}'") + +except IOError as e: + print(f"Error: Could not write to file '{full_output_path}'. 
Reason: {e}") +except Exception as e: + print(f"An unexpected error occurred during file writing: {e}") \ No newline at end of file diff --git a/logs/requirements.txt b/logs/requirements.txt index 8b13789..44ea884 100644 --- a/logs/requirements.txt +++ b/logs/requirements.txt @@ -1 +1,18 @@ +# Core dependencies for convert.py +pandas>=1.3.0 +pandas-image-methods>=0.2.0 +transformers>=4.20.0 +torch>=1.12.0 +tqdm>=4.64.0 +pillow>=9.0.0 +pyarrow>=10.0.0 + +# Optional dependencies for enhanced functionality +datasets>=2.0.0 +dask[complete]>=2022.7.0 +distributed>=2022.7.0 + +# Additional utility dependencies +numpy>=1.21.0 +requests>=2.25.0 From a6c94778f20710c778c6d49f5485e51963e038b1 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 18:16:04 -0700 Subject: [PATCH 25/26] Update README.md Added logging and conversion information --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index b468ada..2dafabc 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,22 @@ Supported Embedding APIs: `openai`, `google`, `replicate`, `huggingface`, `novit If you try to use an unsupported model, then it will default to a simple word-overlap method. Expect reduced performance, recommend mixing APIs to ensure embedding support. +## Dataset collection + +Mindcraft can collect data while you play with the bots, which can be used to generate training data to fine-tune models such as Andy-4. To do this, enable logging inside of `settings.js`, then navigate to the `logs` folder. + +Inside the `logs` folder, after installing the dependencies, you will find a file named `generate_usernames.py`. You need to run this in order to convert your collected data into a usable dataset. This will generate a bunch of random names to replace the name of your bot and your username, both of which improve performance later on. + +To run it, run `python generate_usernames.py`.
Generating every possible username would take up multiple terabytes of data. If for some reason you want to do this, run it with the `--make_all` flag. + +Next, you need to set up `convert.py` to include every username that interacted with the bot, as well as the bot's own username. This is done by adding / changing the usernames in the `ORIGINAL_USERNAMES` list. + +After this, you are all set up for conversion! Since you might not want to convert all data at once, you must rename the `.csv` file*(s)* that you want to convert, giving them the name `Andy_pre1`. To convert more than one file, change `1` to the next number for each additional file; this number can be as high as you want. + +To convert, run `python convert.py`. If you get a dependency error, ensure you are in a virtual Python environment rather than a global one. + +For setting up vision datasets, run `convert.py` with the `--vision` flag; this does the same thing as the other conversions, but changes the output to an image-friendly format. + ## Specifying Profiles via Command Line By default, the program will use the profiles specified in `settings.js`. You can specify one or more agent profiles using the `--profiles` argument: `node main.js --profiles ./profiles/andy.json ./profiles/jill.json` From 78e3785c07d80b7d2b97794d9f3618fdd7b3a0a7 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Sat, 7 Jun 2025 18:16:33 -0700 Subject: [PATCH 26/26] Update convert.py Changed names --- logs/convert.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/logs/convert.py b/logs/convert.py index b15770b..f78ec22 100644 --- a/logs/convert.py +++ b/logs/convert.py @@ -41,8 +41,7 @@ while True: # Define the original usernames.
ORIGINAL_USERNAMES = [ - "SweaterDog_YT", "SweaterDog", "Sweaterdog", "Foolish_Pear69", "Farquadthegod72", "Hank", - "Gordan", "Perry", "Frederick", "Oliver", "Bill", "Ashley", "Greg", "Treb", "Mia", "Tia", "ALBeRT", "Jason" + "Your_username", "Andy" ] # Define outputs that should cause the conversation to be deleted.