diff --git a/README.md b/README.md
index 07ce15a..6ab46c2 100644
--- a/README.md
+++ b/README.md
@@ -16,13 +16,13 @@ Do not connect this bot to public servers with coding enabled. This project allo
## Install and Run
-1. Make sure you have the requirements above.
+1. Make sure you have the requirements above. If you plan to use the STT (Speech-to-Text) feature, also review the "Installation Prerequisites" section regarding `naudiodon`.
2. Clone or download this repository (big green button) 'git clone https://github.com/kolbytn/mindcraft.git'
3. Rename `keys.example.json` to `keys.json` and fill in your API keys (you only need one). The desired model is set in `andy.json` or other profiles. For other models refer to the table below.
-4. In terminal/command prompt, run `npm install` from the installed directory
+4. In terminal/command prompt, run `npm install` from the installed directory. (Note: If `naudiodon` fails to build and you don't need STT, you can usually proceed.)
5. Start a minecraft world and open it to LAN on localhost port `55916`
@@ -53,7 +53,7 @@ You can configure the agent's name, model, and prompts in their profile like `an
| `anthropic` | `ANTHROPIC_API_KEY` | `claude-3-haiku-20240307` | [docs](https://docs.anthropic.com/claude/docs/models-overview) |
| `xai` | `XAI_API_KEY` | `grok-2-1212` | [docs](https://docs.x.ai/docs) |
| `deepseek` | `DEEPSEEK_API_KEY` | `deepseek-chat` | [docs](https://api-docs.deepseek.com/) |
-| `ollama` (local) | n/a | `ollama/llama3.1` | [docs](https://ollama.com/library) |
+| `ollama` (local) | n/a | `ollama/sweaterdog/andy-4` | [docs](https://ollama.com/library) |
| `qwen` | `QWEN_API_KEY` | `qwen-max` | [Intl.](https://www.alibabacloud.com/help/en/model-studio/developer-reference/use-qwen-by-calling-api)/[cn](https://help.aliyun.com/zh/model-studio/getting-started/models) |
| `mistral` | `MISTRAL_API_KEY` | `mistral-large-latest` | [docs](https://docs.mistral.ai/getting-started/models/models_overview/) |
| `replicate` | `REPLICATE_API_KEY` | `replicate/meta/meta-llama-3-70b-instruct` | [docs](https://replicate.com/collections/language-models) |
@@ -66,7 +66,25 @@ You can configure the agent's name, model, and prompts in their profile like `an
| `vllm` | n/a | `vllm/llama3` | n/a |
If you use Ollama, to install the models used by default (generation and embedding), execute the following terminal command:
-`ollama pull llama3.1 && ollama pull nomic-embed-text`
+`ollama pull sweaterdog/andy-4 && ollama pull nomic-embed-text`
+
+**Additional info about Andy-4:**
+
+Andy-4 is a community-made, open-source model created by Sweaterdog to play Minecraft.
+Because Andy-4 is open-source, you can download the model and play with it offline, for free.
+
+The Andy-4 collection of models has reasoning and non-reasoning modes; sometimes the model will reason automatically without being prompted.
+If you want to explicitly enable reasoning, use the `andy-4-reasoning.json` profile.
+Some Andy-4 models may not be able to disable reasoning, no matter which profile is used.
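+
+For example, you can select that profile at launch (the `--profiles` flag is described under "Specifying Profiles via Command Line" below; the path assumes the profile sits in the repo's `profiles/` folder):
+`node main.js --profiles ./profiles/andy-4-reasoning.json`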
+
+Andy-4 comes in several model sizes.
+For more information about which model size is best for you, check [Sweaterdog's Ollama page](https://ollama.com/Sweaterdog/Andy-4).
+
+If you have any issues, join the Mindcraft server and ping `@Sweaterdog`, or open an issue on the [Andy-4 huggingface repo](https://huggingface.co/Sweaterdog/Andy-4/discussions/new).
+
### Online Servers
To connect to online servers your bot will need an official Microsoft/Minecraft account. You can use your own personal one, but will need another account if you want to connect too and play with it. To connect, change these lines in `settings.js`:
@@ -102,6 +120,21 @@ When running in docker, if you want the bot to join your local minecraft server,
To connect to an unsupported minecraft version, you can try to use [viaproxy](services/viaproxy/README.md)
+## STT in Mindcraft
+
+STT (Speech-to-Text) lets you talk to the model through a microphone.
+
+STT can be enabled in `settings.js` under the section that looks like this:
+```javascript
+    "stt_transcription": true, // Set this to true to enable STT
+ "stt_username": "SYSTEM",
+ "stt_agent_name": ""
+```
+
+The speech-to-text engine will begin listening on the system default input device. **Note:** Successful STT operation depends on the `naudiodon` package, which is an optional dependency. If `naudiodon` failed to install or build (see "Installation Prerequisites" for troubleshooting), STT will be disabled.
+
+When using STT, you **need** a [GroqCloud API key](https://console.groq.com/keys), as Groq is used for audio transcription. Put the key in the `GROQCLOUD_API_KEY` field of your `keys.json` (see `keys.example.json`).
+
# Bot Profiles
Bot profiles are json files (such as `andy.json`) that define:
@@ -155,6 +188,22 @@ Supported Embedding APIs: `openai`, `google`, `replicate`, `huggingface`, `novit
If you try to use an unsupported model, then it will default to a simple word-overlap method. Expect reduced performance, recommend mixing APIs to ensure embedding support.
+## Dataset collection
+
+Mindcraft can collect data while you play with the bots, which can be used to generate training data for fine-tuning models such as Andy-4. To do this, enable logging inside `settings.js`, then navigate to the `logs` folder.
+
+Inside the logs folder, after installing the dependencies, you will find a file named `generate_usernames.py`. You need to run it to convert your collected data into a usable dataset: it generates a pool of random names to replace your bot's name and your username, both of which improve performance later on.
+
+To run it, use `python generate_usernames.py`. Generating the maximum number of usernames would take up multiple terabytes of data; if for some reason you want that, run it with the `--make_all` flag.
+
+Next, you need to set up `convert.py` to include every username that interacted with the bot, as well as the bot's own username. This is done by adding or changing the usernames in the `ORIGINAL_USERNAMES` list.
+
+After this, you are all set up for conversion! Since you might not want to convert all data at once, rename each `.csv` file that you want to convert to `Andy_pre1.csv`; for additional files, increment the number (`Andy_pre2.csv`, `Andy_pre3.csv`, and so on, as high as you like).
+
+To convert, run `python convert.py`. If you get a dependency error, ensure you are in a Python virtual environment rather than a global one.
+
+For vision datasets, run `convert.py` with the `--vision` flag; this performs the same conversion, but outputs the data in an image-friendly format.
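+
+A minimal sketch of the workflow above (run from the `logs` folder; `normal_logs.csv` is the default text log written by `logger.js`, and the other file names follow the conventions described here):
+
+```bash
+python generate_usernames.py      # build the random-username pool
+# edit ORIGINAL_USERNAMES in convert.py to list your username and the bot's
+mv normal_logs.csv Andy_pre1.csv  # rename each CSV to convert to Andy_preN.csv
+python convert.py                 # writes Andy_conversations.parquet
+python convert.py --vision        # or: convert collected vision data instead
+```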
+
## Specifying Profiles via Command Line
By default, the program will use the profiles specified in `settings.js`. You can specify one or more agent profiles using the `--profiles` argument: `node main.js --profiles ./profiles/andy.json ./profiles/jill.json`
diff --git a/keys.example.json b/keys.example.json
index 99286c5..d9edf8b 100644
--- a/keys.example.json
+++ b/keys.example.json
@@ -1,17 +1,17 @@
-{
- "OPENAI_API_KEY": "",
- "OPENAI_ORG_ID": "",
- "GEMINI_API_KEY": "",
- "ANTHROPIC_API_KEY": "",
- "REPLICATE_API_KEY": "",
- "GROQCLOUD_API_KEY": "",
- "HUGGINGFACE_API_KEY": "",
- "QWEN_API_KEY": "",
- "XAI_API_KEY": "",
- "MISTRAL_API_KEY": "",
- "DEEPSEEK_API_KEY": "",
- "GHLF_API_KEY": "",
- "HYPERBOLIC_API_KEY": "",
- "NOVITA_API_KEY": "",
- "OPENROUTER_API_KEY": ""
-}
+{
+ "OPENAI_API_KEY": "",
+ "OPENAI_ORG_ID": "",
+ "GEMINI_API_KEY": "",
+ "ANTHROPIC_API_KEY": "",
+ "REPLICATE_API_KEY": "",
+ "GROQCLOUD_API_KEY": "",
+ "HUGGINGFACE_API_KEY": "",
+ "QWEN_API_KEY": "",
+ "XAI_API_KEY": "",
+ "MISTRAL_API_KEY": "",
+ "DEEPSEEK_API_KEY": "",
+ "GHLF_API_KEY": "",
+ "HYPERBOLIC_API_KEY": "",
+ "NOVITA_API_KEY": "",
+ "OPENROUTER_API_KEY": ""
+}
diff --git a/logger.js b/logger.js
new file mode 100644
index 0000000..3848349
--- /dev/null
+++ b/logger.js
@@ -0,0 +1,432 @@
+import { writeFileSync, mkdirSync, existsSync, appendFileSync, readFileSync } from 'fs';
+import { join } from 'path';
+import settings from './settings.js'; // Import settings
+import path from 'path'; // Needed for path operations
+
+// --- Configuration ---
+const LOGS_DIR = './logs';
+const VISION_DATASET_DIR = join(LOGS_DIR, 'vision_dataset'); // HuggingFace dataset format
+const VISION_IMAGES_DIR = join(VISION_DATASET_DIR, 'images'); // Images subdirectory
+
+// --- Log File Paths ---
+const REASONING_LOG_FILE = join(LOGS_DIR, 'reasoning_logs.csv');
+const NORMAL_LOG_FILE = join(LOGS_DIR, 'normal_logs.csv');
+const VISION_METADATA_FILE = join(VISION_DATASET_DIR, 'metadata.jsonl'); // HF metadata format
+
+// --- Log Headers ---
+const TEXT_LOG_HEADER = 'input,output\n';
+
+// --- Log Counters ---
+let logCounts = {
+ normal: 0,
+ reasoning: 0,
+ vision: 0,
+ total: 0,
+ skipped_disabled: 0,
+ skipped_empty: 0,
+ vision_images_saved: 0,
+};
+
+// --- Helper Functions ---
+function ensureDirectoryExistence(dirPath) {
+ if (!existsSync(dirPath)) {
+ try {
+ mkdirSync(dirPath, { recursive: true });
+ console.log(`[Logger] Created directory: ${dirPath}`);
+ } catch (error) {
+ console.error(`[Logger] Error creating directory ${dirPath}:`, error);
+ return false;
+ }
+ }
+ return true;
+}
+
+function countLogEntries(logFile) {
+ if (!existsSync(logFile)) return 0;
+ try {
+ const data = readFileSync(logFile, 'utf8');
+ const lines = data.split('\n').filter(line => line.trim());
+ // Check if the first line looks like a header before subtracting
+ const hasHeader = lines.length > 0 && lines[0].includes(',');
+ return Math.max(0, hasHeader ? lines.length - 1 : lines.length);
+ } catch (err) {
+ console.error(`[Logger] Error reading log file ${logFile}:`, err);
+ return 0;
+ }
+}
+
+
+function ensureLogFile(logFile, header) {
+ if (!ensureDirectoryExistence(path.dirname(logFile))) return false; // Ensure parent dir exists
+
+ if (!existsSync(logFile)) {
+ try {
+ writeFileSync(logFile, header);
+ console.log(`[Logger] Created log file: ${logFile}`);
+ } catch (error) {
+ console.error(`[Logger] Error creating log file ${logFile}:`, error);
+ return false;
+ }
+ } else {
+ try {
+ const content = readFileSync(logFile, 'utf-8');
+ const headerLine = header.split('\n')[0];
+ // If file is empty or header doesn't match, overwrite/create header
+ if (!content.trim() || !content.startsWith(headerLine)) {
+ // Attempt to prepend header if file has content but wrong/no header
+ if(content.trim() && !content.startsWith(headerLine)) {
+ console.warn(`[Logger] Log file ${logFile} seems to be missing or has an incorrect header. Prepending correct header.`);
+ writeFileSync(logFile, header + content);
+ } else {
+ // File is empty or correctly headed, just ensure header is there
+ writeFileSync(logFile, header);
+ }
+ console.log(`[Logger] Ensured header in log file: ${logFile}`);
+ }
+ } catch (error) {
+ console.error(`[Logger] Error checking/writing header for log file ${logFile}:`, error);
+ // Proceed cautiously, maybe log an error and continue?
+ }
+ }
+ return true;
+}
+
+
+function writeToLogFile(logFile, csvEntry) {
+ try {
+ appendFileSync(logFile, csvEntry);
+ // console.log(`[Logger] Logged data to ${logFile}`); // Keep console less noisy
+ } catch (error) {
+ console.error(`[Logger] Error writing to CSV log file ${logFile}:`, error);
+ }
+}
+
+// --- Auto-Detection for Log Type (Based on Response Content) ---
+function determineLogType(response) {
+    // Reasoning check: needs <think>...</think> but ignore the specific 'undefined' placeholder
+    const isReasoning = response.includes('<think>') && response.includes('</think>') && !response.includes('<think>\nundefined');
+
+ if (isReasoning) {
+ return 'reasoning';
+ } else {
+ return 'normal';
+ }
+}
+
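+// Example: sanitizeForCsv('said "hi"') returns '"said ""hi"""' (quotes doubled, whole value wrapped)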
+function sanitizeForCsv(value) {
+ if (typeof value !== 'string') {
+ value = String(value);
+ }
+ // Escape double quotes by doubling them and enclose the whole string in double quotes
+ return `"${value.replace(/"/g, '""')}"`;
+}
+
+// Helper function to clean reasoning markers from input
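+// Example: cleanReasoningMarkers('/think plan a house') returns 'plan a house'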
+function cleanReasoningMarkers(input) {
+ if (typeof input !== 'string') {
+ return input;
+ }
+
+ // Remove /think and /no_think markers
+ return input.replace(/\/think/g, '').replace(/\/no_think/g, '').trim();
+}
+
+// Helper function to clean imagePath from messages for text logs
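+// Example: '[{"role":"user","imagePath":"img.jpg","content":"hi"}]' becomes '[{"role":"user","content":"hi"}]'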
+function cleanImagePathFromMessages(input) {
+ if (typeof input !== 'string') {
+ return input;
+ }
+
+ try {
+ const parsed = JSON.parse(input);
+ if (Array.isArray(parsed)) {
+ const cleaned = parsed.map(msg => {
+ let cleanedMsg = { ...msg }; // Clone message
+
+ // Remove top-level imagePath
+ if (cleanedMsg.imagePath !== undefined) {
+ delete cleanedMsg.imagePath;
+ }
+
+ // Remove image_url from content array
+ if (Array.isArray(cleanedMsg.content)) {
+ cleanedMsg.content = cleanedMsg.content.filter(part =>
+ part.type !== 'image_url' &&
+ !(part.type === 'image' && part.source) // Also filter Claude-style image parts
+ );
+
+ // If content becomes empty after filtering, remove it or set to empty string
+ if (cleanedMsg.content.length === 0) {
+ cleanedMsg.content = "";
+ } else if (cleanedMsg.content.length === 1 &&
+ cleanedMsg.content[0].type === 'text' &&
+ !cleanedMsg.content[0].text?.trim()) {
+ cleanedMsg.content = "";
+ }
+ }
+ return cleanedMsg;
+ });
+ return JSON.stringify(cleaned);
+ }
+ } catch (e) {
+ // If not valid JSON, return as-is
+ return input;
+ }
+
+ return input;
+}
+
+// --- Main Logging Function (for text-based input/output) ---
+export function log(input, response) {
+ const trimmedInputStr = input ? (typeof input === 'string' ? input.trim() : JSON.stringify(input)) : "";
+ const trimmedResponse = response ? String(response).trim() : ""; // Ensure response is a string
+
+ // Clean reasoning markers from input before logging
+ let cleanedInput = cleanReasoningMarkers(trimmedInputStr);
+
+ // Clean imagePath from messages for text logs (normal/reasoning)
+ cleanedInput = cleanImagePathFromMessages(cleanedInput);
+
+ // Basic filtering
+ if (!cleanedInput && !trimmedResponse) {
+ logCounts.skipped_empty++;
+ return;
+ }
+ if (cleanedInput === trimmedResponse) {
+ logCounts.skipped_empty++;
+ return;
+ }
+ // Avoid logging common error messages that aren't useful training data
+ const errorMessages = [
+ "My brain disconnected, try again.",
+ "My brain just kinda stopped working. Try again.",
+ "I thought too hard, sorry, try again.",
+ "*no response*",
+ "No response received.",
+ "No response data.",
+ "Failed to send", // Broader match
+ "Error:", // Broader match
+ "Vision is only supported",
+ "Context length exceeded",
+ "Image input modality is not enabled",
+ "An unexpected error occurred",
+ // Add more generic errors/placeholders as needed
+ ];
+ // Also check for responses that are just the input repeated (sometimes happens with errors)
+ if (errorMessages.some(err => trimmedResponse.includes(err)) || trimmedResponse === cleanedInput) {
+ logCounts.skipped_empty++;
+ // console.warn(`[Logger] Skipping log due to error/placeholder/repeat: "${trimmedResponse.substring(0, 70)}..."`);
+ return;
+ }
+
+
+ const logType = determineLogType(trimmedResponse);
+ let logFile;
+ let header;
+ let settingFlag;
+
+ switch (logType) {
+ case 'reasoning':
+ logFile = REASONING_LOG_FILE;
+ header = TEXT_LOG_HEADER;
+ settingFlag = settings.log_reasoning_data;
+ break;
+ case 'normal':
+ default:
+ logFile = NORMAL_LOG_FILE;
+ header = TEXT_LOG_HEADER;
+ settingFlag = settings.log_normal_data;
+ break;
+ }
+
+ // Check if logging for this type is enabled
+ if (!settingFlag) {
+ logCounts.skipped_disabled++;
+ return;
+ }
+
+ // Ensure directory and file exist
+ if (!ensureLogFile(logFile, header)) return; // ensureLogFile now checks parent dir too
+
+ // Prepare the CSV entry using the sanitizer with cleaned input
+ const safeInput = sanitizeForCsv(cleanedInput);
+ const safeResponse = sanitizeForCsv(trimmedResponse);
+ const csvEntry = `${safeInput},${safeResponse}\n`;
+
+ // Write to the determined log file
+ writeToLogFile(logFile, csvEntry);
+
+ // Update counts
+ logCounts[logType]++;
+ logCounts.total++; // Total here refers to text logs primarily
+
+ // Display summary periodically (based on total text logs)
+ if (logCounts.normal + logCounts.reasoning > 0 && (logCounts.normal + logCounts.reasoning) % 20 === 0) {
+ printSummary();
+ }
+}
+
+// --- Enhanced Vision Logging Function for HuggingFace Dataset Format ---
+export function logVision(conversationHistory, imageBuffer, response, visionMessage = null) {
+ if (!settings.log_vision_data) {
+ logCounts.skipped_disabled++;
+ return;
+ }
+
+ const trimmedResponse = response ? String(response).trim() : "";
+
+ if (!conversationHistory || conversationHistory.length === 0 || !trimmedResponse || !imageBuffer) {
+ logCounts.skipped_empty++;
+ return;
+ }
+
+ // Filter out error messages
+ const errorMessages = [
+ "My brain disconnected, try again.",
+ "My brain just kinda stopped working. Try again.",
+ "I thought too hard, sorry, try again.",
+ "*no response*",
+ "No response received.",
+ "No response data.",
+ "Failed to send",
+ "Error:",
+ "Vision is only supported",
+ "Context length exceeded",
+ "Image input modality is not enabled",
+ "An unexpected error occurred",
+ "Image captured for always active vision", // Filter out placeholder responses
+ ];
+
+ if (errorMessages.some(err => trimmedResponse.includes(err))) {
+ logCounts.skipped_empty++;
+ return;
+ }
+
+ // Ensure directories exist
+ if (!ensureDirectoryExistence(VISION_DATASET_DIR)) return;
+ if (!ensureDirectoryExistence(VISION_IMAGES_DIR)) return;
+
+ try {
+ // Generate unique filename for the image
+ const timestamp = Date.now();
+ const randomSuffix = Math.random().toString(36).substring(2, 8);
+ const imageFilename = `vision_${timestamp}_${randomSuffix}.jpg`;
+ const imagePath = join(VISION_IMAGES_DIR, imageFilename);
+ const relativeImagePath = `images/${imageFilename}`; // Relative path for metadata
+
+ // Save the image
+ writeFileSync(imagePath, imageBuffer);
+ logCounts.vision_images_saved++;
+
+ // Clean the conversation history to remove imagePath and image data before logging
+ const cleanedConversationHistory = JSON.parse(cleanImagePathFromMessages(JSON.stringify(conversationHistory)));
+
+ // Format the complete input as JSON (cleaned conversation history)
+ const inputData = JSON.stringify(cleanedConversationHistory);
+
+ // Create metadata entry in JSONL format for HuggingFace
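+        // Example line (illustrative values): {"file_name":"images/vision_1712345678901_ab12cd.jpg","input":"[...]","response":"I see a tree.","timestamp":1712345678901}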
+ const metadataEntry = {
+ file_name: relativeImagePath,
+ input: inputData, // Cleaned JSON conversation history
+ response: trimmedResponse, // Actual model response, not placeholder
+ timestamp: timestamp
+ };
+
+ // Append to metadata JSONL file
+ const jsonlLine = JSON.stringify(metadataEntry) + '\n';
+ appendFileSync(VISION_METADATA_FILE, jsonlLine);
+
+ logCounts.vision++;
+ logCounts.total++;
+
+ // Display summary periodically
+ if (logCounts.vision > 0 && logCounts.vision % 10 === 0) {
+ printSummary();
+ }
+
+ } catch (error) {
+ console.error(`[Logger] Error logging vision data:`, error);
+ }
+}
+
+// Helper function to format conversation history as fallback
+function formatConversationInput(conversationHistory) {
+ if (!conversationHistory || conversationHistory.length === 0) return '';
+
+ const formattedHistory = [];
+
+ for (const turn of conversationHistory) {
+ const formattedTurn = {
+ role: turn.role || 'user',
+ content: []
+ };
+
+ // Handle different content formats
+ if (typeof turn.content === 'string') {
+ formattedTurn.content.push({
+ type: 'text',
+ text: turn.content
+ });
+ } else if (Array.isArray(turn.content)) {
+ // Already in the correct format
+ formattedTurn.content = turn.content;
+ } else if (turn.content && typeof turn.content === 'object') {
+ // Convert object to array format
+ if (turn.content.text) {
+ formattedTurn.content.push({
+ type: 'text',
+ text: turn.content.text
+ });
+ }
+ if (turn.content.image) {
+ formattedTurn.content.push({
+ type: 'image',
+ image: turn.content.image
+ });
+ }
+ }
+
+ formattedHistory.push(formattedTurn);
+ }
+
+ return JSON.stringify(formattedHistory);
+}
+
+function printSummary() {
+ const totalStored = logCounts.normal + logCounts.reasoning + logCounts.vision;
+ console.log('\n' + '='.repeat(60));
+ console.log('LOGGER SUMMARY');
+ console.log('-'.repeat(60));
+ console.log(`Normal logs stored: ${logCounts.normal}`);
+ console.log(`Reasoning logs stored: ${logCounts.reasoning}`);
+ console.log(`Vision logs stored: ${logCounts.vision} (Images saved: ${logCounts.vision_images_saved})`);
+ console.log(`Skipped (disabled): ${logCounts.skipped_disabled}`);
+ console.log(`Skipped (empty/err): ${logCounts.skipped_empty}`);
+ console.log('-'.repeat(60));
+ console.log(`Total logs stored: ${totalStored}`);
+ console.log('='.repeat(60) + '\n');
+}
+
+// Initialize counts at startup
+function initializeCounts() {
+ logCounts.normal = countLogEntries(NORMAL_LOG_FILE);
+ logCounts.reasoning = countLogEntries(REASONING_LOG_FILE);
+ logCounts.vision = countVisionEntries(VISION_METADATA_FILE);
+ // Total count will be accumulated during runtime
+ console.log(`[Logger] Initialized log counts: Normal=${logCounts.normal}, Reasoning=${logCounts.reasoning}, Vision=${logCounts.vision}`);
+}
+
+function countVisionEntries(metadataFile) {
+ if (!existsSync(metadataFile)) return 0;
+ try {
+ const data = readFileSync(metadataFile, 'utf8');
+ const lines = data.split('\n').filter(line => line.trim());
+ return lines.length;
+ } catch (err) {
+ console.error(`[Logger] Error reading vision metadata file ${metadataFile}:`, err);
+ return 0;
+ }
+}
+
+// Initialize counts at startup
+initializeCounts();
diff --git a/logs/convert.py b/logs/convert.py
new file mode 100644
index 0000000..f78ec22
--- /dev/null
+++ b/logs/convert.py
@@ -0,0 +1,964 @@
+import csv
+import json
+import logging
+import sys
+import os
+import random
+from typing import List, Dict
+import pandas as pd
+from USERNAMES import Get_Usernames
+from transformers import AutoTokenizer
+from tqdm import tqdm
+import torch
+from PIL import Image
+import base64
+from io import BytesIO
+
+# Try to import pandas-image-methods for vision data handling
+try:
+ from pandas_image_methods import PILMethods
+ PANDAS_IMAGE_METHODS_AVAILABLE = True
+ # Enable PIL methods for pandas
+ pd.api.extensions.register_series_accessor("pil")(PILMethods)
+except ImportError:
+ PANDAS_IMAGE_METHODS_AVAILABLE = False
+ logging.warning("pandas-image-methods not available. Install with: pip install pandas-image-methods")
+
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Increase CSV field size limit to avoid errors with very large fields.
+maxInt = sys.maxsize
+while True:
+ try:
+ csv.field_size_limit(maxInt)
+ break
+ except OverflowError:
+ maxInt = int(maxInt/10)
+
+# Define the original usernames.
+ORIGINAL_USERNAMES = [
+ "Your_username", "Andy"
+]
+
+# Define outputs that should cause the conversation to be deleted.
+BAD_OUTPUTS = {
+ "My brain just kinda stopped working. Try again.",
+ "My brain disconnected, try again.",
+ "Vision is only supported",
+ "Context length exceeded",
+ "Image input modality is not enabled",
+ "An unexpected error occurred",
+}
+
+MINECRAFT_USERNAMES = list(set(Get_Usernames())) # Remove duplicates
+duplicate_count = len(Get_Usernames()) - len(MINECRAFT_USERNAMES)
+
+available_minecraft_usernames = list(MINECRAFT_USERNAMES) # Create a copy for tracking
+
+global username_replaced_count
+global reasoning_replaced_count
+username_replaced_count = 0
+reasoning_replaced_count = 0
+
+def replace_reasoning_prompt(text: str) -> str:
+ global reasoning_replaced_count
+ replaced = False
+ # Optionally, replace the reasoning prompt if needed.
+ if replaced:
+ reasoning_replaced_count += 1
+ return text
+
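+# Example: parse_json_safely('[{"role": "user", "content": "hi"}]')
+# returns [{"from": "human", "value": "hi"}]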
+def parse_json_safely(text: str) -> List[Dict[str, str]]:
+ try:
+ if text.startswith('[') and '],' in text:
+ parts = text.split('],')
+ text = parts[0] + ']'
+ if text.startswith('"') and text.endswith('"'):
+ text = text[1:-1]
+ text = text.replace('""', '"')
+ data = json.loads(text)
+ if isinstance(data, list) and len(data) > 0 and isinstance(data[0], list):
+ data = data[0]
+ converted_messages = []
+ for msg in data:
+ if isinstance(msg, dict) and 'role' in msg and 'content' in msg:
+ converted_messages.append({
+ "from": "human" if msg['role'] in ("system", "user") else "gpt",
+ "value": msg['content']
+ })
+ return converted_messages
+ except Exception as e:
+ logger.debug(f"Error parsing JSON: {e}") # Suppressed error level
+ return [{
+ "from": "human",
+ "value": text
+ }]
+
+def create_conversation_thread(row: Dict[str, str]) -> List[Dict[str, str]]:
+ messages = []
+ conversation_replacements = {} # Track username replacements for this conversation ONLY
+
+ def replace_usernames_in_message(text: str) -> str:
+ global username_replaced_count
+ global available_minecraft_usernames
+ replaced = False
+
+ if not MINECRAFT_USERNAMES:
+ return text
+
+ for orig_name in ORIGINAL_USERNAMES:
+ if orig_name in text:
+ if orig_name not in conversation_replacements:
+ # If we've used all available names, reset the list
+ if not available_minecraft_usernames:
+ available_minecraft_usernames = list(MINECRAFT_USERNAMES)
+ # Get a random name from the available ones
+ replacement = random.choice(available_minecraft_usernames)
+ available_minecraft_usernames.remove(replacement)
+ conversation_replacements[orig_name] = replacement
+ replaced = True
+ # Use existing replacement for this conversation
+ text = text.replace(orig_name, conversation_replacements[orig_name])
+
+ if replaced:
+ username_replaced_count += 1
+ return text
+
+ if row.get("input"):
+ messages = parse_json_safely(str(row["input"]))
+ # Apply consistent username replacements to all messages
+ for msg in messages:
+ msg["value"] = replace_usernames_in_message(msg["value"])
+
+ if row.get("output"):
+ output_text = str(row["output"]).strip()
+ output_text = replace_usernames_in_message(output_text)
+ output_text = replace_reasoning_prompt(output_text)
+ messages.append({
+ "from": "gpt",
+ "value": output_text
+ })
+
+ return messages
+
+def conversation_has_bad_output(messages: List[Dict[str, str]]) -> bool:
+ for msg in messages:
+ if msg["from"] == "gpt" and msg["value"].strip() in BAD_OUTPUTS:
+ return True
+ return False
+
+def load_image_from_base64(base64_string: str):
+ """Convert base64 string to PIL Image"""
+ try:
+ if base64_string.startswith('data:'):
+ base64_string = base64_string.split(',')[1]
+
+ image_bytes = base64.b64decode(base64_string)
+ image = Image.open(BytesIO(image_bytes))
+
+ if image.mode in ('RGBA', 'LA', 'P'):
+ image = image.convert('RGB')
+
+ return image
+ except Exception as e:
+ logger.debug(f"Error loading image from base64: {e}")
+ return Image.new('RGB', (224, 224), color='gray')
+
+def pil_image_to_parquet_dict(image: Image.Image, filename: str) -> Dict:
+ """Converts a PIL Image to the dictionary format {bytes, path} for Parquet."""
+ img_byte_arr = BytesIO()
+ # Determine a suitable save format
+ save_format = image.format if image.format and image.format in Image.SAVE else 'PNG'
+
+ # Handle specific mode conversions if necessary for the chosen format
+ if save_format == 'PNG' and image.mode not in ['RGB', 'RGBA', 'L', 'P', 'I', 'F']: # Common PNG modes
+ # Convert to a mode PNG supports, e.g., RGBA to preserve transparency
+ image_to_save = image.convert("RGBA")
+ elif save_format == 'JPEG' and image.mode not in ['RGB', 'L', 'CMYK']:
+ # Convert to a mode JPEG supports
+ image_to_save = image.convert("RGB")
+ else:
+ image_to_save = image
+
+ try:
+ image_to_save.save(img_byte_arr, format=save_format)
+ except Exception as e:
+ logger.warning(f"Could not save image {filename} in format {save_format} (Error: {e}). Attempting PNG.")
+ save_format = 'PNG'
+ if image_to_save.mode not in ['RGB', 'RGBA', 'L', 'P', 'I', 'F']:
+ image_to_save = image.convert("RGBA") # Default to RGBA for PNG
+ image_to_save.save(img_byte_arr, format=save_format)
+
+ return {"bytes": img_byte_arr.getvalue(), "path": filename}
+
+def extract_vision_data_from_jsonl(jsonl_path: str) -> List[Dict]:
+ """Extract vision data from HuggingFace JSONL metadata format"""
+ if not os.path.isfile(jsonl_path):
+ logger.error(f"JSONL file not found: {jsonl_path}")
+ return []
+
+ logger.info(f"Reading vision metadata: {jsonl_path}")
+
+ # Get the directory containing the JSONL file (should contain images folder)
+ base_dir = os.path.dirname(jsonl_path)
+ images_dir = os.path.join(base_dir, 'images')
+
+ if not os.path.isdir(images_dir):
+ logger.error(f"Images directory not found: {images_dir}")
+ return []
+
+ vision_data = []
+
+ with open(jsonl_path, 'r', encoding='utf-8') as f:
+ for line_num, line in enumerate(f, 1):
+ line = line.strip()
+ if not line:
+ continue
+
+ try:
+ entry = json.loads(line)
+
+ # Extract required fields - logger.js uses 'input' and 'response', not 'text'
+ file_name = entry.get('file_name', '')
+ input_data = entry.get('input', '')
+ response = entry.get('response', '')
+
+ if not all([file_name, input_data, response]):
+ logger.warning(f"Line {line_num}: Missing required fields (file_name, input, response)")
+ continue
+
+ # Check for bad outputs
+ if response.strip() in BAD_OUTPUTS:
+ logger.debug(f"Line {line_num}: Skipping bad output")
+ continue
+
+ # Load the image
+ image_path = os.path.join(base_dir, file_name)
+ if not os.path.isfile(image_path):
+ logger.warning(f"Line {line_num}: Image file not found: {image_path}")
+ continue
+
+ try:
+ image = Image.open(image_path)
+ if image.mode in ('RGBA', 'LA', 'P') and image.format != 'PNG': # PNG handles these modes well
+ image = image.convert('RGB') # Convert to RGB if not PNG to simplify, or handle more modes in pil_image_to_parquet_dict
+ except Exception as e:
+ logger.warning(f"Line {line_num}: Error loading image {image_path}: {e}")
+ continue
+
+ # Convert PIL image to parquet-compatible dict
+ relative_image_path_for_dict = file_name # Use the relative path from metadata
+ image_dict = pil_image_to_parquet_dict(image, relative_image_path_for_dict)
+
+ # Create a separate conversation_replacements for each vision entry
+ entry_conversation_replacements = {}
+
+ # Replace usernames consistently within this single entry
+ def replace_usernames_in_text(text: str) -> str:
+ global username_replaced_count
+ global available_minecraft_usernames
+ replaced = False
+
+ if not MINECRAFT_USERNAMES:
+ return text
+
+ for orig_name in ORIGINAL_USERNAMES:
+ if orig_name in text:
+ if orig_name not in entry_conversation_replacements:
+ if not available_minecraft_usernames:
+ available_minecraft_usernames = list(MINECRAFT_USERNAMES)
+ replacement = random.choice(available_minecraft_usernames)
+ available_minecraft_usernames.remove(replacement)
+ entry_conversation_replacements[orig_name] = replacement
+ replaced = True
+ text = text.replace(orig_name, entry_conversation_replacements[orig_name])
+
+ if replaced:
+ username_replaced_count += 1
+ return text
+
+ # Parse the input data (conversation history) and build conversation
+ try:
+ # The input_data should be JSON string of conversation history
+ conversation_history = json.loads(input_data)
+
+ # Build the conversation in unsloth format
+ conversation = []
+
+ if isinstance(conversation_history, list):
+ for msg in conversation_history:
+ if isinstance(msg, dict) and 'role' in msg:
+ role = msg['role']
+ # Map system messages to user role for simplicity
+ if role == 'system':
+ role = 'user'
+
+ content_parts = []
+
+ # Handle different content formats
+ if 'content' in msg:
+ content = msg['content']
+ if isinstance(content, str):
+ # Simple string content
+ text_content = replace_usernames_in_text(content)
+ content_parts.append({"type": "text", "text": text_content})
+ elif isinstance(content, list):
+ # Array content (multimodal messages)
+ for part in content:
+ if isinstance(part, dict):
+ if part.get('type') == 'text':
+ text_content = part.get('text', '')
+ if text_content:
+ text_content = replace_usernames_in_text(text_content)
+ content_parts.append({"type": "text", "text": text_content})
+ # Skip image parts from history - we'll add the main image to the user message
+ elif any(key in msg for key in ['text', 'message', 'value']):
+ # Handle other message formats
+ text_content = msg.get('text') or msg.get('message') or msg.get('value', '')
+ if text_content:
+ text_content = replace_usernames_in_text(str(text_content))
+ content_parts.append({"type": "text", "text": text_content})
+
+ if content_parts:
+ conversation.append({
+ "role": role,
+ "content": content_parts
+ })
+
+ # If no conversation history was parsed or it's empty, create a simple user message
+ if not conversation:
+ # Use the raw input data as text
+ text_content = replace_usernames_in_text(str(input_data).strip())
+ conversation.append({
+ "role": "user",
+ "content": [{"type": "text", "text": text_content}]
+ })
+
+ # Add the image to the last user message (or create one if none exists)
+ user_msg_found = False
+ for i in range(len(conversation) - 1, -1, -1):
+ if conversation[i]["role"] == "user":
+ # Add image to this user message
+ conversation[i]["content"].append({"type": "image", "image": image_dict})
+ user_msg_found = True
+ break
+
+ if not user_msg_found:
+ # No user message found, create one with just the image
+ conversation.append({
+ "role": "user",
+ "content": [{"type": "image", "image": image_dict}]
+ })
+
+ # Add the assistant response
+ response_text = replace_usernames_in_text(response)
+ conversation.append({
+ "role": "assistant",
+ "content": [{"type": "text", "text": response_text}]
+ })
+
+ except json.JSONDecodeError:
+ # If input_data is not valid JSON, create simple conversation
+ text_content = replace_usernames_in_text(str(input_data).strip())
+ response_text = replace_usernames_in_text(response)
+
+ conversation = [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": text_content},
+ {"type": "image", "image": image_dict}
+ ]
+ },
+ {
+ "role": "assistant",
+ "content": [{"type": "text", "text": response_text}]
+ }
+ ]
+ except Exception as e:
+ logger.debug(f"Line {line_num}: Error parsing conversation history: {e}")
+ # Fallback to simple conversation
+ text_content = replace_usernames_in_text(str(input_data).strip())
+ response_text = replace_usernames_in_text(response)
+
+ conversation = [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": text_content},
+ {"type": "image", "image": image_dict}
+ ]
+ },
+ {
+ "role": "assistant",
+ "content": [{"type": "text", "text": response_text}]
+ }
+ ]
+
+ vision_data.append(conversation)
+
+ except json.JSONDecodeError as e:
+ logger.warning(f"Line {line_num}: JSON decode error: {e}")
+ continue
+ except Exception as e:
+ logger.warning(f"Line {line_num}: Unexpected error: {e}")
+ continue
+
+ logger.info(f"Successfully processed {len(vision_data)} vision entries")
+ return vision_data
+
+def extract_vision_conversations_from_csv(csv_input: str) -> List[Dict]:
+ """Extract vision data from CSV with input,image,output columns"""
+ if not os.path.isfile(csv_input):
+ logger.debug(f"Vision CSV file not found: {csv_input}")
+ return []
+
+ logger.info(f"Reading Vision CSV: {csv_input}")
+
+ try:
+ df = pd.read_csv(csv_input)
+ required_columns = ['input', 'image', 'output']
+
+ if not all(col in df.columns for col in required_columns):
+ logger.debug(f"Vision CSV missing required columns: {required_columns}")
+ return []
+
+ vision_data = []
+
+ for idx, row in df.iterrows():
+ try:
+ input_text = str(row['input']).strip()
+ image_b64 = str(row['image']).strip()
+ output_text = str(row['output']).strip()
+
+ if not all([input_text, image_b64, output_text]):
+ continue
+
+ # Check for bad outputs
+ if output_text in BAD_OUTPUTS:
+ continue
+
+ # Create separate replacements for each row
+ row_conversation_replacements = {}
+
+ # Replace usernames consistently within this single row
+ def replace_usernames_in_text(text: str) -> str:
+ global username_replaced_count
+ global available_minecraft_usernames
+ replaced = False
+
+ if not MINECRAFT_USERNAMES:
+ return text
+
+ for orig_name in ORIGINAL_USERNAMES:
+ if orig_name in text:
+ if orig_name not in row_conversation_replacements:
+ if not available_minecraft_usernames:
+ available_minecraft_usernames = list(MINECRAFT_USERNAMES)
+ replacement = random.choice(available_minecraft_usernames)
+ available_minecraft_usernames.remove(replacement)
+ row_conversation_replacements[orig_name] = replacement
+ replaced = True
+ text = text.replace(orig_name, row_conversation_replacements[orig_name])
+
+ if replaced:
+ username_replaced_count += 1
+ return text
+
+ input_text = replace_usernames_in_text(input_text)
+ output_text = replace_usernames_in_text(output_text)
+
+ # Load image from base64
+ image = load_image_from_base64(image_b64)
+
+ # Convert PIL image to parquet-compatible dict
+ image_filename_for_dict = f"image_from_base64_{idx}.png" # Create a placeholder filename
+ image_dict = pil_image_to_parquet_dict(image, image_filename_for_dict)
+
+ # Create conversation in unsloth format
+ conversation = [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": input_text},
+ {"type": "image", "image": image_dict}
+ ]
+ },
+ {
+ "role": "assistant",
+ "content": [{"type": "text", "text": output_text}]
+ }
+ ]
+
+ vision_data.append(conversation)
+
+ except Exception as e:
+ logger.warning(f"Row {idx}: Error processing vision data: {e}")
+ continue
+
+ logger.info(f"Successfully processed {len(vision_data)} vision entries from CSV")
+ return vision_data
+
+ except Exception as e:
+ logger.error(f"Error reading vision CSV {csv_input}: {e}")
+ return []
+
+def extract_conversations_from_csv(csv_input: str) -> List[List[Dict[str, str]]]:
+ if not os.path.isfile(csv_input):
+ logger.debug(f"CSV file not found: {csv_input}")
+ return []
+
+ logger.info(f"Reading CSV: {csv_input}")
+ valid_rows = []
+ extra_issue_rows = 0
+ total_extra_columns = 0
+
+ with open(csv_input, newline='', encoding="utf-8") as csvfile:
+ reader = csv.reader(csvfile)
+ try:
+ header = next(reader)
+ except StopIteration:
+ logger.debug(f"CSV file {csv_input} is empty.")
+ return []
+
+ header_expected = {"input", "output"}
+ header_map = {col: idx for idx, col in enumerate(header)}
+ if not header_expected.issubset(set(header)):
+ logger.debug(f"CSV header does not contain required columns: {header_expected}")
+ return []
+
+ for idx, row in enumerate(reader, start=2):
+ non_empty_count = sum(1 for field in row if field.strip() != "")
+ if non_empty_count > 2:
+ extra = non_empty_count - 2
+ extra_issue_rows += 1
+ total_extra_columns += extra
+ logger.info(f"Row {idx} has {extra} extra filled column(s); row skipped.")
+ continue
+ row_dict = {col: row[header_map[col]] if header_map[col] < len(row) else "" for col in header_expected}
+ valid_rows.append(row_dict)
+
+ logger.info(f"Excluded {extra_issue_rows} row(s) with extra columns (total extra columns: {total_extra_columns}).")
+ df = pd.DataFrame(valid_rows)
+ conversations = []
+ for idx, row in df.iterrows():
+ conv = create_conversation_thread(row)
+ if conversation_has_bad_output(conv):
+ continue
+ conversations.append(conv)
+ return conversations
+
+def extract_conversations_from_json(json_input: str) -> List[List[Dict[str, str]]]:
+ logger.info(f"Reading JSON: {json_input}")
+ try:
+ with open(json_input, 'r', encoding='utf-8') as f:
+ data = json.load(f)
+ except Exception as e:
+ logger.debug(f"Error reading {json_input}: {e}")
+ return []
+ conversations = []
+ for conv in data:
+ messages = []
+ if "system" in conv and conv["system"]:
+ system_text = str(conv["system"]).strip()
+ system_text = replace_reasoning_prompt(system_text)
+ messages.append({"from": "human", "value": system_text})
+ if "user" in conv and conv["user"]:
+ user_text = str(conv["user"]).strip()
+ user_text = replace_reasoning_prompt(user_text)
+ messages.append({"from": "human", "value": user_text})
+ if "assistant" in conv and conv["assistant"]:
+ assistant_text = str(conv["assistant"]).strip()
+ assistant_text = replace_reasoning_prompt(assistant_text)
+ messages.append({"from": "gpt", "value": assistant_text})
+ if messages and not conversation_has_bad_output(messages):
+ conversations.append(messages)
+ return conversations
+
+if __name__ == "__main__":
+ # Handle vision dataset processing
+ if '--vision' in sys.argv:
+ if not PANDAS_IMAGE_METHODS_AVAILABLE:
+ logger.error("pandas-image-methods is required for --vision flag. Install with: pip install pandas-image-methods")
+ sys.exit(1)
+
+ # Look for vision data files
+ vision_files = []
+
+ # Check for HuggingFace format (metadata.jsonl)
+ metadata_jsonl = "vision_dataset/metadata.jsonl"
+ if os.path.isfile(metadata_jsonl):
+ vision_files.append((metadata_jsonl, 'jsonl'))
+
+ # Check for CSV format vision logs
+ vision_csv = "vision_logs.csv"
+ if os.path.isfile(vision_csv):
+ vision_files.append((vision_csv, 'csv'))
+
+ # Check for numbered files
+ i = 1
+ while True:
+ jsonl_file = f"vision_dataset{i}/metadata.jsonl"
+ csv_file = f"vision_logs{i}.csv"
+ found_any = False
+
+ if os.path.isfile(jsonl_file):
+ vision_files.append((jsonl_file, 'jsonl'))
+ found_any = True
+ if os.path.isfile(csv_file):
+ vision_files.append((csv_file, 'csv'))
+ found_any = True
+
+ if not found_any:
+ break
+ i += 1
+
+ if not vision_files:
+ logger.error("No vision dataset files found for --vision flag!")
+ logger.info("Looking for:")
+ logger.info(" - vision_dataset/metadata.jsonl (HuggingFace format)")
+ logger.info(" - vision_logs.csv (CSV format)")
+ logger.info(" - vision_datasetN/metadata.jsonl")
+ logger.info(" - vision_logsN.csv")
+ sys.exit(1)
+
+ logger.info(f"Found {len(vision_files)} vision files: {[f for f, _ in vision_files]}")
+
+ # Process all vision files
+ all_vision_data = []
+ total_count = 0
+ file_counts = {}
+
+ for file_path, file_type in vision_files:
+ if file_type == 'jsonl':
+ vision_data = extract_vision_data_from_jsonl(file_path)
+ else: # csv
+ vision_data = extract_vision_conversations_from_csv(file_path)
+
+ file_counts[file_path] = len(vision_data)
+ all_vision_data.extend(vision_data)
+ total_count += len(vision_data)
+
+ if not all_vision_data:
+ logger.error("No valid vision data found!")
+ sys.exit(1)
+
+ # Check for tokenization flags
+ do_tokenize = '--tokenize' in sys.argv
+ tokenizer = None
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ if do_tokenize:
+ logger.info("Loading tokenizer 'unsloth/Llama-3.2-1B-Instruct-bnb-4bit'...")
+ tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct-bnb-4bit")
+
+ # Tokenize if requested
+ if do_tokenize and tokenizer:
+ all_texts = []
+        # Each entry is a conversation (list of role/content messages); collect its text parts
+        for conversation in all_vision_data:
+            for msg in conversation:
+                for part in msg.get("content", []):
+                    if part.get("type") == "text":
+                        all_texts.append(part["text"])
+
+ total_tokens = 0
+ logger.info("Tokenizing vision data...")
+ for text in tqdm(all_texts, desc="Tokenizing", unit="msg"):
+ encoded = tokenizer(text, return_tensors="pt")
+ input_ids = encoded["input_ids"].to(device)
+ total_tokens += input_ids.shape[-1]
+ logger.info(f"Total tokens across all vision data: {total_tokens}")
+
+ # Remove duplicates based on conversation content
+ unique_vision_data = []
+ seen_keys = set()
+
+ for conversation in all_vision_data:
+ # Create a key from the text content of the conversation
+ key_parts = []
+ for msg in conversation:
+ if msg["role"] in ["user", "assistant"]:
+ for content_part in msg["content"]:
+ if content_part["type"] == "text":
+ key_parts.append(content_part["text"].strip())
+
+ key = tuple(key_parts)
+ if key not in seen_keys:
+ seen_keys.add(key)
+ unique_vision_data.append(conversation)
+
+ all_vision_data = unique_vision_data
+ logger.info(f"After deduplication: {len(all_vision_data)} unique vision conversations")
+
+ # Shuffle the data
+ random.shuffle(all_vision_data)
+
+ # Images are already in parquet-compatible dict format within all_vision_data
+ # No further image processing needed here before creating DataFrame
+
+ # Create DataFrame with conversations column (unsloth format)
+ df_final = pd.DataFrame({"conversations": all_vision_data})
+
+ output_parquet = "Andy_vision_conversations.parquet"
+
+ logger.info(f"Writing vision dataset to {output_parquet}")
+ try:
+ df_final.to_parquet(output_parquet, index=False)
+ abs_path = os.path.abspath(output_parquet)
+ logger.info(f"Successfully wrote vision dataset to: {abs_path}")
+ except Exception as e:
+ logger.error(f"Error writing Parquet file: {e}")
+ sys.exit(1)
+
+ logger.info(
+ f"\n"
+ f"--------------------------------------------------------------------------------------\n"
+ f"Vision conversion complete! Processed {total_count} vision conversations from {len(vision_files)} files.\n"
+ f"Replaced {username_replaced_count} usernames across conversations.\n"
+ f"Total usernames available: {len(MINECRAFT_USERNAMES)}\n"
+ f"Final dataset size: {len(all_vision_data)} unique conversations\n"
+ f"--------------------------------------------------------------------------------------\n"
+ )
+
+ # Log counts per file
+ for file_path, count in file_counts.items():
+ logger.info(f"File '{file_path}' contributed {count} conversations.")
+
+ sys.exit(0)
+
+ # Regular processing for non-vision data
+ base_filename = "Andy_pre"
+ files = []
+ i = 1
+ while True:
+ csv_file = f"{base_filename}{i}.csv"
+ json_file = f"{base_filename}{i}.json"
+ if not os.path.isfile(csv_file) and not os.path.isfile(json_file):
+ break
+ if os.path.isfile(csv_file):
+ files.append((csv_file, 'csv'))
+ if os.path.isfile(json_file):
+ files.append((json_file, 'json'))
+ i += 1
+
+ if not files:
+ logger.info("No CSV or JSON files found with pattern Andy_preN.(csv|json)")
+ sys.exit(1)
+
+ # Check for tokenization flags
+ do_tokenize = '--tokenize' in sys.argv
+ do_tokenize_largest = '--tokenize_largest' in sys.argv
+ tokenizer = None
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ if do_tokenize or do_tokenize_largest:
+ logger.info("Loading tokenizer 'unsloth/Llama-3.2-1B-Instruct-bnb-4bit'...")
+ tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct-bnb-4bit")
+
+ logger.info(f"Found {len(files)} files: {[f for f, _ in files]}")
+ combined_conversations = []
+ total_count = 0
+ file_conversation_counts = {}
+
+ for file, ftype in files:
+ if ftype == 'csv':
+ convs = extract_conversations_from_csv(file)
+ else:
+ convs = extract_conversations_from_json(file)
+ file_conversation_counts[file] = len(convs)
+ combined_conversations.extend(convs)
+ total_count += len(convs)
+
+ # Tokenize all data and count tokens
+ if do_tokenize:
+ all_texts = [msg["value"] for conv in combined_conversations for msg in conv]
+ total_tokens = 0
+ logger.info("Tokenizing all data with progress bar and GPU acceleration...")
+ for text in tqdm(all_texts, desc="Tokenizing", unit="msg"):
+ encoded = tokenizer(text, return_tensors="pt")
+ input_ids = encoded["input_ids"].to(device)
+ total_tokens += input_ids.shape[-1]
+ logger.info(f"Total tokens across all data: {total_tokens}")
+
+ # Tokenize 5 largest conversations
+ if do_tokenize_largest:
+ conv_token_counts = []
+ logger.info("Tokenizing largest conversations with progress bar and GPU acceleration...")
+ for conv in tqdm(combined_conversations, desc="Tokenizing convs", unit="conv"):
+ text = "\n".join(msg["value"] for msg in conv)
+ encoded = tokenizer(text, return_tensors="pt")
+ input_ids = encoded["input_ids"].to(device)
+ conv_token_counts.append((input_ids.shape[-1], conv))
+ # sort and take top 5
+ conv_token_counts.sort(key=lambda x: x[0], reverse=True)
+ top5 = conv_token_counts[:5]
+ max_tokens = max(count for count, _ in top5)
+ for idx, (count, _) in enumerate(top5, 1):
+ logger.info(f"Top {idx} conversation tokens: {count}")
+ logger.info(f"Maximum tokens in top 5: {max_tokens}")
+
+ # Clean up GPT messages
+ for conv in combined_conversations:
+ for msg in conv:
+ if msg["from"] == "gpt":
+ msg["value"] = msg["value"].replace("\nundefined\n", "").replace("\nundefined", "").strip()
+
+ unique_conversations = []
+ seen_keys = set()
+ for conv in combined_conversations:
+ if len(conv) < 2:
+ key = tuple(msg["value"] for msg in conv)
+ else:
+ key = (conv[0]["value"].strip(), conv[-1]["value"].strip())
+ if key not in seen_keys:
+ seen_keys.add(key)
+ unique_conversations.append(conv)
+ combined_conversations = unique_conversations
+
+ random.shuffle(combined_conversations)
+
+ # Handle codeOnly flag
+ if '--codeOnly' in sys.argv:
+ coding = []
+ noncoding = []
+ for conv in combined_conversations:
+ has_code = any("```" in msg["value"] for msg in conv) or (
+ conv and conv[-1]["from"] == "gpt" and "!newAction(" in conv[-1]["value"]
+ )
+ if has_code:
+ coding.append(conv)
+ else:
+ noncoding.append(conv)
+ logger.info(f"Found {len(coding)} coding examples and {len(noncoding)} non-coding examples.")
+ noncoding_count = int(round(0.15 * len(coding)))
+ if noncoding_count > len(noncoding):
+ noncoding_count = len(noncoding)
+ selected_noncoding = random.sample(noncoding, noncoding_count) if noncoding_count > 0 else []
+ final_conversations = coding + selected_noncoding
+ random.shuffle(final_conversations)
+ combined_conversations = final_conversations
+
+ if '--codeOnly' in sys.argv:
+ df_final = pd.DataFrame({"conversations": combined_conversations})
+ output_parquet = "Andy_conversations_codeOnly.parquet"
+ else:
+ df_final = pd.DataFrame({"conversations": combined_conversations})
+ output_parquet = "Andy_conversations.parquet"
+
+ logger.info(f"Writing output to {output_parquet}")
+ try:
+ df_final.to_parquet(output_parquet, index=False)
+ abs_path = os.path.abspath(output_parquet)
+ logger.info(f"Successfully wrote output to: {abs_path}")
+ except Exception as e:
+        logger.error(f"Error writing Parquet file: {e}")
+ sys.exit(1)
+
+ logger.info(
+ f"\n"
+ f"--------------------------------------------------------------------------------------\n\n"
+ f"Conversion complete! Processed {total_count} conversations from {len(files)} files. \n"
+ f"Replaced {username_replaced_count} usernames across {total_count} conversations. \n"
+ f"Total amount of usernames to choose from: {len(MINECRAFT_USERNAMES)} (removed {duplicate_count} duplicates) \n"
+ f"--------------------------------------------------------------------------------------\n\n"
+ )
+
+ # Log conversation counts per file.
+ for file, count in file_conversation_counts.items():
+ logger.info(f"File '{file}' contributed {count} conversations.")
diff --git a/logs/generate_usernames.py b/logs/generate_usernames.py
new file mode 100644
index 0000000..ede8c00
--- /dev/null
+++ b/logs/generate_usernames.py
@@ -0,0 +1,1117 @@
+# -*- coding: utf-8 -*-
+# ^^^ Add encoding declaration for potentially wider character sets in lists
+# --- Imports ---
+import random
+import os
+import sys # Import sys to access command-line arguments
+import itertools # Import itertools for generating combinations
+
+
+# Increase recursion depth if needed for large set operations (unlikely but possible)
+# sys.setrecursionlimit(2000)
+
+# --- Massively Expanded Word Lists (Targeting 750+ unique per category) ---
+
+# NOTE: Generating truly meaningful and diverse lists of this size requires
+# significant effort or large external datasets. These lists are expanded
+# considerably using thematic variations, synonyms, and related concepts.
+# They aim for the quantity requested, combining common and more specific terms.
+
+PROFESSIONS = list(set([
+ # Core & Fantasy
+ "Wizard", "Maven", "Guru", "Master", "Apprentice", "Hunter", "Gatherer",
+ "Coder", "Artist", "Chef", "Pilot", "Doctor", "Teacher", "Scientist",
+ "Musician", "Gamer", "Writer", "Explorer", "Builder", "Creator",
+ "Analyst", "Architect", "Strategist", "Voyager", "Dreamer", "Engineer",
+ "Designer", "Bard", "Rogue", "Paladin", "Alchemist", "Druid", "Ranger",
+ "Sentinel", "Guardian", "Navigator", "Captain", "Commander", "Sergeant",
+ "Healer", "Oracle", "Sage", "Scholar", "Scribe", "Merchant", "Trader",
+ "Blacksmith", "Jeweler", "Cartographer", "Monk", "Necromancer", "Summoner",
+ "Technomancer", "Hacker", "Broker", "Agent", "Scout", "Spy", "Jester",
+ "Minstrel", "Curator", "Warden", "Keeper", "Chronicler", "Inventor",
+ "Mechanist", "Artificer", "Gladiator", "Nomad", "Hermit", "Shaman",
+ "Geologist", "Biologist", "Physicist", "Astronomer", "Linguist", "Historian",
+ "Philosopher", "Enforcer", "Detective", "Journalist", "Photographer", "Sculptor",
+ # Expansion
+ "Mage", "Sorcerer", "Warlock", "Cleric", "Priest", "Templar", "Crusader",
+ "Berserker", "Barbarian", "Warrior", "Knight", "Duelist", "Swashbuckler",
+ "Assassin", "Thief", "Ninja", "Samurai", "Ronin", "Geomancer", "Pyromancer",
+ "Cryomancer", "Aeromancer", "Hydromancer", "Chronomancer", "Illusionist",
+ "Enchanter", "Runesmith", "Wordsmith", "Beastmaster", "Tamer", "Falconer",
+ "Herbalist", "Apothecary", "Poisoner", "Tinkerer", "Demolitionist",
+ "Pathfinder", "Trailblazer", "Surveyor", "Prospector", "Miner", "Lumberjack",
+ "Farmer", "Fisherman", "Shepherd", "Vintner", "Brewer", "Baker", "Butcher",
+ "Candlemaker", "Cobbler", "Cooper", "Fletcher", "Innkeeper", "Mason",
+ "Potter", "Sailor", "Shipwright", "Tailor", "Tanner", "Weaver", "Woodcarver",
+ "Governor", "Chancellor", "Diplomat", "Ambassador", "Councilor", "Judge",
+ "Librarian", "Archivist", "Mathematician", "Astronomer", "Botanist", "Zoologist",
+ "Archeologist", "Anthropologist", "Sociologist", "Psychologist", "Mentor",
+ "Tutor", "Instructor", "Professor", "Dean", "Headmaster", "Principal",
+ "Acolyte", "Initiate", "Neophyte", "Disciple", "Follower", "Zealot", "Cultist",
+ "Prophet", "Seer", "Diviner", "Mystic", "Visionary", "Ascetic", "Pilgrim",
+ "Mercenary", "BountyHunter", "Privateer", "Corsair", "Smuggler", "Outlaw",
+ "Bandit", "Rebel", "Revolutionary", "FreedomFighter", "Gladiator",
+ "Charioteer", "Pitfighter", "Champion", "Hero", "Villain", "Antihero",
+ "Adventurer", "Soldier", "Officer", "General", "Admiral", "Marshal",
+ "Tactician", "Quartermaster", "Medic", "CombatMedic", "FieldAgent",
+ "Operative", "DoubleAgent", "Infiltrator", "Saboteur", "Courier", "Messenger",
+ "Herald", "TownCrier", "Guide", "Interpreter", "Translator", "Negotiator",
+ "Arbitrator", "Mediator", "Executioner", "Jailer", "Constable", "Sheriff",
+ "Bailiff", "Investigator", "Foreman", "Supervisor", "Manager", "Director",
+ "Executive", "Administrator", "Secretary", "Clerk", "Accountant", "Auditor",
+ "Actuary", "Banker", "Financier", "Investor", "Speculator", "Entrepreneur",
+ "Artisan", "Craftsman", "Technician", "Mechanic", "Operator", "Programmer",
+ "Developer", "SysAdmin", "NetAdmin", "DBAdmin", "Webmaster", "ContentCreator",
+ "Influencer", "Blogger", "Vlogger", "Podcaster", "Streamer", "Moderator",
+ "Animator", "Illustrator", "Painter", "Engraver", "Printer", "Composer",
+ "Arranger", "Conductor", "Performer", "Actor", "Dancer", "Choreographer",
+ "Orator", "Storyteller", "Poet", "Playwright", "Novelist", "Editor",
+ "Publisher", "Critic", "Reviewer", "Commentator", "Pundit", "Host",
+ "Announcer", "Reporter", "Anchor", "Correspondent", "Cameraman", "Director",
+ "Producer", "SoundEngineer", "LightingTech", "SetDesigner", "Costumer",
+ "MakeupArtist", "Stylist", "Barber", "Beautician", "Therapist", "Counselor",
+ "Coach", "Trainer", "Dietitian", "Nurse", "Surgeon", "Dentist", "Optometrist",
+ "Pharmacist", "Paramedic", "Veterinarian", "Caretaker", "Nanny", "Butler",
+ "Maid", "Valet", "Chauffeur", "Bodyguard", "Bouncer", "Doorman", "Concierge",
+ "Bellhop", "Waiter", "Bartender", "Sommelier", "Barista", "FlightAttendant",
+ "Librarian", "MuseumGuide", "ParkRanger", "Lifeguard", "Firefighter",
+ "PoliceOfficer", "Detective", "Profiler", "IntelligenceAgent", "Analyst",
+ "Cryptographer", "Codebreaker", "Linguist", "Archivist", "Researcher",
+ "LabTechnician", "FieldResearcher", "Experimentalist", "Theorist", "Statistician",
+ "DataScientist", "MachineLearningEngineer", "AI_Specialist", "Roboticist",
+ "NetworkEngineer", "SecurityAnalyst", "PenTester", "EthicalHacker",
+ "ForensicAnalyst", "GameDeveloper", "LevelDesigner", "NarrativeDesigner",
+ "SoundDesigner", "Tester", "QA_Engineer", "CommunityManager", "SupportAgent",
+ "Salesperson", "Marketer", "Advertiser", "PR_Specialist", "Recruiter",
+ "HR_Manager", "Lawyer", "Paralegal", "Judge", "Politician", "Activist",
+ "Lobbyist", "UnionRep", "Volunteer", "Philanthropist", "SocialWorker",
+ "Consultant", "Freelancer", "Contractor", "GigWorker", "SoleProprietor",
+ "Journeyman", "Expert", "Virtuoso", "Prodigy", "Maestro", "Specialist",
+ "Generalist", "Pioneer", "Innovator", "Futurist", "Visionary", "Leader",
+ "Follower", "Helper", "Assistant", "Associate", "Partner", "Collaborator",
+ "Competitor", "Rival", "Mentor", "Protege", "Patron", "Client", "Customer",
+ "Patient", "Student", "Citizen", "Resident", "Immigrant", "Expatriate",
+ "Refugee", "Tourist", "Traveler", "Wanderer", "Drifter", "Outcast", "Exile",
+ "Survivor", "Witness", "Observer", "Participant", "Subject", "Candidate",
+ "Contender", "Challenger", "Victor", "Loser", "Slave", "Servant", "Peasant",
+ "Serf", "Commoner", "Nobleman", "Aristocrat", "Royalty", "Emperor", "King",
+ "Queen", "Prince", "Princess", "Duke", "Duchess", "Marquis", "Count",
+ "Viscount", "Baron", "Lord", "Lady", "Sir", "Dame", "Esquire", "Gentleman",
+ # Add more niche/specific/combined roles if needed to reach 750
+ "SkyCaptain", "DeepMiner", "GeneSplicer", "MemeLord", "DataWrangler",
+ "SynthWeaver", "BioHacker", "RealityBender", "VoidWalker", "StarSeer",
+ "TimeWarden", "SoulBinder", "ShadowDancer", "LightBringer", "StormCaller",
+ "EarthShaker", "FlameWielder", "IceShaper", "PlantWhisperer", "MetalShaper",
+ "BloodMage", "SpiritTalker", "DreamWalker", "NightmareWeaver", "ChaosAgent",
+ "OrderKeeper", "TruthSeeker", "LieSmith", "FateSpinner", "DoomBringer",
+ "HopeBearer", "MemoryKeeper", "LoreMaster", "MythMaker", "LegendSeeker",
+ "ClockMaker", "MapMaker", "ToyMaker", "Perfumer", "GloveMaker", "HatMaker",
+ "LockSmith", "GemCutter", "GlassBlower", "StoneMason", "RoadBuilder",
+ "BridgeBuilder", "CanalDigger", "WellDigger", "ChimneySweep", "RatCatcher",
+ "GongFarmer", "Mudlark", "Scavenger", "Recycler", "JunkDealer", "PawnBroker",
+ "MoneyLender", "BookBinder", "Illuminator", "Calligrapher", "Courtier",
+ "Emissary", "Legate", "Envoy", "Plenipotentiary", "Spymaster", "AssassinGuildLeader",
+ "ThiefGuildMaster", "MercenaryCaptain", "PirateKing", "Warlord", "Chieftain",
+ "TribalElder", "MedicineMan", "WitchDoctor", "HighPriest", "Abbot", "Bishop",
+ "Cardinal", "Pope", "Imam", "Rabbi", "Guru", "Sensei", "Roshi", "Lama",
+ "DruidArchon", "RangerLord", "PaladinOrderMaster", "Archmage", "MasterAssassin",
+ "Grandmaster", "CelestialPilot", "QuantumPhysicist", "NeuroScientist",
+ "AstroBiologist", "CryptoZoologist", "ParaPsychologist", "Ufologist",
+ "ConspiracyTheorist", "MythBuster", "FactChecker", "Debunker", "Propagandist",
+ "SpinDoctor", "Satirist", "Parodist", "Impersonator", "Mimic", "Ventriloquist",
+ "Puppeteer", "CircusMaster", "RingLeader", "Acrobat", "Contortionist",
+ "Strongman", "KnifeThrower", "FireEater", "SwordSwallower", "Magician",
+ "EscapeArtist", "Mentalist", "Hypnotist", "AnimalTrainer", "Clown", "Harlequin",
+ "Pierrot", "Pantomime", "CharacterActor", "Stuntman", "VoiceActor", "Narrator",
+ "Auctioneer", "Realtor", "Surveyor", "Appraiser", "InsuranceAgent",
+ "Underwriter", "ClaimsAdjuster", "LossPreventer", "SecurityGuard",
+ "AirTrafficController", "TrainConductor", "BusDriver", "TaxiDriver",
+ "Trucker", "DeliveryDriver", "Dispatcher", "Logistician", "SupplyChainManager",
+ "WarehouseWorker", "ForkliftOperator", "CraneOperator", "HeavyEquipmentOp",
+ "Welder", "Pipefitter", "Electrician", "Plumber", "HVACTech", "Carpenter",
+ "Roofer", "Painter", "Drywaller", "Floorer", "TileSetter", "Landscaper",
+ "Arborist", "Groundskeeper", "PoolCleaner", "Exterminator", "Janitor",
+ "Custodian", "SanitationWorker", "RecyclingOperator", "DemolitionWorker",
+ "HazardousMaterialsTech", "SafetyInspector", "BuildingInspector", "FoodInspector",
+ "HealthInspector", "CustomsOfficer", "ImmigrationOfficer", "BorderPatrolAgent",
+ "ParkRanger", "FishAndGameWarden", "Forester", "Conservationist",
+ "Ecologist", "Oceanographer", "Meteorologist", "Climatologist", "Volcanologist",
+ "Seismologist", "Paleontologist", "Mineralogist", "Petrologist", "Hydrologist",
+ "Glaciologist", "SoilScientist", "Agronomist", "Horticulturist", "Florist",
+ "Ichthyologist", "Herpetologist", "Ornithologist", "Entomologist", "Mammalogist",
+ "Primatologist", "Microbiologist", "Virologist", "Bacteriologist", "Mycologist",
+ "Parasitologist", "Immunologist", "Geneticist", "Epidemiologist", "Toxicologist",
+ "Pharmacologist", "Pathologist", "Radiologist", "Anesthesiologist", "Cardiologist",
+ "Dermatologist", "Endocrinologist", "Gastroenterologist", "Hematologist",
+ "Nephrologist", "Neurologist", "Oncologist", "Ophthalmologist", "Orthopedist",
+ "Otolaryngologist", "Pediatrician", "Psychiatrist", "Pulmonologist", "Rheumatologist",
+ "Urologist", "Podiatrist", "Chiropractor", "Acupuncturist", "MassageTherapist",
+ "PhysicalTherapist", "OccupationalTherapist", "SpeechTherapist", "Audiologist",
+ "Midwife", "Doula", "Mortician", "Embalmer", "Coroner", "MedicalExaminer",
+ "ForensicScientist", "BallisticsExpert", "FingerprintAnalyst", "DNAAnalyst",
+ "DocumentExaminer", "ArsonInvestigator", "AccidentReconstructionist",
+ "PolygraphExaminer", "K9Officer", "MountedPolice", "SWATOfficer", "HostageNegotiator",
+ "BombTechnician", "AirMarshal", "SecretServiceAgent", "FBI_Agent", "CIA_Agent",
+ "NSA_Analyst", "DEA_Agent", "ATF_Agent", "US_Marshal", "DiplomaticSecurity",
+ "MilitaryPolice", "CoastGuard", "Infantryman", "Artilleryman", "CavalryScout",
+ "TankCommander", "CombatEngineer", "Pilot", "Navigator", "DroneOperator",
+ "Submariner", "SEAL", "GreenBeret", "Ranger", "DeltaForce", "Pararescueman",
+ "IntelligenceOfficer", "LogisticsOfficer", "PersonnelOfficer", "PublicAffairs",
+ "Chaplain", "MilitaryLawyer", "MilitaryDoctor", "FlightSurgeon", "CyberWarfare",
+ "SpaceForceGuardian", "TestPilot", "Astronaut", "MissionControl", "RocketScientist",
+ "SatelliteTech", "SpaceSystemsOp", "PlanetaryScientist", "ExoBiologist",
+ "Terraformer", "AstroMiner", "StellarCartographer", "WarpFieldSpecialist",
+ "Cyberneticist", "AndroidTechnician", "AI_Psychologist", "SynthProgrammer",
+ "HoloDesigner", "VR_Architect", "NeuralInterfaceTech", "BioEnhancementSpec",
+ "CloningTechnician", "CryonicsSpecialist", "Nanotechnologist", "QuantumMechanic",
+ "ZeroG_Welder", "AsteroidMiner", "LunarGeologist", "MartianBotanist",
+ "TitanFisherman", "EuropaExplorer", "GasGiantProspector", "VoidSurveyor",
+ "AlienLinguist", "XenoAnthropologist", "FirstContactSpec", "GalacticDiplomat",
+ "StarshipCaptain", "FleetAdmiral", "SectorCommander", "PlanetaryGovernor",
+ "ImperialGuard", "RebelLeader", "SmugglerCaptain", "BountyGuildMaster",
+ "InfoBroker", "CyberRunner", "StreetSamurai", "Rigger", "Decker", "Technoshaman",
+ "DataThief", "CorpSecOfficer", "Fixer", "Ripperdoc", "Joytech", "SimstimArtist",
+ "MediaProducer", "Netcaster", "TruthSayer", "ProphetOfWoe", "CultLeader",
+ "DoomsdayPrepper", "Survivalist", "Homesteader", "Recluse", "Misanthrope",
+ "Philanthropist", "Humanitarian", "Activist", "Advocate", "Organizer",
+ "Educator", "Motivator", "Inspirer", "RoleModel", "Iconoclast", "Maverick",
+ "Renegade", "Pioneer", "Trailblazer", "StandardBearer", "Vanguard", "Luminary", "Andy-4-"
+]))
+
+ADJECTIVES = list(set([
+ # Core
+ "Code", "Music", "Official", "Streamer", "Tech", "Starry", "Simple",
+ "Big", "Gaming", "Workout", "DIY", "Mindful", "Foodie", "Travel",
+ "Pixel", "Byte", "Data", "Synth", "Digital", "Analog", "Creative",
+ "Brave", "Happy", "Strong", "Quiet", "Agile", "Electric", "Mystic",
+ "Fierce", "Clever", "Speedy", "Golden", "Silver", "Cosmic", "Infinite",
+ "Quantum", "Stealthy", "Radiant", "Crimson", "Azure", "Mysterious",
+ "Vivid", "Silent", "Roaring", "Frozen", "Burning", "Virtual", "Cyber",
+ "Galactic", "Stellar", "Solar", "Lunar", "Arcane", "Ancient", "Forgotten",
+ "Hidden", "Secret", "Whispering", "Shadowy", "Luminous", "Glowing",
+ "Magnetic", "Sonic", "Crystal", "Diamond", "Emerald", "Ruby", "Sapphire",
+ "Bronze", "Iron", "Steel", "Obsidian", "Molten", "Icy", "Blazing",
+ "Stormy", "Windy", "Rainy", "Sunny", "Cloudy", "Misty", "Ethereal",
+ "Nimble", "Swift", "Bold", "Noble", "Regal", "Royal", "Humble",
+ "Gentle", "Savage", "Wild", "Primal", "Eternal", "Boundless", "Supreme",
+ "Ultimate", "Perfect", "Flawless", "Broken", "Glitched", "Corrupted",
+ "Sacred", "Hallowed", "Cursed", "Haunted", "Undead", "Living", "Breathing",
+ "Mechanical", "Organic", "Temporal", "Spatial", "Abstract", "Concrete",
+ "Logical", "Chaotic", "Mythic", "Legendary", "Epic", "Rare", "Common",
+ # Expansion
+ "Grand", "Great", "Small", "Tiny", "Huge", "Massive", "Micro", "Nano",
+ "Quick", "Slow", "Fast", "Rapid", "Sudden", "Gradual", "Patient", "Eager",
+ "Calm", "Angry", "Furious", "Peaceful", "Serene", "Turbulent", "Violent",
+ "Kind", "Cruel", "Mean", "Nice", "Generous", "Stingy", "Selfish", "Altruistic",
+ "Honest", "Deceitful", "True", "False", "Fake", "Genuine", "Authentic",
+ "Loyal", "Treacherous", "Faithful", "Fickle", "Brave", "Cowardly", "Timid",
+ "Fearless", "Courageous", "Daring", "Reckless", "Cautious", "Prudent",
+ "Wise", "Foolish", "Ignorant", "Knowledgeable", "Learned", "Erudite",
+ "Simple", "Complex", "Intricate", "Elaborate", "Plain", "Ornate", "Fancy",
+ "Beautiful", "Ugly", "Hideous", "Gorgeous", "Attractive", "Repulsive",
+ "Clean", "Dirty", "Filthy", "Pristine", "Pure", "Tainted", "Polluted",
+ "Bright", "Dim", "Dark", "Gloomy", "Murky", "Shining", "Gleaming", "Dull",
+ "Sharp", "Blunt", "Pointed", "Rounded", "Smooth", "Rough", "Coarse", "Fine",
+ "Hard", "Soft", "Firm", "Flabby", "Rigid", "Flexible", "Pliant", "Stiff",
+ "Heavy", "Light", "Weightless", "Dense", "Sparse", "Thick", "Thin",
+ "Wide", "Narrow", "Broad", "Slim", "Fat", "Skinny", "Lean", "Stout",
+ "Tall", "Short", "Long", "Brief", "High", "Low", "Deep", "Shallow",
+ "Hot", "Cold", "Warm", "Cool", "Tepid", "Frigid", "Scalding", "Arctic",
+ "Tropical", "Temperate", "Arid", "Humid", "Dry", "Wet", "Damp", "Soggy",
+ "Loud", "Noisy", "Silent", "Mute", "Hushed", "Resonant", "Melodious",
+ "Harmonious", "Discordant", "Cacophonous", "Sweet", "Sour", "Bitter",
+ "Salty", "Spicy", "Savory", "Bland", "Tasty", "Delicious", "Nasty",
+ "Fragrant", "Aromatic", "Pungent", "Stinky", "Odorous", "Scented",
+ "Red", "Orange", "Yellow", "Green", "Blue", "Purple", "Violet", "Indigo",
+ "Pink", "Brown", "Black", "White", "Gray", "Beige", "Cream", "Maroon",
+ "Navy", "Teal", "Aqua", "Lime", "Olive", "Gold", "Copper", "Platinum",
+ "Chromatic", "Iridescent", "Opalescent", "Pearly", "Metallic", "Matte",
+ "Glossy", "Transparent", "Translucent", "Opaque", "Clear", "Cloudy",
+ "Young", "Old", "New", "Aged", "Antique", "Modern", "Futuristic", "Retro",
+ "Primeval", "Prehistoric", "Medieval", "Victorian", "Contemporary",
+ "Living", "Dead", "Undead", "Spectral", "Ghostly", "Phantom", "Corporeal",
+ "Physical", "Mental", "Spiritual", "Emotional", "Psychic", "Astral",
+ "Divine", "Infernal", "Demonic", "Angelic", "Celestial", "Fey", "Elemental",
+ "Natural", "Artificial", "Synthetic", "Simulated", "Augmented", "Bionic",
+ "Robotic", "Clockwork", "SteamPowered", "Nuclear", "SolarPowered", "WindPowered",
+ "GeoThermal", "BioLuminescent", "Photosynthetic", "Radioactive", "Toxic",
+ "Venomous", "Poisonous", "Inert", "Volatile", "Stable", "Unstable",
+ "Explosive", "Implosive", "Acidic", "Alkaline", "Neutral", "Charged",
+ "Magnetic", "Conductive", "Insulating", "Resistant", "Absorbent", "Reflective",
+ "Emissive", "Stealthy", "Visible", "Invisible", "Camouflaged", "Disguised",
+ "Known", "Unknown", "Familiar", "Strange", "Exotic", "Foreign", "Alien",
+ "Native", "Indigenous", "Local", "Regional", "National", "Global", "Universal",
+ "Public", "Private", "Personal", "Communal", "Collective", "Individual",
+ "Open", "Closed", "Locked", "Sealed", "Guarded", "Protected", "Vulnerable",
+ "Exposed", "Secure", "Insecure", "Safe", "Dangerous", "Hazardous", "Risky",
+ "Beneficial", "Harmful", "Helpful", "Useless", "Useful", "Valuable",
+ "Worthless", "Priceless", "Cheap", "Expensive", "Affordable", "Luxurious",
+ "Basic", "Advanced", "Fundamental", "Essential", "Optional", "Mandatory",
+ "Required", "Forbidden", "Permitted", "Legal", "Illegal", "Lawful", "Unlawful",
+ "Ethical", "Unethical", "Moral", "Immoral", "Amoral", "Just", "Unjust",
+ "Fair", "Unfair", "Right", "Wrong", "Correct", "Incorrect", "Accurate",
+ "Inaccurate", "Precise", "Imprecise", "Vague", "Definite", "Ambiguous",
+ "Certain", "Uncertain", "Probable", "Improbable", "Possible", "Impossible",
+ "Real", "Unreal", "Imaginary", "Fictional", "Factual", "Symbolic", "Literal",
+ "Abstract", "Figurative", "Empty", "Full", "Hollow", "Solid", "Filled",
+ "Vacant", "Occupied", "Crowded", "Deserted", "Isolated", "Connected",
+ "Linked", "Separate", "United", "Divided", "Whole", "Partial", "Complete",
+ "Incomplete", "Finished", "Unfinished", "Perfect", "Imperfect", "Damaged",
+ "Intact", "Operational", "Defective", "Functional", "Dysfunctional",
+ "Healthy", "Sick", "Injured", "Wounded", "Healed", "Diseased", "Immune",
+ "Alive", "Animated", "Inanimate", "Conscious", "Unconscious", "Sentient",
+ "Sapient", "Intelligent", "Mindless", "Aware", "Oblivious", "Alert",
+ "Drowsy", "Sleeping", "Awake", "Dreaming", "Lucid", "Nightmarish",
+ "Hopeful", "Hopeless", "Optimistic", "Pessimistic", "Joyful", "Sorrowful",
+ "Cheerful", "Gloomy", "Excited", "Bored", "Interested", "Indifferent",
+ "Passionate", "Apathetic", "Loving", "Hateful", "Friendly", "Hostile",
+ "Welcoming", "Suspicious", "Trusting", "Distrustful", "Gullible", "Skeptical",
+ "Naive", "Cynical", "Innocent", "Guilty", "Blameless", "Responsible",
+ "Free", "Captive", "Enslaved", "Liberated", "Independent", "Dependent",
+ "Autonomous", "Subordinate", "Dominant", "Submissive", "Equal", "Unequal",
+ "Superior", "Inferior", "Primary", "Secondary", "Tertiary", "Major", "Minor",
+ "Significant", "Insignificant", "Crucial", "Trivial", "Urgent", "Routine",
+ "Special", "Ordinary", "Normal", "Abnormal", "Typical", "Atypical",
+ "Standard", "Custom", "Unique", "Generic", "Specific", "General",
+ "Universal", "Particular", "Consistent", "Inconsistent", "Reliable",
+ "Unreliable", "Predictable", "Unpredictable", "Stable", "Erratic",
+ "Constant", "Variable", "Fixed", "Adjustable", "Static", "Dynamic",
+ "Active", "Passive", "Inert", "Reactive", "Proactive", "Responsive",
+ "Sensitive", "Insensitive", "Delicate", "Robust", "Fragile", "Durable",
+ "Temporary", "Permanent", "Ephemeral", "Lasting", "Fleeting", "Enduring",
+ "Ancient", "Timeless", "Momentary", "Instantaneous", "Protracted",
+ "Forthcoming", "Past", "Present", "Future", "Initial", "Final", "Penultimate",
+ "Sequential", "Simultaneous", "Concurrent", "Asynchronous", "Synchronous",
+ "Parallel", "Serial", "Linear", "Nonlinear", "Cyclical", "Spiral",
+ "Random", "Ordered", "Structured", "Unstructured", "Organized", "Disorganized",
+ "Systematic", "Haphazard", "Methodical", "Intuitive", "Rational", "Irrational",
+ "Logical", "Illogical", "Coherent", "Incoherent", "Articulate", "Inarticulate",
+ "Eloquent", "Mumbling", "Fluent", "Stuttering", "Clear", "Obscure",
+ "Explicit", "Implicit", "Direct", "Indirect", "Subtle", "Obvious",
+ "Manifest", "Latent", "Overt", "Covert", "Public", "Confidential",
+ "Classified", "TopSecret", "Unclassified", "Encoded", "Decoded", "Encrypted",
+ "Plaintext", "Austere", "Lavish", "Minimalist", "Baroque", "Ornate",
+ "Utilitarian", "Decorative", "Functional", "Ceremonial", "Ritualistic",
+ "Sacrificial", "Consecrated", "Desecrated", "Blessed", "Cursed", "Enchanted",
+ "Magical", "Mundane", "Ordinary", "Extraordinary", "Supernatural",
+ "Paranormal", "Preternatural", "Otherworldly", "Uncanny", "Weird", "Bizarre",
+ "Grotesque", "Surreal", "Absurd", "Comical", "Tragic", "Dramatic", "Lyrical",
+ "Poetic", "Prosaic", "Musical", "Rhythmic", "Silent", "Still", "Moving",
+ "Flowing", "Stagnant", "Vibrant", "Dull", "Energetic", "Lethargic",
+ "Restless", "Peaceful", "Manic", "Depressed", "Anxious", "Relaxed",
+ "Tense", "Loose", "Tight", "Slack", "Strained", "Comfortable", "Uncomfortable",
+ "Painful", "Painless", "Pleasant", "Unpleasant", "Agreeable", "Disagreeable",
+ "Satisfying", "Unsatisfying", "Fulfilling", "Frustrating", "Rewarding",
+ "Punishing", "Addictive", "Repulsive", "Alluring", "Tempting", "Forbidden",
+ "Sanctioned", "Approved", "Rejected", "Accepted", "Denied", "Confirmed",
+ "Refuted", "Verified", "Unverified", "Proven", "Unproven", "Tested",
+ "Untested", "Experimental", "Theoretical", "Practical", "Applied", "Pure",
+ "Academic", "Vocational", "Professional", "Amateur", "Expert", "Novice",
+ "Skilled", "Unskilled", "Talented", "Gifted", "Mediocre", "Incompetent",
+ "Proficient", "Deficient", "Capable", "Incapable", "Able", "Unable",
+ "Ready", "Unready", "Willing", "Unwilling", "Forced", "Voluntary",
+ "Compulsory", "Elective", "Chosen", "Imposed", "Innate", "Acquired",
+ "Inherited", "Learned", "Instinctive", "Conditioned", "Habitual", "Sporadic",
+ "Frequent", "Infrequent", "Rare", "Ubiquitous", "Endemic", "Epidemic",
+ "Pandemic", "Contagious", "Infectious", "Sterile", "Fertile", "Barren",
+ "Productive", "Unproductive", "Fruitful", "Futile", "Effective", "Ineffective",
+ "Efficient", "Inefficient", "Optimal", "Suboptimal", "Adequate", "Inadequate",
+ "Sufficient", "Insufficient", "Abundant", "Scarce", "Plentiful", "Meager",
+ "Rich", "Poor", "Wealthy", "Impoverished", "Prosperous", "Destitute",
+ "Lucky", "Unlucky", "Fortunate", "Unfortunate", "Blessed", "Doomed",
+ "Fated", "Random", "Destined", "Accidental", "Intentional", "Unintentional",
+ "Deliberate", "Spontaneous", "Calculated", "Impulsive", "Planned", "Unplanned",
+ "Expected", "Unexpected", "Surprising", "Predictable", "Inevitable", "Avoidable",
+ "Escapable", "Inescapable", "Solvable", "Insolvable", "Answerable", "Unanswerable",
+ "Known", "Unknowable", "Finite", "Measurable", "Immeasurable", "Comparable",
+ "Incomparable", "Related", "Unrelated", "Relevant", "Irrelevant", "Appropriate",
+ "Inappropriate", "Suitable", "Unsuitable", "Fitting", "Unfitting", "Seemly",
+ "Unseemly", "Decent", "Indecent", "Modest", "Arrogant", "Proud", "Vain",
+ "Humble", "Meek", "Assertive", "Aggressive", "Passive", "Docile", "Rebellious",
+ "Compliant", "Defiant", "Obedient", "Disobedient", "Respectful", "Disrespectful",
+ "Courteous", "Rude", "Polite", "Impolite", "Considerate", "Inconsiderate",
+ "Thoughtful", "Thoughtless", "Tactful", "Tactless", "Diplomatic", "Blunt",
+ "Subtle", "Frank", "Candid", "Reserved", "Outgoing", "Introverted", "Extroverted",
+ "Ambiverted", "Sociable", "Antisocial", "Solitary", "Gregarious", "Aloof",
+ "Approachable", "Distant", "Warm", "Cold", "Friendly", "Unfriendly", "Charming",
+ "Repellent", "Engaging", "Boring", "Interesting", "Dull", "Fascinating",
+ "Tedious", "Stimulating", "Monotonous", "Varied", "Diverse", "Homogeneous",
+ "Uniform", "Eclectic", "Assorted", "Miscellaneous", "Purebred", "Hybrid",
+ "Mixed", "Segregated", "Integrated", "Unified", "Fragmented", "Cohesive",
+ "Disparate", "Congruent", "Incongruent", "Compatible", "Incompatible",
+ "Harmonious", "Clashing", "Aligned", "Misaligned", "Balanced", "Unbalanced",
+ "Symmetrical", "Asymmetrical", "Centered", "OffCenter", "Level", "Slanted",
+ "Vertical", "Horizontal", "Diagonal", "Perpendicular", "Parallel", "Intersecting",
+ "Tangent", "Concentric", "Eccentric", "Orthogonal", "Radial", "Axial",
+ "Spherical", "Cubical", "Conical", "Cylindrical", "Planar", "Volumetric",
+ "Holographic", "Fractal", "Recursive", "Iterative", "Generative", "Procedural",
+ "Algorithmic", "Heuristic", "Stochastic", "Deterministic", "Emergent", "Complex",
+ "Networked", "Distributed", "Centralized", "Decentralized", "PeerToPeer",
+ "Hierarchical", "Flat", "Layered", "Nested", "Interconnected", "Intertwined",
+ "Woven", "Knitted", "Braided", "Fused", "Welded", "Bolted", "Glued",
+ "Stitched", "Bound", "Loose", "Free", "Contained", "Released", "Captured",
+ "Escaped", "Wild", "Domesticated", "Feral", "Tame", "Savage", "Civilized",
+ "Primitive", "Advanced", "Rudimentary", "Sophisticated", "Crude", "Refined",
+ "Polished", "RoughHewn", "Raw", "Cooked", "Processed", "Natural", "Organic",
+ "Synthetic", "Artificial", "Genuine", "Counterfeit", "Imitation", "Original",
+ "Reproduction", "Authentic", "Spurious", "Legitimate", "Illegitimate",
+ "Valid", "Invalid", "Sound", "Fallacious", "True", "Misleading", "Erroneous"
+]))
+
+OBJECTS = list(set([
+ # Core
+ "Wizardry", "Maven", "Account", "Squad", "Tips", "Night", "Life",
+ "Dreams", "Setup", "Warrior", "Dad", "Moments", "Gram", "Fotos",
+ "Tales", "Key", "Gem", "Crown", "Sword", "Shield", "Orb", "Crystal",
+ "Book", "Star", "Planet", "Cloud", "Tree", "River", "Mountain",
+ "City", "Code", "Pixel", "Byte", "Note", "Rhythm", "Brush", "Canvas",
+ "Machine", "Network", "Engine", "Galaxy", "Universe", "Dimension",
+ "Realm", "Kingdom", "Empire", "Citadel", "Fortress", "Tower", "Dungeon",
+ "Cavern", "Labyrinth", "Portal", "Gate", "Rune", "Sigil", "Talisman",
+ "Amulet", "Relic", "Artifact", "Scroll", "Tome", "Codex", "Grimoire",
+ "Map", "Compass", "Sextant", "Telescope", "Microscope", "Elixir", "Potion",
+ "Flask", "Vial", "Herb", "Root", "Seed", "Spore", "Gemstone", "Scepter",
+ "Wand", "Staff", "Blade", "Dagger", "Arrow", "Bow", "Axe", "Hammer",
+ "Armor", "Helmet", "Gauntlet", "Boot", "Cloak", "Ring", "Throne", "Altar",
+ "Forge", "Anvil", "Loom", "Quill", "Ink", "Parchment", "Pigment", "Clay",
+ "Stone", "Wood", "Metal", "Glass", "Circuit", "Wire", "Chip", "Core",
+ "Matrix", "Grid", "Node", "Server", "Database", "Algorithm", "Script",
+ "Glitch", "Bug", "Patch", "Mod", "Console", "Controller", "Keyboard",
+ "Mouse", "Headset", "Monitor", "Stream", "Channel", "Feed", "Echo",
+ "Signal", "Wave", "Particle", "Atom", "Molecule", "Sun", "Moon", "Comet",
+ "Asteroid", "Nebula", "Void", "Abyss", "Nexus", "Heart", "Soul", "Mind",
+ "Spirit", "Nightmare", "Memory", "Thought", "Idea", "Concept", "Theory",
+ "Law", "Rule", "Quest", "Journey", "Saga", "Legend", "Myth", "Fable",
+ "Story", "Song", "Melody", "Harmony", "Beat", "Pulse", "Silence",
+ "Shadow", "Light", "Dark", "Dawn", "Dusk", "Twilight", "Midnight",
+ "Noon", "Sky", "Rain", "Snow", "Wind", "Storm", "Fire", "Flame",
+ "Ember", "Ash", "Water", "Ocean", "Sea", "Lake", "Pond", "Tide",
+ "Earth", "Soil", "Sand", "Dust", "Rock", "Valley", "Forest", "Grove",
+ "Leaf", "Branch", "Flower", "Thorn", "Vine", "Moss", "Fungus", "Beast",
+ "Creature", "Monster", "Dragon", "Phoenix", "Griffin", "Unicorn", "Wolf",
+ "Bear", "Eagle", "Raven", "Serpent", "Spider", "Scarab", "Data", "Info",
+ "Knowledge", "Wisdom", "Power", "Force", "Energy", "Magic", "Source",
+ "Lock", "Chain", "Puzzle", "Riddle", "Secret", "Clue", "Truth", "Lie",
+ "Hope", "Fear", "Joy", "Sorrow", "Anger", "Peace", "Chaos", "Order",
+ "Death", "Fate", "Destiny", "Time", "Space", "Reality", "Illusion", "Specter",
+ # Expansion
+ "Castle", "Keep", "Manor", "Villa", "Palace", "Temple", "Shrine", "Monastery",
+ "Abbey", "Cathedral", "Church", "Chapel", "Mosque", "Synagogue", "Pagoda",
+ "Pyramid", "Ziggurat", "Mausoleum", "Tomb", "Crypt", "Catacomb", "Ossuary",
+ "Hut", "Cabin", "Cottage", "House", "Home", "Apartment", "Condo", "Studio",
+ "Loft", "Penthouse", "Mansion", "Estate", "Chateau", "Bungalow", "Townhouse",
+ "Shack", "Tent", "Yurt", "Igloo", "Treehouse", "Cave", "Burrow", "Nest",
+ "Hive", "Lair", "Den", "Sanctuary", "Refuge", "Haven", "Oasis", "Island",
+ "Peninsula", "Continent", "Archipelago", "Volcano", "Geyser", "HotSpring",
+ "Glacier", "Iceberg", "Fjord", "Canyon", "Gorge", "Ravine", "Plateau",
+ "Mesa", "Butte", "Hill", "Peak", "Summit", "Ridge", "Cliff", "Crag",
+ "Beach", "Shore", "Coast", "Delta", "Estuary", "Bay", "Gulf", "Strait",
+ "Channel", "Sound", "Lagoon", "Marsh", "Swamp", "Bog", "Fen", "Wetland",
+ "Tundra", "Taiga", "Savanna", "Prairie", "Steppe", "Desert", "Wasteland",
+ "Jungle", "Rainforest", "Woodland", "Thicket", "Copse", "Meadow", "Field",
+ "Pasture", "Garden", "Orchard", "Vineyard", "Farm", "Ranch", "Plantation",
+ "Road", "Path", "Trail", "Track", "Street", "Avenue", "Boulevard", "Highway",
+ "Freeway", "Bridge", "Tunnel", "Overpass", "Underpass", "Canal", "Aqueduct",
+ "Dam", "Reservoir", "Well", "Cistern", "Fountain", "Pipeline", "Sewer",
+ "Mine", "Quarry", "OilRig", "WindTurbine", "SolarPanel", "PowerPlant",
+ "Factory", "Workshop", "Mill", "Refinery", "Warehouse", "Silo", "Granary",
+ "Depot", "Hangar", "Dock", "Pier", "Wharf", "Harbor", "Port", "Airport",
+ "Station", "Terminal", "Platform", "Stop", "Market", "Bazaar", "Mall",
+ "Shop", "Store", "Boutique", "Emporium", "Gallery", "Museum", "Library",
+ "Archive", "School", "University", "College", "Academy", "Institute",
+ "Laboratory", "Observatory", "Studio", "Theater", "Cinema", "Amphitheater",
+ "Arena", "Stadium", "Colosseum", "Gymnasium", "Spa", "Bathhouse", "Hospital",
+ "Clinic", "Infirmary", "Asylum", "Sanitarium", "Orphanage", "Prison", "Jail",
+ "Barracks", "Garrison", "Armory", "Arsenal", "Bunker", "Trench", "Wall",
+ "Fence", "Barricade", "Moat", "Rampart", "Parapet", "Battlement", "Watchtower",
+ "Lighthouse", "BellTower", "ClockTower", "Spire", "Steeple", "Dome", "Arch",
+ "Column", "Pillar", "Statue", "Monument", "Obelisk", "Fresco", "Mural",
+ "Tapestry", "Mosaic", "StainedGlass", "Sculpture", "Painting", "Drawing",
+ "Sketch", "Etching", "Engraving", "Photograph", "Hologram", "Blueprint",
+ "Diagram", "Schematic", "Manuscript", "Document", "Letter", "Journal",
+ "Diary", "Ledger", "Logbook", "Manifest", "Treaty", "Contract", "Deed",
+ "Will", "Testament", "Proclamation", "Decree", "Edict", "Charter", "Constitution",
+ "Scripture", "Gospel", "Sutra", "Veda", "Koran", "Torah", "Bible", "Hymn",
+ "Prayer", "Chant", "Mantra", "Incantation", "Spell", "Curse", "Blessing",
+ "Prophecy", "Omen", "Sign", "Token", "Symbol", "Emblem", "Crest", "Banner",
+ "Flag", "Standard", "Pennant", "Badge", "Insignia", "Medal", "Ribbon",
+ "Coin", "Currency", "Note", "Bill", "Token", "Chip", "Bar", "Ingot", "Nugget",
+ "Dust", "Powder", "Crystal", "Shard", "Fragment", "Piece", "Slice", "Lump",
+ "Block", "Slab", "Sheet", "Plate", "Rod", "Bar", "Wire", "Cable", "Fiber",
+ "Thread", "String", "Rope", "Cord", "Twine", "Yarn", "Fabric", "Cloth",
+ "Textile", "Leather", "Hide", "Pelt", "Fur", "Wool", "Cotton", "Silk",
+ "Linen", "Hemp", "Canvas", "Paper", "Cardboard", "Plastic", "Rubber",
+ "Ceramic", "Porcelain", "Earthenware", "Brick", "Tile", "Concrete", "Asphalt",
+ "Tar", "Resin", "Amber", "Jet", "Ivory", "Bone", "Horn", "Antler", "Shell",
+ "Pearl", "Coral", "Scale", "Feather", "Tooth", "Claw", "Talon", "Fang",
+ "Venom", "Antidote", "Toxin", "Acid", "Base", "Solvent", "Catalyst", "Reagent",
+ "Compound", "Mixture", "Solution", "Suspension", "Emulsion", "Gel", "Foam",
+ "Aerosol", "Smoke", "Vapor", "Gas", "Liquid", "Solid", "Plasma", "Slime",
+ "Ooze", "Goo", "Mud", "Silt", "Clay", "Loam", "Gravel", "Pebble", "Boulder",
+ "Meteorite", "Tektite", "Geode", "Fossil", "PetrifiedWood", "Coal", "Graphite",
+ "Diamond", "Quartz", "Feldspar", "Mica", "Granite", "Basalt", "Marble",
+ "Slate", "Sandstone", "Limestone", "Chalk", "Flint", "Obsidian", "Pumice",
+ "Sulfur", "Salt", "Potash", "Nitrate", "Alum", "Borax", "Gypsum", "Talc",
+ "Asbestos", "IronOre", "CopperOre", "GoldOre", "SilverOre", "TinOre",
+ "LeadOre", "ZincOre", "NickelOre", "AluminumOre", "UraniumOre", "TitaniumOre",
+ "Platinum", "Palladium", "Rhodium", "Osmium", "Iridium", "Mercury",
+ "Arsenic", "Antimony", "Bismuth", "Cadmium", "Chromium", "Cobalt",
+ "Manganese", "Molybdenum", "Tungsten", "Vanadium", "Zirconium", "Gallium",
+ "Germanium", "Indium", "Selenium", "Tellurium", "Polonium", "Astatine",
+ "Radon", "Francium", "Radium", "Actinium", "Thorium", "Protactinium",
+ "Neptunium", "Plutonium", "Americium", "Curium", "Berkelium", "Californium",
+ "Einsteinium", "Fermium", "Mendelevium", "Nobelium", "Lawrencium",
+ "Rutherfordium", "Dubnium", "Seaborgium", "Bohrium", "Hassium", "Meitnerium",
+ "Darmstadtium", "Roentgenium", "Copernicium", "Nihonium", "Flerovium",
+ "Moscovium", "Livermorium", "Tennessine", "Oganesson", "Element",
+ "Isotope", "Ion", "Cation", "Anion", "Proton", "Neutron", "Electron",
+ "Photon", "Quark", "Lepton", "Boson", "Fermion", "Gluon", "Graviton",
+ "Neutrino", "Antimatter", "DarkMatter", "DarkEnergy", "Singularity",
+ "BlackHole", "WhiteHole", "Wormhole", "Quasar", "Pulsar", "Magnetar",
+ "Supernova", "Hypernova", "RedGiant", "WhiteDwarf", "BrownDwarf", "NeutronStar",
+ "Protostar", "MainSequence", "Constellation", "Asterism", "Cluster", "Group",
+ "Supercluster", "Filament", "Wall", "Void", "CosmicMicrowaveBackground",
+ "BigBang", "Inflation", "Multiverse", "Hyperspace", "Subspace", "Slipstream",
+ "WarpDrive", "JumpDrive", "Teleporter", "Stargate", "Transporter", "Replicator",
+ "Holodeck", "Phaser", "Blaster", "Lightsaber", "ForceField", "DeflectorShield",
+ "TractorBeam", "CloakingDevice", "SensorArray", "Communicator", "Tricorder",
+ "UniversalTranslator", "Cyberdeck", "NeuralInterface", "Exoskeleton", "CyborgImplant",
+ "BionicArm", "ArtificialEye", "SyntheticOrgan", "GeneMod", "Vat", "Clone",
+ "Android", "Robot", "Drone", "Automaton", "Golem", "Homunculus", "Gargoyle",
+ "Chimera", "Manticore", "Hydra", "Cerberus", "Cyclops", "Giant", "Titan",
+ "Ogre", "Troll", "Goblin", "Orc", "Kobold", "Gremlin", "Imp", "Demon", "Devil",
+ "Angel", "Archangel", "Seraph", "Cherub", "Valkyrie", "Nymph", "Dryad", "Sprite",
+ "Pixie", "Fairy", "Leprechaun", "Gnome", "Dwarf", "Elf", "Hobbit", "Halfling",
+ "Centaur", "Satyr", "Faun", "Minotaur", "Harpy", "Siren", "Mermaid", "Merman",
+ "Naga", "Lamia", "Gorgon", "Medusa", "Sphinx", "Basilisk", "Cockatrice",
+ "Wyvern", "Roc", "Kraken", "Leviathan", "Behemoth", "Juggernaut", "Werewolf",
+ "Vampire", "Lich", "Ghoul", "Zombie", "Mummy", "Skeleton", "Ghost", "Phantom",
+ "Specter", "Wraith", "Poltergeist", "Banshee", "Shade", "Doppelganger",
+ "Shapeshifter", "Illusion", "Mirage", "Phantasm", "Hallucination", "Apparition",
+ "Entity", "Being", "Essence", "Presence", "Aura", "Emanation", "Vibration",
+ "Frequency", "Wavelength", "Spectrum", "Color", "Hue", "Tint", "Shade",
+ "Tone", "Sound", "Noise", "Pitch", "Volume", "Timbre", "Resonance", "Silence",
+ "Scent", "Odor", "Aroma", "Fragrance", "Stench", "Taste", "Flavor", "Aftertaste",
+ "Texture", "Feel", "Grain", "Temperature", "Pressure", "Weight", "Mass",
+ "Density", "Volume", "Area", "Length", "Width", "Height", "Depth", "Distance",
+ "Proximity", "Angle", "Curve", "Line", "Point", "Shape", "Form", "Structure",
+ "Pattern", "Design", "Composition", "Layout", "Arrangement", "Configuration",
+ "System", "Mechanism", "Device", "Apparatus", "Instrument", "Tool", "Utensil",
+ "Gadget", "Contraption", "Widget", "Gizmo", "Thingamajig", "Doodad", "Item",
+ "Object", "Article", "Commodity", "Product", "Goods", "Wares", "Merchandise",
+ "Supplies", "Provisions", "Equipment", "Gear", "Tackle", "Kit", "Outfit",
+ "Apparel", "Clothing", "Garment", "Attire", "Vestment", "Raiment", "Costume",
+ "Uniform", "Jewelry", "Accessory", "Adornment", "Trinket", "Bauble", "Knickknack",
+ "Souvenir", "Memento", "Heirloom", "Treasure", "Prize", "Reward", "Bounty",
+ "Loot", "Spoils", "Plunder", "Trophy", "Gift", "Present", "Offering", "Tribute",
+ "Donation", "Alms", "Charity", "Sacrifice", "Libation", "Incense", "Candle",
+ "Torch", "Lantern", "Lamp", "Lightbulb", "Laser", "Beam", "Ray", "Glimmer",
+ "Spark", "Flash", "Glow", "Shimmer", "Glitter", "Reflection", "Refraction",
+ "Diffraction", "Interference", "Polarization", "Lense", "Mirror", "Prism",
+ "Filter", "Screen", "Monitor", "Display", "Projector", "Camera", "Binoculars",
+ "MagnifyingGlass", "Eyeglasses", "ContactLense", "Microphone", "Speaker",
+ "Headphones", "Earbuds", "Amplifier", "Receiver", "Transmitter", "Antenna",
+ "SatelliteDish", "Modem", "Router", "Switch", "Hub", "Firewall", "Proxy",
+ "VPN", "Cable", "Connector", "Port", "Jack", "Plug", "Socket", "Adapter",
+ "Battery", "PowerSupply", "Generator", "Capacitor", "Resistor", "Transistor",
+ "Diode", "Inductor", "IntegratedCircuit", "Microprocessor", "MemoryChip",
+ "HardDrive", "SSD", "FlashDrive", "OpticalDisc", "FloppyDisk", "TapeDrive",
+ "Motherboard", "CPU", "GPU", "RAM", "ROM", "BIOS", "OperatingSystem", "Software",
+ "Application", "Program", "App", "Utility", "Driver", "Firmware", "Malware",
+ "Virus", "Worm", "Trojan", "Ransomware", "Spyware", "Adware", "Keylogger",
+ "Rootkit", "Botnet", "Firewall", "Antivirus", "Sandbox", "Honeypot",
+ "EncryptionKey", "Password", "Passphrase", "Biometric", "Fingerprint",
+ "RetinaScan", "Voiceprint", "FaceRecognition", "Token", "Certificate",
+ "DigitalSignature", "Blockchain", "Cryptocurrency", "Bitcoin", "Ethereum",
+ "NFT", "SmartContract", "Ledger", "Transaction", "Block", "Hash", "Wallet",
+ "Exchange", "MiningRig", "Node", "Protocol", "Algorithm", "Heuristic",
+ "Function", "Variable", "Constant", "Parameter", "Argument", "Loop",
+ "Condition", "Statement", "Expression", "Syntax", "Semantics", "Compiler",
+ "Interpreter", "Debugger", "IDE", "TextEditor", "VersionControl", "Repository",
+ "Branch", "Merge", "Commit", "Push", "Pull", "Clone", "Fork", "API", "SDK",
+ "Library", "Framework", "Module", "Package", "Dependency", "Class", "Object",
+ "Method", "Attribute", "Inheritance", "Polymorphism", "Encapsulation",
+ "Abstraction", "Interface", "DesignPattern", "Architecture", "Model", "View",
+ "Controller", "DatabaseSchema", "Table", "Row", "Column", "Index", "Query",
+ "SQL", "NoSQL", "JSON", "XML", "CSV", "YAML", "HTML", "CSS", "JavaScript",
+ "Python", "Java", "C++", "CSharp", "Ruby", "PHP", "Swift", "Kotlin", "Go",
+ "Rust", "TypeScript", "Assembly", "MachineCode", "Binary", "Hexadecimal",
+ "Decimal", "Octal", "Character", "String", "Integer", "Float", "Boolean",
+ "Array", "List", "Tuple", "Set", "Dictionary", "Map", "Graph", "Tree",
+ "Stack", "Queue", "LinkedList", "Heap", "Bit", "Flag", "Mask", "Pointer",
+ "Reference", "Handle", "Address", "Buffer", "Cache", "Stream", "File",
+ "Directory", "Path", "URL", "URI", "DomainName", "IP_Address", "MAC_Address",
+ "PortNumber", "Socket", "Packet", "Frame", "Datagram", "Segment", "ProtocolStack",
+ "OSI_Model", "TCP_IP", "HTTP", "HTTPS", "FTP", "SSH", "SMTP", "POP3", "IMAP",
+ "DNS", "DHCP", "UDP", "ICMP", "ARP", "Ethernet", "WiFi", "Bluetooth", "NFC",
+ "Cellular", "Satellite", "FiberOptic", "CopperWire", "RadioWave", "Microwave",
+ "Infrared", "Ultraviolet", "XRay", "GammaRay", "VisibleLight", "SoundWave",
+ "Ultrasound", "Infrasound", "SeismicWave", "GravityWave", "Shockwave",
+ "BlastWave", "TidalWave", "Tsunami", "Ripple", "Current", "Eddy", "Vortex",
+ "Whirlpool", "Waterspout", "Tornado", "Hurricane", "Typhoon", "Cyclone",
+ "Blizzard", "Thunderstorm", "Lightning", "Thunder", "Hail", "Sleet", "Fog",
+ "Smog", "Haze", "Mist", "Dew", "Frost", "Ice", "Snowflake", "Avalanche",
+ "Landslide", "Mudslide", "Earthquake", "Aftershock", "Tremor", "Eruption",
+ "Lava", "Magma", "AshCloud", "PyroclasticFlow", "Caldera", "Crater",
+ "Fissure", "Vent", "FaultLine", "TectonicPlate", "Mantle", "OuterCore",
+ "InnerCore", "Crust", "Atmosphere", "Troposphere", "Stratosphere", "Mesosphere",
+ "Thermosphere", "Exosphere", "Ionosphere", "Magnetosphere", "OzoneLayer",
+ "VanAllenBelt", "Aurora", "Meteor", "Meteoroid", "ShootingStar", "Fireball",
+ "Bolide", "AsteroidBelt", "KuiperBelt", "OortCloud", "InterstellarMedium",
+ "IntergalacticSpace", "LocalGroup", "VirgoSupercluster", "Laniakea",
+ "ObservableUniverse", "CosmicWeb", "EventHorizon", "Spacetime", "Continuum",
+ "FabricOfReality", "AlternateDimension", "PocketUniverse", "AstralPlane",
+ "EtherealPlane", "Feywild", "Shadowfell", "ElementalPlane", "Heavens",
+ "Hells", "Limbo", "Purgatory", "Valhalla", "Elysium", "Underworld", "Afterlife",
+ "Reincarnation", "Nirvana", "Enlightenment", "Ascension", "Transcendence",
+ "Deity", "God", "Goddess", "Pantheon", "Mythology", "Cosmology", "Theology",
+ "Philosophy", "Ideology", "Doctrine", "Dogma", "Creed", "Belief", "Faith",
+ "Doubt", "Heresy", "Blasphemy", "Apostasy", "Schism", "Cult", "Sect",
+ "Religion", "Spirituality", "Atheism", "Agnosticism", "Humanism", "Secularism",
+ "Nihilism", "Existentialism", "Stoicism", "Epicureanism", "Cynicism",
+ "Hedonism", "Utilitarianism", "Rationalism", "Empiricism", "Idealism",
+ "Materialism", "Dualism", "Monism", "Determinism", "FreeWill", "Predestination",
+ "Karma", "Dharma", "Samsara", "Moksha", "Tao", "Chi", "Yin", "Yang", "Zen",
+ "Koan", "Satori", "Yoga", "Meditation", "Mindfulness", "Prayer", "Ritual",
+ "Ceremony", "Sacrament", "Initiation", "Pilgrimage", "Fasting", "Feast",
+ "Festival", "Holiday", "Sabbath", "Jubilee", "Tradition", "Custom", "Etiquette",
+ "Manners", "Protocol", "CodeOfConduct", "HonorCode", "Oath", "Vow", "Pledge",
+ "Promise", "Contract", "Agreement", "Treaty", "Alliance", "Pact", "Covenant",
+ "Law", "Statute", "Ordinance", "Regulation", "Rule", "Precedent", "Jurisprudence",
+ "Justice", "Equity", "Fairness", "Rights", "Freedoms", "Liberties", "Duties",
+ "Responsibilities", "Obligations", "Privileges", "Immunities", "Crime",
+ "Felony", "Misdemeanor", "Infraction", "Violation", "Offense", "Transgression",
+ "Sin", "Vice", "Virtue", "Merit", "Demerit", "Punishment", "Penalty",
+ "Fine", "Sentence", "Imprisonment", "Execution", "Exile", "Banishment",
+ "Ostracism", "Shunning", "Reputation", "Honor", "Shame", "Glory", "Infamy",
+ "Fame", "Notoriety", "Legacy", "Heritage", "Lineage", "Ancestry", "Descendants",
+ "Family", "Clan", "Tribe", "Nation", "Race", "Ethnicity", "Culture", "Society",
+ "Civilization", "Community", "Neighborhood", "Village", "Town", "Metropolis",
+ "Megalopolis", "State", "Province", "Territory", "Country", "Federation",
+ "Confederation", "Union", "Alliance", "Coalition", "Organization", "Institution",
+ "Corporation", "Company", "Business", "Enterprise", "Startup", "NonProfit",
+ "Foundation", "Association", "Guild", "Union", "Club", "Society", "Fraternity",
+ "Sorority", "Team", "Crew", "Gang", "Mob", "Syndicate", "Cartel", "Cult",
+ "Faction", "Party", "Movement", "Government", "Monarchy", "Republic",
+ "Democracy", "Theocracy", "Autocracy", "Oligarchy", "Anarchy", "Dictatorship",
+ "Totalitarianism", "Feudalism", "Capitalism", "Socialism", "Communism",
+ "Fascism", "Nationalism", "Imperialism", "Colonialism", "Globalism",
+ "Federalism", "Separatism", "Populism", "Liberalism", "Conservatism",
+ "Progressivism", "Libertarianism", "Environmentalism", "Feminism", "Pacifism",
+ "Militarism", "Revolution", "Rebellion", "Uprising", "Coup", "Insurrection",
+ "CivilWar", "War", "Battle", "Skirmish", "Siege", "Campaign", "Conflict",
+ "Truce", "Ceasefire", "Armistice", "PeaceTreaty", "Diplomacy", "Negotiation",
+ "Embargo", "Sanctions", "Espionage", "Intelligence", "Propaganda", "Sabotage",
+ "Terrorism", "CounterTerrorism", "Resistance", "Underground", "Dissident",
+ "Refugee", "AsylumSeeker", "DisplacedPerson", "Casualty", "Veteran",
+ "Memorial", "Monument", "History", "Prehistory", "Antiquity", "MiddleAges",
+ "Renaissance", "Enlightenment", "IndustrialRevolution", "InformationAge",
+ "Future", "Utopia", "Dystopia", "Apocalypse", "PostApocalypse", "Armageddon",
+ "Ragnarok", "JudgmentDay", "EndTimes", "NewBeginning", "GoldenAge",
+ "DarkAge", "Epoch", "Era", "Period", "Millennium", "Century", "Decade",
+ "Year", "Season", "Month", "Week", "Day", "Hour", "Minute", "Second",
+ "Moment", "Instant", "Eternity", "Infinity", "Continuum", "Cycle", "Rhythm",
+ "Tempo", "Cadence", "Frequency", "Interval", "Duration", "Timeline",
+ "Schedule", "Calendar", "Almanac", "Chronicle", "Annals", "Record", "Log",
+ "Journal", "Diary", "Memoir", "Biography", "Autobiography", "Novel",
+ "ShortStory", "Novella", "Epic", "Poem", "Ballad", "Sonnet", "Haiku",
+ "Limerick", "Verse", "Prose", "Play", "Script", "Screenplay", "Libretto",
+ "Lyrics", "Score", "SheetMusic", "Symphony", "Concerto", "Sonata", "Opera",
+ "Ballet", "Musical", "Oratorio", "Cantata", "Fugue", "Overture", "Suite",
+ "Aria", "Chorus", "Recitative", "Etude", "Nocturne", "Prelude", "Rhapsody",
+ "Waltz", "March", "Anthem", "Hymn", "Carol", "Chant", "Madrigal", "Motet",
+ "FolkSong", "Blues", "Jazz", "Rock", "Pop", "HipHop", "Electronic", "Classical",
+ "WorldMusic", "Ambient", "Soundtrack", "Jingle", "ThemeSong", "Lullaby",
+ "NurseryRhyme", "Riddle", "Proverb", "Maxim", "Aphorism", "Epigram", "Quote",
+ "Slogan", "Motto", "Catchphrase", "Buzzword", "Jargon", "Slang", "Dialect",
+ "Accent", "Language", "Alphabet", "Character", "Glyph", "Ideogram", "Logogram",
+ "Syllabary", "Phoneme", "Morpheme", "Word", "Phrase", "Clause", "Sentence",
+ "Paragraph", "Chapter", "Volume", "Text", "Speech", "Lecture", "Sermon",
+ "Debate", "Discussion", "Conversation", "Dialogue", "Monologue", "Soliloquy",
+ "Narration", "Description", "Exposition", "Argument", "Rhetoric", "Logic",
+ "Reason", "Emotion", "Passion", "Instinct", "Intuition", "Conscience",
+ "Morality", "Ethics", "Aesthetics", "Beauty", "Sublime", "Art", "Craft",
+ "Skill", "Technique", "Talent", "Genius", "Creativity", "Imagination",
+ "Inspiration", "Muse", "Medium", "Style", "Genre", "Movement", "School",
+ "Masterpiece", "WorkOfArt", "Oeuvre", "Canon", "Critique", "Review",
+ "Analysis", "Interpretation", "Theory", "Hypothesis", "Experiment",
+ "Observation", "Measurement", "Data", "Evidence", "Proof", "Conclusion",
+ "Discovery", "Invention", "Innovation", "Technology", "Science", "Mathematics",
+ "Physics", "Chemistry", "Biology", "Astronomy", "Geology", "Ecology",
+ "Medicine", "Engineering", "ComputerScience", "Psychology", "Sociology",
+ "Anthropology", "Economics", "PoliticalScience", "History", "Linguistics",
+ "Philosophy", "Literature", "Musicology", "ArtHistory", "Theology",
+ "Education", "Pedagogy", "Curriculum", "Lesson", "Lecture", "Seminar",
+ "Workshop", "Tutorial", "Exam", "Test", "Quiz", "Assignment", "Homework",
+ "Project", "Thesis", "Dissertation", "Diploma", "Degree", "Certificate",
+ "License", "Qualification", "Credential", "Skillset", "Expertise", "Competence",
+ "Proficiency", "Mastery", "KnowledgeBase", "Wisdom", "Understanding",
+ "Insight", "Awareness", "Perception", "Cognition", "Memory", "Recall",
+ "Recognition", "Learning", "Attention", "Concentration", "Focus", "Distraction",
+ "ThoughtProcess", "ProblemSolving", "DecisionMaking", "Judgment", "Bias",
+ "Heuristic", "Fallacy", "LogicError", "CognitiveDissonance", "Mindset",
+ "Attitude", "Perspective", "Worldview", "Paradigm", "FrameOfReference",
+ "BeliefSystem", "ValueSystem", "Motivation", "Drive", "Ambition", "Goal",
+ "Objective", "Purpose", "Meaning", "Intention", "Willpower", "Discipline",
+ "Habit", "Routine", "Emotion", "Feeling", "Mood", "Temperament", "Personality",
+ "Character", "Trait", "Disposition", "Behavior", "Action", "Reaction",
+ "Response", "Interaction", "Relationship", "Bond", "Connection", "Attachment",
+ "Affection", "Love", "Lust", "Infatuation", "Friendship", "Companionship",
+ "Rivalry", "Enmity", "Hatred", "Antipathy", "Indifference", "Empathy",
+ "Sympathy", "Compassion", "Kindness", "Cruelty", "Generosity", "Greed",
+ "Envy", "Jealousy", "Pride", "Humility", "Anger", "Rage", "Irritation",
+ "Annoyance", "Frustration", "Disappointment", "Sadness", "Grief", "Sorrow",
+ "Melancholy", "Despair", "Hope", "Optimism", "Pessimism", "Joy", "Happiness",
+ "Elation", "Ecstasy", "Bliss", "Contentment", "Satisfaction", "Gratitude",
+ "Regret", "Remorse", "Guilt", "Shame", "Embarrassment", "Anxiety", "Worry",
+ "Fear", "Terror", "Panic", "Phobia", "Stress", "Tension", "Relaxation",
+ "Calm", "Serenity", "Peace", "Tranquility", "Excitement", "Thrill",
+ "Anticipation", "Suspense", "Surprise", "Amazement", "Awe", "Wonder",
+ "Curiosity", "Boredom", "Apathy", "Lethargy", "Fatigue", "Energy",
+ "Vitality", "Vigor", "Stamina", "Endurance", "Strength", "Power", "Weakness",
+ "Fragility", "Resilience", "Toughness", "Hardiness", "Agility", "Dexterity",
+ "Coordination", "Balance", "Flexibility", "Speed", "Quickness", "Reflexes",
+ "Accuracy", "Precision", "Steadiness", "Health", "Wellness", "Sickness",
+ "Illness", "Disease", "Malady", "Ailment", "Condition", "Disorder",
+ "Syndrome", "Injury", "Wound", "Trauma", "Pain", "Ache", "Soreness",
+ "Comfort", "Discomfort", "Pleasure", "Displeasure", "Sensation", "Perception",
+ "Sight", "Vision", "Hearing", "Audition", "Smell", "Olfaction", "Taste",
+ "Gustation", "Touch", "Tactition", "Proprioception", "Nociception",
+ "Thermoception", "Equilibrioception", "Chronoception", "Interoception",
+ "Sense", "Instinct", "GutFeeling", "Hunch", "Premonition", "Clairvoyance",
+ "Telepathy", "Telekinesis", "Precognition", "Retrocognition", "Psychometry",
+ "AstralProjection", "Mediumship", "Channeling", "Divination", "Scrying",
+ "Augury", "Tarot", "Runes", "Astrology", "Numerology", "Palmistry",
+ "Geomancy", "Chiromancy", "Cartomancy", "Oneiromancy", "Necromancy",
+ "Alchemy", "Thaumaturgy", "Sorcery", "Witchcraft", "Wizardry", "Enchantment",
+ "Conjuration", "Summoning", "Invocation", "Evocation", "Abjuration",
+ "Transmutation", "Illusion", "Divination", "Restoration", "Destruction",
+ "Alteration", "Mysticism", "Occultism", "Esotericism", "Gnosticism",
+ "Hermeticism", "Kabbalah", "Theosophy", "Wicca", "Paganism", "Shamanism",
+ "Animism", "Polytheism", "Monotheism", "Pantheism", "Panentheism", "Deism",
+ "Agnosticism", "Atheism", "Humanism", "Secularism"
+]))
+
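+# Minimal sketch of how these lists can be combined into a username
+# (hypothetical helper for illustration only -- the file's real generation
+# logic follows after the remaining lists and may differ):
+def _sketch_username(rng=random):
+    """Pick one adjective and one profession/object, then add a short number."""
+    stem = rng.choice(ADJECTIVES) + rng.choice(PROFESSIONS + OBJECTS)
+    return f"{stem}{rng.randint(1, 999)}"
+
+# e.g. _sketch_username() -> "CosmicBlacksmith42" (output varies per run)
+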
+ACTIONS_VERBS = list(set([
+ # Core
+ "Coding", "Gaming", "Writing", "Reading", "Drawing", "Singing",
+ "Dancing", "Running", "Jumping", "Building", "Exploring", "Crafting",
+ "Dreaming", "Living", "Growing", "Creating", "Sailing", "Flying",
+ "Fighting", "Casting", "Healing", "Stealing", "Forging", "Analyzing",
+ "Synthesizing", "Navigating", "Awakening", "Converging", "Hacking",
+ "Streaming", "Designing", "Composing", "Painting", "Sculpting", "Brewing",
+ "Enchanting", "Conjuring", "Summoning", "Banishing", "Protecting",
+ "Defending", "Attacking", "Striking", "Dodging", "Sneaking", "Tracking",
+ "Hunting", "Trapping", "Taming", "Riding", "Diving", "Swimming",
+ "Climbing", "Crawling", "Sprinting", "Leaping", "Falling", "Rising",
+ "Ascending", "Descending", "Teleporting", "Phasing", "Shifting", "Morphing",
+ "Transforming", "Shrinking", "Melting", "Freezing", "Exploding",
+ "Imploding", "Collapsing", "Expanding", "Radiating", "Absorbing",
+ "Reflecting", "Refracting", "Focusing", "Channeling", "Meditating",
+ "Remembering", "Forgetting", "Learning", "Teaching", "Knowing", "Believing",
+ "Doubting", "Questioning", "Answering", "Solving", "Destroying", "Breaking",
+ "Mending", "Restoring", "Corrupting", "Cleansing", "Blessing", "Cursing",
+ "Judging", "Forgiving", "Seeking", "Finding", "Losing", "Winning",
+ "Failing", "Surviving", "Thriving", "Vanishing", "Appearing", "Echoing",
+ "Resonating", "Vibrating", "Pulsing", "Shining", "Fading", "Observing",
+ "Listening", "Speaking", "Whispering", "Shouting", "Playing", "Working",
+ "Resting", "Waiting", "Watching", "Plotting", "Scheming", "Strategizing",
+ "Calculating", "Computing", "Processing", "Decrypting", "Encrypting",
+ "Uploading", "Downloading", "Connecting", "Disconnecting", "Evolving",
+ "Adapting", "Overcoming", "Mastering", "Yielding", "Submitting", "Governing",
+ # Expansion
+ "Thinking", "Pondering", "Contemplating", "Reflecting", "Considering",
+ "Imagining", "Visualizing", "Inventing", "Innovating", "Experimenting",
+ "Testing", "Measuring", "Calibrating", "Documenting", "Recording", "Logging",
+ "Charting", "Graphing", "Mapping", "Modeling", "Simulating", "Predicting",
+ "Forecasting", "Estimating", "Guessing", "Assuming", "Inferring", "Deducing",
+ "Inducing", "Reasoning", "Arguing", "Debating", "Discussing", "Negotiating",
+ "Bargaining", "Compromising", "Collaborating", "Cooperating", "Competing",
+ "Challenging", "Opposing", "Resisting", "Rebelling", "Fighting", "Battling",
+ "WagingWar", "Defending", "Guarding", "Shielding", "Warding", "Parrying",
+ "Blocking", "Intercepting", "Avoiding", "Evading", "Escaping", "Fleeing",
+ "Retreating", "Advancing", "Charging", "Pursuing", "Chasing", "Hunting",
+ "Stalking", "Ambushing", "Trapping", "Capturing", "Imprisoning", "Binding",
+ "Restraining", "Enslaving", "Liberating", "Freeing", "Rescuing", "Saving",
+ "Helping", "Assisting", "Supporting", "Aiding", "Comforting", "Consoling",
+ "Encouraging", "Motivating", "Inspiring", "Leading", "Guiding", "Directing",
+ "Commanding", "Ordering", "Instructing", "Training", "Coaching", "Mentoring",
+ "Advising", "Counseling", "Consulting", "Informing", "Notifying", "Warning",
+ "Alerting", "Reporting", "Communicating", "Signaling", "Gesturing", "Expressing",
+ "Showing", "Demonstrating", "Illustrating", "Explaining", "Describing",
+ "Narrating", "Reciting", "Performing", "Acting", "Mimicking", "Impersonating",
+ "Joking", "Teasing", "Flirting", "Seducing", "Charming", "Persuading",
+ "Convincing", "Manipulating", "Deceiving", "Lying", "Betraying", "Tricking",
+ "Swindling", "Cheating", "Stealing", "Robbing", "Pilfering", "Plundering",
+ "Looting", "Smuggling", "Poaching", "Trespassing", "Violating", "Breaking",
+ "Vandalizing", "Destroying", "Demolishing", "Annihilating", "Obliterating",
+ "Erasing", "Deleting", "Burning", "Scorching", "Melting", "Dissolving",
+ "Crushing", "Shattering", "Splintering", "Tearing", "Ripping", "Cutting",
+ "Slicing", "Chopping", "Carving", "Etching", "Engraving", "Sculpting",
+ "Molding", "Shaping", "Forming", "Assembling", "Constructing", "Erecting",
+ "Raising", "Lifting", "Hoisting", "Lowering", "Dropping", "Placing", "Setting",
+ "Arranging", "Organizing", "Sorting", "Classifying", "Categorizing", "Labeling",
+ "Indexing", "Filing", "Storing", "Stockpiling", "Hoarding", "Collecting",
+ "Gathering", "Harvesting", "Reaping", "Mining", "Excavating", "Drilling",
+ "Digging", "Tunneling", "Exploring", "Surveying", "Scouting", "Reconnoitering",
+ "Patrolling", "Searching", "Seeking", "Questing", "Journeying", "Traveling",
+ "Wandering", "Roaming", "Drifting", "Migrating", "Commuting", "Driving",
+ "Flying", "Floating", "Hovering", "Gliding", "Soaring", "Plummeting",
+ "Diving", "Surfing", "Skating", "Skiing", "Snowboarding", "Cycling",
+ "Hiking", "Trekking", "Backpacking", "Camping", "Fishing", "Boating",
+ "Kayaking", "Canoeing", "Rafting", "Rowing", "Paddling", "Sailing",
+ "Cruising", "Motoring", "Piloting", "Navigating", "Steering", "Maneuvering",
+ "Parking", "Docking", "Landing", "Launching", "TakingOff", "Warping",
+ "Jumping", "Blinking", "Phasing", "Shifting", "Teleporting", "Summoning",
+ "Conjuring", "Invoking", "Evoking", "Banishing", "Dismissing", "Dispelling",
+ "Nullifying", "Countering", "Abjuring", "Warding", "Shielding", "Protecting",
+ "Healing", "Curing", "Mending", "Restoring", "Regenerating", "Reviving",
+ "Resurrecting", "Enhancing", "Augmenting", "Boosting", "Empowering",
+ "Strengthening", "Weakening", "Debilitating", "Crippling", "Hindering",
+ "Slowing", "Hastening", "Accelerating", "Enchanting", "Imbuing", "Blessing",
+ "Cursing", "Hexing", "Jinxing", "Bewitching", "Charming", "Transmuting",
+ "Altering", "Changing", "Morphing", "Transforming", "Shapeshifting",
+ "Illusioning", "Disguising", "Camouflaging", "Cloaking", "Vanishing",
+ "Appearing", "Materializing", "Dematerializing", "Divining", "Scrying",
+ "Predicting", "Foreseeing", "Prophesying", "Communicating", "Telepathing",
+ "Controlling", "Dominating", "Influencing", "Commanding", "Compelling",
+ "Possessing", "Animating", "ConstructingGolems", "RaisingUndead", "Necromancing",
+ "Experimenting", "Researching", "Studying", "Learning", "Memorizing",
+ "Recalling", "Forgetting", "Understanding", "Comprehending", "Interpreting",
+ "Translating", "Deciphering", "Decoding", "Encoding", "Encrypting",
+ "Computing", "Calculating", "Programming", "Debugging", "Testing", "Optimizing",
+ "Refactoring", "Deploying", "Maintaining", "Updating", "Upgrading",
+ "Downgrading", "Installing", "Uninstalling", "Configuring", "Troubleshooting",
+ "Monitoring", "Logging", "Auditing", "Securing", "Hardening", "Patching",
+ "BackingUp", "Restoring", "Migrating", "Cloning", "Virtualizing",
+ "Containerizing", "Orchestrating", "Scaling", "LoadBalancing", "Networking",
+ "Routing", "Switching", "Bridging", "Firewalling", "Filtering", "Proxying",
+ "Authenticating", "Authorizing", "Accounting", "Browsing", "Searching",
+ "Googling", "Surfing", "Streaming", "Downloading", "Uploading", "Sharing",
+ "Posting", "Blogging", "Vlogging", "Tweeting", "Commenting", "Liking",
+ "Subscribing", "Following", "Friending", "Unfriending", "Blocking", "Reporting",
+ "Messaging", "Chatting", "Emailing", "Calling", "VideoConferencing", "Gaming",
+ "Playing", "Competing", "Cooperating", "Winning", "Losing", "Drawing",
+ "LevelingUp", "Grinding", "Farming", "Looting", "Crafting", "Trading",
+ "Questing", "Raiding", "Exploring", "Roleplaying", "Strategizing", "Tacticking",
+ "Practicing", "Training", "Exercising", "WorkingOut", "Stretching", "WarmingUp",
+ "CoolingDown", "Lifting", "Running", "Jogging", "Walking", "Swimming",
+ "Cycling", "Yogaing", "Pilatesing", "Meditating", "Relaxing", "Resting",
+ "Sleeping", "Napping", "Dreaming", "Waking", "Rising", "Eating", "Drinking",
+ "Feasting", "Dining", "Snacking", "Tasting", "Sipping", "Gulping", "Chewing",
+ "Swallowing", "Digesting", "Breathing", "Inhaling", "Exhaling", "Panting",
+ "Gasping", "Sighing", "Yawning", "Coughing", "Sneezing", "Hiccuping",
+ "Burping", "Farting", "Seeing", "Looking", "Watching", "Observing", "Staring",
+ "Gazing", "Glancing", "Peeking", "Squinting", "Blinking", "Winking", "Hearing",
+ "Listening", "Overhearing", "Eavesdropping", "Smelling", "Sniffing", "Inhaling",
+ "Tasting", "Savoring", "Licking", "Touching", "Feeling", "Probing", "Poking",
+ "Stroking", "Petting", "Patting", "Grabbing", "Grasping", "Clutching",
+ "Holding", "Carrying", "Lifting", "Pushing", "Pulling", "Dragging", "Throwing",
+ "Catching", "Tossing", "Hitting", "Punching", "Kicking", "Slapping", "Striking",
+ "Bashing", "Smashing", "Crushing", "Shooting", "Firing", "Launching",
+ "Bombing", "Exploding", "Detonating", "Speaking", "Talking", "Chatting",
+ "Whispering", "Muttering", "Murmuring", "Shouting", "Yelling", "Screaming",
+ "Singing", "Humming", "Whistling", "Chanting", "Reciting", "Laughing",
+ "Giggling", "Chuckling", "Crying", "Sobbing", "Weeping", "Wailing", "Groaning",
+ "Moaning", "Grunting", "Growling", "Snarling", "Hissing", "Roaring", "Barking",
+ "Meowing", "Chirping", "Croaking", "Buzzing", "Howling", "Screeching",
+ "Clapping", "Snapping", "Stomping", "Tapping", "Knocking", "Banging",
+ "Rattling", "Shaking", "Vibrating", "Pulsing", "Beating", "Thumping",
+ "Flowing", "Streaming", "Pouring", "Dripping", "Leaking", "Seeping",
+ "Gushing", "Spraying", "Splashing", "Bubbling", "Boiling", "Simmering",
+ "Freezing", "Thawing", "Melting", "Evaporating", "Condensing", "Sublimating",
+ "Depositing", "Growing", "Shrinking", "Expanding", "Contracting", "Swelling",
+ "Blooming", "Wilting", "Sprouting", "Ripening", "Rotting", "Decaying",
+ "Decomposing", "Festering", "Fermenting", "Aging", "Maturing", "Developing",
+ "Evolving", "Mutating", "Adapting", "Regenerating", "Reproducing", "Breeding",
+ "Spawning", "Hatching", "Birthing", "Nursing", "Nurturing", "Raising",
+ "Teaching", "Educating", "Indoctrinating", "Brainwashing", "Grooming",
+ "Socializing", "Integrating", "Assimilating", "Alienating", "Isolating",
+ "Segregating", "Uniting", "Dividing", "Joining", "Leaving", "Entering",
+ "Exiting", "Arriving", "Departing", "Staying", "Moving", "Relocating",
+ "Settling", "Establishing", "Founding", "Abolishing", "Ending", "Finishing",
+ "Completing", "Starting", "Beginning", "Initiating", "Continuing", "Persisting",
+ "Resuming", "Pausing", "Stopping", "Ceasing", "Halting", "Interrupting",
+ "Delaying", "Postponing", "Accelerating", "Slowing", "Maintaining", "Sustaining",
+ "Preserving", "Conserving", "Protecting", "Saving", "Wasting", "Squandering",
+ "Consuming", "Using", "Utilizing", "Employing", "Applying", "Implementing",
+ "Executing", "Performing", "Operating", "Running", "Managing", "Administering",
+ "Supervising", "Overseeing", "Controlling", "Governing", "Ruling", "Leading",
+ "Following", "Obeying", "Serving", "Assisting", "Working", "Toiling", "Laboring",
+ "Striving", "Endeavoring", "Attempting", "Trying", "Succeeding", "Achieving",
+ "Accomplishing", "Failing", "Struggling", "Suffering", "Enduring", "Tolerating",
+ "Accepting", "Rejecting", "Approving", "Disapproving", "Praising", "Criticizing",
+ "Blaming", "Accusing", "Condemning", "Forgiving", "Pardoning", "Excusing",
+ "Justifying", "Defending", "Advocating", "Supporting", "Opposing", "Protesting",
+ "Demonstrating", "Petitioning", "Lobbying", "Voting", "Campaigning", "Electing",
+ "Appointing", "Promoting", "Demoting", "Hiring", "Firing", "Retiring",
+ "Resigning", "Investing", "Trading", "Buying", "Selling", "Bartering", "Lending",
+ "Borrowing", "Donating", "Receiving", "Giving", "Taking", "Sharing", "Dividing",
+ "Combining", "Merging", "Separating", "Splitting", "Connecting", "Disconnecting",
+ "Linking", "Unlinking", "Attaching", "Detaching", "Binding", "Unbinding",
+ "Wrapping", "Unwrapping", "Covering", "Uncovering", "Hiding", "Revealing",
+ "Exposing", "Concealing", "Masking", "Disguising", "Identifying", "Recognizing",
+ "Labeling", "Marking", "Branding", "Noticing", "Perceiving", "Realizing",
+ "Acknowledging", "Ignoring", "Overlooking", "Forgetting", "Remembering",
+ "Recollecting", "Reminiscing", "Anticipating", "Expecting", "Hoping", "Fearing",
+ "Worrying", "Wishing", "Desiring", "Craving", "Yearning", "Loving", "Hating",
+ "Liking", "Disliking", "Admiring", "Despising", "Respecting", "Disrespecting",
+ "Trusting", "Distrusting", "Believing", "Doubting", "Questioning", "Wondering",
+ "Imagining", "Fantasizing", "Hallucinating", "Focusing", "Concentrating",
+ "PayingAttention", "Ignoring", "Meditating", "Praying", "Worshipping",
+ "Celebrating", "Mourning", "Grieving", "Ritualizing", "Ceremonializing",
+ "Consecrating", "Desecrating", "Purifying", "Tainting", "Sanctifying",
+ "Defiling", "Redeeming", "Damning", "Saving", "Condemning", "Absolving",
+ "Judging", "Sentencing", "Punishing", "Rewarding", "Enforcing", "Regulating",
+ "Legislating", "Governing", "Diplomating", "Negotiating", "Arbitrating",
+ "Mediating", "Reconciling", "Peacemaking", "Warring", "Conquering",
+ "Liberating", "Colonizing", "Settling", "Pioneering", "Innovating",
+ "Discovering", "Inventing", "Creating", "Artisting", "Musicking", "Writing",
+ "Storytelling", "Philosophizing", "Theorizing", "Hypothesizing", "Analyzing",
+ "Synthesizing", "Critiquing", "Reviewing", "Editing", "Publishing", "Broadcasting",
+ "Communicating", "Teaching", "Learning", "Studying", "Researching", "Archiving",
+ "Preserving", "Curating", "Exhibiting", "Performing", "Entertaining",
+ "Amusing", "Distracting", "Inspiring", "Motivating", "Challenging",
+ "Provoking", "Comforting", "Soothing", "Healing", "Nourishing", "Sustaining",
+ "Living", "Being", "Existing", "Becoming", "Transcending", "Ascending",
+ "Perishing", "Dying", "Ceasing", "Ending"
+]))
+
+# Verify list sizes BEFORE combining
+print(f"Unique Professions: {len(PROFESSIONS)}")
+print(f"Unique Adjectives: {len(ADJECTIVES)}")
+print(f"Unique Objects: {len(OBJECTS)}")
+print(f"Unique Actions: {len(ACTIONS_VERBS)}")
+print("-" * 20)
+
+
+# Combine word lists for the first part of the username
+ALL_WORD_OPTIONS = PROFESSIONS + ADJECTIVES + OBJECTS + ACTIONS_VERBS
+
+# Options for the second part (Object or Verb/Action)
+SECOND_PART_OPTIONS = OBJECTS + ACTIONS_VERBS
+
+# --- Separators ---
+SEPARATORS = ['_', '-', '.', '', ''] # '' = no separator; listed twice so random.choice picks it more often (the set in --make_all dedupes the resulting repeats)
+
+# --- Special Characters ---
+SINGLE_SPECIAL_CHARS = ['_', '-', '*', '#', '!', '.', ':', ';', '~', '=', '+']
+SYMMETRICAL_PAIRS = [('{', '}'), ('[', ']'), ('(', ')'), ('<', '>'), ('/', '\\'), ('|', '|')]
+
+# --- Configuration for Variability ---
+SPECIAL_CHAR_ADD_PROBABILITY = 0.8
+SYMMETRICAL_CHAR_PROBABILITY = 0.4
+MAX_SINGLE_CHARS_COUNT = 4
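+# With these defaults, generate_username() decorates ~80% of names with special
+# characters; when both ends are decorated, a symmetrical pair ({...}, [...], etc.)
+# is used 40% of the time, otherwise each chosen end gets 1-4 random single chars.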
+
+# --- Generation Function ---
+def generate_username():
+ """Generates a single username with random components and special characters."""
+ try:
+ word1 = random.choice(ALL_WORD_OPTIONS)
+ separator = random.choice(SEPARATORS)
+ word2 = random.choice(SECOND_PART_OPTIONS)
+ except IndexError:
+ # Fallback if any list ended up empty (shouldn't happen with populated lists)
+ return "ErrorFallbackUser"
+
+ username_core = word1 + separator + word2
+
+ start_chars = ""
+ end_chars = ""
+
+ include_special_chars = random.random() < SPECIAL_CHAR_ADD_PROBABILITY
+
+ if include_special_chars:
+ location = random.choice(['start', 'end', 'both'])
+ use_symmetrical_pair = (location == 'both') and (random.random() < SYMMETRICAL_CHAR_PROBABILITY)
+
+ if use_symmetrical_pair and SYMMETRICAL_PAIRS:
+ open_char, close_char = random.choice(SYMMETRICAL_PAIRS)
+ start_chars = open_char
+ end_chars = close_char
+ else:
+ if location in ['start', 'both'] and SINGLE_SPECIAL_CHARS:
+ k = random.randint(1, MAX_SINGLE_CHARS_COUNT)
+ start_chars = ''.join(random.choices(SINGLE_SPECIAL_CHARS, k=k))
+ if location in ['end', 'both'] and SINGLE_SPECIAL_CHARS:
+ k = random.randint(1, MAX_SINGLE_CHARS_COUNT)
+ end_chars = ''.join(random.choices(SINGLE_SPECIAL_CHARS, k=k))
+
+ final_username = start_chars + username_core + end_chars
+ final_username = final_username.strip() # Remove accidental whitespace
+
+ # Basic check to avoid usernames that are *only* special characters
+ if not any(c.isalnum() for c in final_username) and final_username:
+ # If it contains no letters or numbers, generate a simpler fallback
+ try:
+ return random.choice(ALL_WORD_OPTIONS) + random.choice(SEPARATORS) + random.choice(SECOND_PART_OPTIONS)
+ except IndexError:
+ return "ErrorFallbackUser2"
+
+ # Ensure username is not empty after stripping
+ if not final_username:
+ try:
+ return random.choice(ALL_WORD_OPTIONS) + random.choice(SECOND_PART_OPTIONS) # Force concatenation
+ except IndexError:
+ return "ErrorFallbackUser3"
+
+ return final_username
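+
+# e.g. generate_username() might return 'Healing-Crafting', '{Gaming_Running}',
+# or '__Resting.Gaming!!' (illustrative; actual output is random)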
+
+# --- Main Logic ---
+
+output_filename = "generated.py"
+output_directory = "." # Use "." for current directory, or specify a path
+full_output_path = os.path.join(output_directory, output_filename)
+
+# Check for the --make_all flag
+make_all_combinations = "--make_all" in sys.argv
+
+USERNAMES_LIST = [] # Will hold the final usernames, filled by whichever mode runs below
+
+if make_all_combinations:
+ print("Generating ALL unique combinations...")
+
+ # Use a set to automatically handle uniqueness
+ all_unique_usernames_set = set()
+
+ # Calculate all core combinations (Word1 + Separator + Word2)
+ core_combinations = list(itertools.product(ALL_WORD_OPTIONS, SEPARATORS, SECOND_PART_OPTIONS))
+ print(f"Calculating {len(core_combinations):,} core combinations...")
+
+ # Calculate all possible single character sequences (length 1 to MAX_SINGLE_CHARS_COUNT)
+ all_single_sequences = []
+ for k in range(1, MAX_SINGLE_CHARS_COUNT + 1):
+ all_single_sequences.extend([''.join(seq) for seq in itertools.product(SINGLE_SPECIAL_CHARS, repeat=k)])
+ # Include the empty string for cases where chars are only at one end, or none
+ all_single_sequences_with_empty = [''] + all_single_sequences
+
+ num_single_sequences = len(all_single_sequences)
+ num_single_sequences_with_empty = len(all_single_sequences_with_empty)
+
+ # Generate and add variations.
+ # This loop can take a very long time and use a lot of memory, depending on the
+ # list sizes and MAX_SINGLE_CHARS_COUNT, so a progress indicator is printed below.
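+ # Scale check, assuming the defaults above (11 single chars, runs of 1-4): each
+ # core combination spawns 16,105 * 16,105 ~= 2.6e8 start/end variations, so
+ # --make_all is only practical with tiny word lists or a smaller MAX_SINGLE_CHARS_COUNT.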
+ total_cores = len(core_combinations)
+ for i, (word1, sep, word2) in enumerate(core_combinations):
+ if (i + 1) % 10000 == 0 or (i + 1) == total_cores:
+ print(f"Processing core combination {i + 1:,} of {total_cores:,}...", end='\r')
+
+ core_username = word1 + sep + word2
+
+ # Variation 1: Core only
+ all_unique_usernames_set.add(core_username)
+
+ # Variation 2: Symmetrical pairs wrapping core
+ for open_char, close_char in SYMMETRICAL_PAIRS:
+ all_unique_usernames_set.add(open_char + core_username + close_char)
+
+ # Variations 3, 4, 5: Single characters at start/end/both
+ # This combines variations 3, 4, and 5 efficiently
+ for start_seq in all_single_sequences_with_empty:
+ for end_seq in all_single_sequences_with_empty:
+ # Avoid adding the core_username again (case where start_seq and end_seq are both empty)
+ if start_seq == '' and end_seq == '':
+ continue # Already added above
+
+                # Symmetrical-pair wraps were already added above; we assume they are distinct
+                # from the single-char sequences generated here. If a pair's characters also
+                # appeared in SINGLE_SPECIAL_CHARS, the same string could be produced twice,
+                # but the set deduplicates it either way.
+
+ all_unique_usernames_set.add(start_seq + core_username + end_seq)
+
+ # Convert set to list for writing
+ USERNAMES_LIST = list(all_unique_usernames_set)
+ print(f"\nFinished generating {len(USERNAMES_LIST):,} unique usernames.")
+
+else: # Default behavior: Generate a sample and print count
+ NUM_USERNAMES_TO_GENERATE = 16000 # Adjust as needed
+ print(f"Generating a sample of {NUM_USERNAMES_TO_GENERATE} usernames...")
+ # Keep the sampling function call
+ USERNAMES_LIST = [generate_username() for _ in range(NUM_USERNAMES_TO_GENERATE)]
+ print("Sample generation complete.")
+
+ # --- Calculate and Print Total Possible Combinations ---
+ num_word1_options = len(ALL_WORD_OPTIONS)
+ num_sep_options = len(SEPARATORS)
+ num_word2_options = len(SECOND_PART_OPTIONS)
+ num_core_combos = num_word1_options * num_sep_options * num_word2_options
+
+ num_symmetrical_pair_options = len(SYMMETRICAL_PAIRS)
+
+ # Number of possible single char sequences (length 1 to MAX)
+ num_single_seq_options = sum(len(SINGLE_SPECIAL_CHARS)**k for k in range(1, MAX_SINGLE_CHARS_COUNT + 1))
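+    # e.g. with the default 11 single chars and MAX_SINGLE_CHARS_COUNT = 4:
+    # 11 + 11**2 + 11**3 + 11**4 = 11 + 121 + 1,331 + 14,641 = 16,104 sequences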
+
+ # The total number of *unique strings* possible is complex to calculate exactly
+ # without generating them all and putting them in a set (which make_all does).
+ # We can estimate based on the structures:
+ # Core Only: num_core_combos
+ # Symmetrical Wrap: num_core_combos * num_symmetrical_pair_options
+ # Single Start (1-MAX): num_core_combos * num_single_seq_options
+ # Single End (1-MAX): num_core_combos * num_single_seq_options
+ # Single Both (1-MAX each): num_core_combos * num_single_seq_options * num_single_seq_options
+
+    # This sum is an upper bound: different structures can collide (most notably,
+    # the '' separator appears twice in SEPARATORS, so every no-separator core is
+    # counted twice, and distinct word pairs can occasionally concatenate to the
+    # same string). --make_all gets the exact count by deduplicating in a set; for
+    # printing, the sum conveys the immense scale well enough.
+ estimated_total_unique_combos = (
+ num_core_combos +
+ (num_core_combos * num_symmetrical_pair_options) +
+ (num_core_combos * num_single_seq_options) +
+ (num_core_combos * num_single_seq_options) +
+ (num_core_combos * num_single_seq_options * num_single_seq_options)
+ )
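+    # Sanity check with the defaults: per core combination the sum above counts
+    # 1 + 6 + 16,104 + 16,104 + 16,104**2 = 259,371,031 variants, which is why the
+    # printed total is astronomically large.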
+
+
+ print("\n--- Potential Username Combinations ---")
+ print(f"Number of Word1 options: {num_word1_options:,}")
+ print(f"Number of Separator options: {num_sep_options:,}")
+ print(f"Number of Word2 options: {num_word2_options:,}")
+ print(f"Core combinations (W1+Sep+W2): {num_core_combos:,}")
+ print(f"Symmetrical Pair wraps: {num_symmetrical_pair_options:,}")
+ print(f"Single Special Sequences (1-{MAX_SINGLE_CHARS_COUNT}): {num_single_seq_options:,}")
+ print("-" * 40)
+ # Use the estimated total for the final number
+ print(f"Estimated Total Unique Combinations (including special chars): {estimated_total_unique_combos:,}")
+ print("(This is an estimate based on structural variations; exact count requires generating all)")
+ print("-------------------------------------\n")
+
+
+# --- Write to File (Shared Logic) ---
+print(f"Writing {len(USERNAMES_LIST):,} usernames to '{full_output_path}'...")
+
+# Format the output string as a Python list assignment
+output_string = "# -*- coding: utf-8 -*-\n" # Add encoding declaration to the output file too
+output_string += "# Auto-generated list of usernames\n\n"
+output_string += "USERNAMES = [\n"
+
+# Iterate through the generated list (from either mode) and write
+for username in USERNAMES_LIST:
+ # Escape backslashes and double quotes within the username string
+ # to make it a valid Python string literal
+ escaped_username = username.replace('\\', '\\\\').replace('"', '\\"')
+    # Note: string concatenation cannot raise UnicodeEncodeError (that only happens
+    # when encoding), and the file is opened with encoding='utf-8' below, so no
+    # per-username try/except is needed here.
+    output_string += f' "{escaped_username}",\n' # Indent, quote, add comma and newline
+
+output_string += "]\n" # Close the list definition
+
+# Write the string to the file
+try:
+ # Use 'w' mode to overwrite the file if it exists, create if not
+ # Specify encoding for broader character support
+ with open(full_output_path, 'w', encoding='utf-8') as f:
+ f.write(output_string)
+ print(f"Successfully wrote {len(USERNAMES_LIST):,} usernames to '{full_output_path}'")
+
+except IOError as e:
+ print(f"Error: Could not write to file '{full_output_path}'. Reason: {e}")
+except Exception as e:
+ print(f"An unexpected error occurred during file writing: {e}")
\ No newline at end of file
diff --git a/logs/requirements.txt b/logs/requirements.txt
new file mode 100644
index 0000000..44ea884
--- /dev/null
+++ b/logs/requirements.txt
@@ -0,0 +1,18 @@
+# Core dependencies for convert.py
+pandas>=1.3.0
+pandas-image-methods>=0.2.0
+transformers>=4.20.0
+torch>=1.12.0
+tqdm>=4.64.0
+pillow>=9.0.0
+pyarrow>=10.0.0
+
+# Optional dependencies for enhanced functionality
+datasets>=2.0.0
+dask[complete]>=2022.7.0
+distributed>=2022.7.0
+
+# Additional utility dependencies
+numpy>=1.21.0
+requests>=2.25.0
+
diff --git a/main.js b/main.js
index 590348c..5faf208 100644
--- a/main.js
+++ b/main.js
@@ -3,6 +3,7 @@ import settings from './settings.js';
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
import { readFileSync } from 'fs';
+import { initTTS } from './src/process/tts_process.js';
function parseArguments() {
return yargs(hideBin(process.argv))
@@ -69,4 +70,5 @@ for (let profile of settings.profiles) {
const profile_json = JSON.parse(readFileSync(profile, 'utf8'));
settings.profile = profile_json;
Mindcraft.createAgent(settings);
-}
\ No newline at end of file
+}
+initTTS();
\ No newline at end of file
diff --git a/package.json b/package.json
index 5fb95d7..77c3ab5 100644
--- a/package.json
+++ b/package.json
@@ -9,7 +9,7 @@
"cheerio": "^1.0.0",
"express": "^4.18.2",
"google-translate-api-x": "^10.7.1",
- "groq-sdk": "^0.15.0",
+ "groq-sdk": "^0.5.0",
"minecraft-data": "^3.78.0",
"mineflayer": "^4.29.0",
"mineflayer-armor-manager": "^2.0.1",
@@ -28,8 +28,13 @@
"socket.io-client": "^4.7.2",
"three": "^0.128.0",
"vec3": "^0.1.10",
+ "wav": "^1.0.2",
"yargs": "^17.7.2"
},
+ "optionalDependencies": {
+ "naudiodon": "^2.3.6",
+ "mic": "^2.1.2"
+ },
"scripts": {
"postinstall": "patch-package",
"start": "node main.js"
@@ -40,4 +45,4 @@
"eslint-plugin-no-floating-promise": "^2.0.0",
"globals": "^15.11.0"
}
-}
+}
\ No newline at end of file
diff --git a/patches/@google+generative-ai+0.2.1.patch b/patches/@google+generative-ai+0.2.1.patch
deleted file mode 100644
index ebdff24..0000000
--- a/patches/@google+generative-ai+0.2.1.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/node_modules/@google/generative-ai/dist/index.mjs b/node_modules/@google/generative-ai/dist/index.mjs
-index 23a175b..aab7e19 100644
---- a/node_modules/@google/generative-ai/dist/index.mjs
-+++ b/node_modules/@google/generative-ai/dist/index.mjs
-@@ -151,7 +151,7 @@ class GoogleGenerativeAIResponseError extends GoogleGenerativeAIError {
- * limitations under the License.
- */
- const BASE_URL = "https://generativelanguage.googleapis.com";
--const API_VERSION = "v1";
-+const API_VERSION = "v1beta";
- /**
- * We can't `require` package.json if this runs on web. We will use rollup to
- * swap in the version number here at build time.
diff --git a/profiles/andy-4-reasoning.json b/profiles/andy-4-reasoning.json
new file mode 100644
index 0000000..4fa8d11
--- /dev/null
+++ b/profiles/andy-4-reasoning.json
@@ -0,0 +1,14 @@
+{
+ "name": "Andy-4",
+
+ "model": "ollama/sweaterdog/andy-4",
+
+ "conversing": "You are a playful Minecraft bot named $NAME that can converse with players, see, move, mine, build, and interact with the world by using commands.\n$SELF_PROMPT Act human-like as if you were a typical Minecraft player, rather than an AI. Be very brief in your responses, don't apologize constantly, don't give instructions or make lists unless asked, and don't refuse requests. Think in high amounts before responding. Don't pretend to act, use commands immediately when requested. Do NOT say this: 'Sure, I've stopped.', instead say this: 'Sure, I'll stop. !stop'. Do NOT say this: 'On my way! Give me a moment.', instead say this: 'On my way! !goToPlayer(\"playername\", 3)'. Respond only as $NAME, never output '(FROM OTHER BOT)' or pretend to be someone else. If you have nothing to say or do, respond with an just a tab '\t'. This is extremely important to me, take a deep breath and have fun :)\nSummarized memory:'$MEMORY'\n$STATS\n$INVENTORY\n$COMMAND_DOCS\n$EXAMPLES\nReason before responding. Conversation Begin:",
+
+ "coding": "You are an intelligent mineflayer bot $NAME that plays minecraft by writing javascript codeblocks. Given the conversation, use the provided skills and world functions to write a js codeblock that controls the mineflayer bot ``` // using this syntax ```. The code will be executed and you will receive it's output. If an error occurs, write another codeblock and try to fix the problem. Be maximally efficient, creative, and correct. Be mindful of previous actions. Do not use commands !likeThis, only use codeblocks. The code is asynchronous and MUST USE AWAIT for all async function calls, and must contain at least one await. You have `Vec3`, `skills`, and `world` imported, and the mineflayer `bot` is given. Do not import other libraries. Think deeply before responding. Do not use setTimeout or setInterval. Do not speak conversationally, only use codeblocks. Do any planning in comments. This is extremely important to me, think step-by-step, take a deep breath and good luck! \n$SELF_PROMPT\nSummarized memory:'$MEMORY'\n$STATS\n$INVENTORY\n$CODE_DOCS\n$EXAMPLES\nConversation:",
+
+ "saving_memory": "You are a minecraft bot named $NAME that has been talking and playing minecraft by using commands. Update your memory by summarizing the following conversation and your old memory in your next response. Prioritize preserving important facts, things you've learned, useful tips, and long term reminders. Do Not record stats, inventory, or docs! Only save transient information from your chat history. You're limited to 500 characters, so be extremely brief, think about what you will summarize before responding, minimize words, and provide your summarization in Chinese. Compress useful information. \nOld Memory: '$MEMORY'\nRecent conversation: \n$TO_SUMMARIZE\nSummarize your old memory and recent conversation into a new memory, and respond only with the unwrapped memory text: ",
+
+ "bot_responder": "You are a minecraft bot named $NAME that is currently in conversation with another AI bot. Both of you can take actions with the !command syntax, and actions take time to complete. You are currently busy with the following action: '$ACTION' but have received a new message. Decide whether to 'respond' immediately or 'ignore' it and wait for your current action to finish. Be conservative and only respond when necessary, like when you need to change/stop your action, or convey necessary information. Example 1: You:Building a house! !newAction('Build a house.').\nOther Bot: 'Come here!'\nYour decision: ignore\nExample 2: You:Collecting dirt !collectBlocks('dirt',10).\nOther Bot: 'No, collect some wood instead.'\nYour decision: respond\nExample 3: You:Coming to you now. !goToPlayer('billy',3).\nOther Bot: 'What biome are you in?'\nYour decision: respond\nActual Conversation: $TO_SUMMARIZE\nDecide by outputting ONLY 'respond' or 'ignore', nothing else. Your decision:"
+
+}
diff --git a/profiles/andy-4.json b/profiles/andy-4.json
new file mode 100644
index 0000000..4fbaf05
--- /dev/null
+++ b/profiles/andy-4.json
@@ -0,0 +1,7 @@
+{
+ "name": "andy-4",
+
+ "model": "ollama/sweaterdog/andy-4",
+
+ "embedding": "ollama"
+}
diff --git a/profiles/llama.json b/profiles/llama.json
index ceb3992..2e9cae0 100644
--- a/profiles/llama.json
+++ b/profiles/llama.json
@@ -7,4 +7,4 @@
"embedding": "openai"
-}
\ No newline at end of file
+}
diff --git a/settings.js b/settings.js
index 19e1cc8..0b7789a 100644
--- a/settings.js
+++ b/settings.js
@@ -18,6 +18,7 @@ const settings = {
// "./profiles/grok.json",
// "./profiles/mistral.json",
// "./profiles/deepseek.json",
+ // "./profiles/andy-4.json",
// using more than 1 profile requires you to /msg each bot indivually
// individual profiles override values from the base profile
@@ -26,12 +27,12 @@ const settings = {
"load_memory": false, // load memory from previous session
"init_message": "Respond with hello world and your name", // sends to all on spawn
"only_chat_with": [], // users that the bots listen to and send general messages to. if empty it will chat publicly
- "speak": false, // allows all bots to speak through system text-to-speech. works on windows, mac, on linux you need to `apt install espeak`
"language": "en", // translate to/from this language. Supports these language names: https://cloud.google.com/translate/docs/languages
"render_bot_view": false, // show bot's view in browser at localhost:3000, 3001...
"allow_insecure_coding": false, // allows newAction command and model can write/run code on your computer. enable at own risk
"allow_vision": false, // allows vision model to interpret screenshots as inputs
+ "vision_mode": "off", // "off", "prompted", or "always"
"blocked_actions" : ["!checkBlueprint", "!checkBlueprintLevel", "!getBlueprint", "!getBlueprintLevel"] , // commands to disable and remove from docs. Ex: ["!setMode"]
"code_timeout_mins": -1, // minutes code is allowed to run. -1 for no timeout
"relevant_docs_count": 5, // number of relevant code function docs to select for prompting. -1 for all
@@ -42,7 +43,26 @@ const settings = {
"verbose_commands": true, // show full command syntax
"narrate_behavior": true, // chat simple automatic actions ('Picking up item!')
"chat_bot_messages": true, // publicly chat messages to other bots
- "log_all_prompts": false, // log ALL prompts to file
+
+ "speak": false, // enable text-to-speech
+ "stt_transcription": false, // enable speech-to-text transcription
+ "stt_username": "SERVER", // username for STT messages
+ "stt_agent_name": "", // agent name for STT messages, if empty it will send the STT to all bots
+
+ // STT Audio Detection Settings
+ "stt_rms_threshold": 3000, // Raised from 1000 to reduce false triggers
+ "stt_silence_duration": 2000, // 2 seconds of silence before stopping
+ "stt_min_audio_duration": 0.5, // Minimum audio duration in seconds
+ "stt_max_audio_duration": 45, // Maximum audio duration in seconds
+ "stt_debug_audio": true, // Enable to see what's happening
+ "stt_cooldown_ms": 2000, // Minimum time between recordings
+ "stt_speech_threshold_ratio": 0.05, // Much lower - 5% instead of 15%
+ "stt_consecutive_speech_samples": 3, // Reduced from 5 to 3
+
+ "log_normal_data": false, // Logs all inputs / outputs without reasoning or vision data
+ "log_reasoning_data": false, // Logs only reasoning inputs / outputs
+ "log_vision_data": false, // Logs only vision inputs / outputs
+
}
export default settings;
diff --git a/src/agent/agent.js b/src/agent/agent.js
index d989f24..aabf5c5 100644
--- a/src/agent/agent.js
+++ b/src/agent/agent.js
@@ -1,3 +1,6 @@
+import fs from 'fs';
+import path from 'path';
+import * as logger from '../../logger.js';
import { History } from './history.js';
import { Coder } from './coder.js';
import { VisionInterpreter } from './vision/vision_interpreter.js';
@@ -20,7 +23,22 @@ import { say } from './speak.js';
export class Agent {
async start(load_mem=false, init_message=null, count_id=0) {
this.last_sender = null;
+        // Safely attach the agent instance to a global-like object so STT code can access it.
+        // This works in Node.js ESM or CommonJS; if "global" doesn't exist, we fall back to "globalThis".
+ const globalObj = (typeof global !== 'undefined') ? global : globalThis;
+ try {
+ globalObj.agent = this;
+ } catch(e) {
+ console.warn("Failed attaching agent to global object:", e);
+ }
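+        // The STT process can then look the agent up the same way, e.g.:
+        //   const agent = (typeof global !== 'undefined' ? global : globalThis).agent;
+        // (illustrative; the actual lookup lives in the STT module)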
+
+ this.latestScreenshotPath = null;
this.count_id = count_id;
+ if (!profile_fp) {
+ throw new Error('No profile filepath provided');
+ }
+
+ console.log('Starting agent initialization with profile:', profile_fp);
// Initialize components with more detailed error handling
this.actions = new ActionManager(this);
@@ -99,6 +117,9 @@ export class Agent {
await new Promise((resolve) => setTimeout(resolve, 10000));
this.checkAllPlayersPresent();
+
+            console.log('Initializing vision interpreter...');
+ this.vision_interpreter = new VisionInterpreter(this, settings.vision_mode);
} catch (error) {
console.error('Error in spawn event:', error);
@@ -107,6 +128,81 @@ export class Agent {
});
}
+ /**
+ * Formats conversation history into a JSON string suitable for vision model logs.
+     * This function replicates formatting logic that would ideally be centralized in `logger.js`;
+     * it lives in `agent.js` as a workaround, since modifying `logger.js` directly while keeping
+     * vision log formatting consistent has proved difficult.
+ * @param {Array