From 49cce2234d230c5ad8c3fce3b929261a843d7858 Mon Sep 17 00:00:00 2001 From: uukelele-scratch Date: Sun, 20 Apr 2025 15:43:27 +0100 Subject: [PATCH 1/8] Added pollinations text support. --- src/models/pollinations.js | 47 ++++++++++++++++++++++++++++++++++++++ src/models/prompter.js | 5 ++++ 2 files changed, 52 insertions(+) create mode 100644 src/models/pollinations.js diff --git a/src/models/pollinations.js b/src/models/pollinations.js new file mode 100644 index 0000000..b21c8e7 --- /dev/null +++ b/src/models/pollinations.js @@ -0,0 +1,47 @@ +import { strictFormat } from "../utils/text.js"; + +export class Pollinations { + // models: https://text.pollinations.ai/models + constructor(model_name, url, params) { + this.model_name = model_name; + this.params = params; + this.url = url || "https://text.pollinations.ai/openai"; + } + + async sendRequest(turns, systemMessage) { + let messages = [{'role': 'system', 'content': systemMessage}].concat(turns); + + const payload = { + model: this.model_name || "openai-large", + messages: strictFormat(messages), + seed: Math.floor( Math.random() * (99999) ), + referrer: "mindcraft", + ...(this.params || {}) + }; + + let res = null; + + try { + console.log(`Awaiting pollinations response from model`, this.model_name); + const response = await fetch(this.url, { + method: "POST", + headers: { + "Content-Type": "application/json" + }, + body: JSON.stringify(payload) + }); + if (!response.ok) { + console.error(`Failed to receive response. Status`, response.status, response.text); + res = "My brain disconnected, try again."; + } else { + const result = await response.json(); + res = result.choices[0].message.content; + } + } catch (err) { + console.error(`Failed to receive response.`, err || err.message); + res = "My brain disconnected, try again."; + } + return res; + } +} + diff --git a/src/models/prompter.js b/src/models/prompter.js index 46f7760..445e3ba 100644 --- a/src/models/prompter.js +++ b/src/models/prompter.js @@ -21,6 +21,7 @@ import { DeepSeek } from './deepseek.js'; import { Hyperbolic } from './hyperbolic.js'; import { GLHF } from './glhf.js'; import { OpenRouter } from './openrouter.js'; +import { Pollinations } from './pollinations.js'; export class Prompter { constructor(agent, fp) { @@ -133,6 +134,8 @@ export class Prompter { profile.api = 'openrouter'; // must do first because shares names with other models else if (profile.model.includes('ollama/')) profile.api = 'ollama'; // also must do early because shares names with other models + else if (profile.model.includes('pollinations/')) + profile.api = 'pollinations'; // also shares some model names like llama else if (profile.model.includes('gemini')) profile.api = 'google'; else if (profile.model.includes('gpt') || profile.model.includes('o1')|| profile.model.includes('o3')) @@ -198,6 +201,8 @@ export class Prompter { model = new DeepSeek(profile.model, profile.url, profile.params); else if (profile.api === 'openrouter') model = new OpenRouter(profile.model.replace('openrouter/', ''), profile.url, profile.params); + else if (profile.api === 'pollinations') + model = new Pollinations(profile.model.replace('pollinations/', ''), profile.url, profile.params); else throw new Error('Unknown API:', profile.api); return model; From 2b3eb716e0399d8490bdad2d20d3819300244536 Mon Sep 17 00:00:00 2001 From: uukelele-scratch Date: Sun, 20 Apr 2025 15:48:13 +0100 Subject: [PATCH 2/8] Added pollinations vision support. --- src/models/pollinations.js | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/models/pollinations.js b/src/models/pollinations.js index b21c8e7..5157bb5 100644 --- a/src/models/pollinations.js +++ b/src/models/pollinations.js @@ -43,5 +43,23 @@ export class Pollinations { } return res; } + + async sendVisionRequest(messages, systemMessage, imageBuffer) { + const imageMessages = [...messages]; + imageMessages.push({ + role: "user", + content: [ + { type: "text", text: systemMessage }, + { + type: "image_url", + image_url: { + url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}` + } + } + ] + }); + + return this.sendRequest(imageMessages, systemMessage) + } } From d263a5fddefb90b829f9db8134cefc7df7a231be Mon Sep 17 00:00:00 2001 From: uukelele-scratch Date: Sun, 20 Apr 2025 18:20:44 +0100 Subject: [PATCH 3/8] Added pollinations TTS support. --- settings.js | 7 +++- src/agent/agent.js | 2 +- src/agent/speak.js | 84 ++++++++++++++++++++++++++------------ src/models/pollinations.js | 51 ++++++++++++++++++++++- 4 files changed, 114 insertions(+), 30 deletions(-) diff --git a/settings.js b/settings.js index e63c9fb..d5e8b13 100644 --- a/settings.js +++ b/settings.js @@ -28,8 +28,11 @@ const settings = { "load_memory": false, // load memory from previous session "init_message": "Respond with hello world and your name", // sends to all on spawn "only_chat_with": [], // users that the bots listen to and send general messages to. if empty it will chat publicly - "speak": false, // allows all bots to speak through system text-to-speech. works on windows, mac, on linux you need to `apt install espeak` - "language": "en", // translate to/from this language. Supports these language names: https://cloud.google.com/translate/docs/languages + + "speak": true, + "speak_model": "pollinations/openai-audio/echo", // allows all bots to speak through text-to-speach. format: {provider}/{model}/{voice}. if set to "system" it will use system text-to-speech, which works on windows and mac, but on linux you need to `apt install espeak`. + + "language": "en", // translate to/from this language. NOT text-to-speech language. Supports these language names: https://cloud.google.com/translate/docs/languages "show_bot_views": false, // show bot's view in browser at localhost:3000, 3001... "allow_insecure_coding": false, // allows newAction command and model can write/run code on your computer. enable at own risk diff --git a/src/agent/agent.js b/src/agent/agent.js index cdea40b..300640f 100644 --- a/src/agent/agent.js +++ b/src/agent/agent.js @@ -15,7 +15,7 @@ import { addBrowserViewer } from './vision/browser_viewer.js'; import settings from '../../settings.js'; import { serverProxy } from './agent_proxy.js'; import { Task } from './tasks.js'; -import { say } from './speak.js'; +import { Speaker } from './speak.js'; export class Agent { async start(profile_fp, load_mem=false, init_message=null, count_id=0, task_path=null, task_id=null) { diff --git a/src/agent/speak.js b/src/agent/speak.js index e5fe658..94332af 100644 --- a/src/agent/speak.js +++ b/src/agent/speak.js @@ -1,43 +1,75 @@ -import { exec } from 'child_process'; +import { exec, spawn } from 'child_process'; +import settings from '../../settings.js'; +import { Pollinations } from '../models/pollinations.js'; let speakingQueue = []; let isSpeaking = false; -export function say(textToSpeak) { - speakingQueue.push(textToSpeak); - if (!isSpeaking) { - processQueue(); - } +export function say(text) { + speakingQueue.push(text); + if (!isSpeaking) processQueue(); } -function processQueue() { +async function processQueue() { if (speakingQueue.length === 0) { isSpeaking = false; return; } - isSpeaking = true; - const textToSpeak = speakingQueue.shift(); - const isWin = process.platform === "win32"; - const isMac = process.platform === "darwin"; + const txt = speakingQueue.shift(); - let command; + const isWin = process.platform === 'win32'; + const isMac = process.platform === 'darwin'; + const model = settings.speak_model || 'system'; + + if (model === 'system') { + // system TTS + const cmd = isWin + ? `powershell -NoProfile -Command "Add-Type -AssemblyName System.Speech; \ +$s=New-Object System.Speech.Synthesis.SpeechSynthesizer; $s.Rate=2; \ +$s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"` + : isMac + ? `say "${txt.replace(/"/g,'\\"')}"` + : `espeak "${txt.replace(/"/g,'\\"')}"`; + + exec(cmd, err => { + if (err) console.error('TTS error', err); + processQueue(); + }); - if (isWin) { - command = `powershell -Command "Add-Type -AssemblyName System.Speech; $s = New-Object System.Speech.Synthesis.SpeechSynthesizer; $s.Rate = 2; $s.Speak(\\"${textToSpeak}\\"); $s.Dispose()"`; - } else if (isMac) { - command = `say "${textToSpeak}"`; } else { - command = `espeak "${textToSpeak}"`; - } + // remote audio provider + const [prov, mdl, voice] = model.split('/'); + if (prov !== 'pollinations') throw new Error(`Unknown provider: ${prov}`); - exec(command, (error, stdout, stderr) => { - if (error) { - console.error(`Error: ${error.message}`); - console.error(`${error.stack}`); - } else if (stderr) { - console.error(`Error: ${stderr}`); + try { + const audioData = await new Pollinations(mdl).sendAudioRequest(txt, voice); + + if (isWin) { + const ps = ` + Add-Type -AssemblyName presentationCore; + $p=New-Object System.Windows.Media.MediaPlayer; + $p.Open([Uri]::new("data:audio/mp3;base64,${audioData}")); + $p.Play(); + Start-Sleep -Seconds [math]::Ceiling($p.NaturalDuration.TimeSpan.TotalSeconds); + `; + spawn('powershell', ['-NoProfile','-Command', ps], { + stdio: 'ignore', detached: true + }).unref(); + processQueue(); + + } else { + const player = spawn('ffplay', ['-nodisp','-autoexit','pipe:0'], { + stdio: ['pipe','ignore','ignore'] + }); + player.stdin.write(Buffer.from(audioData, 'base64')); + player.stdin.end(); + player.on('exit', processQueue); + } + + } catch (e) { + console.error('Audio error', e); + processQueue(); } - processQueue(); // Continue with the next message in the queue - }); + } } diff --git a/src/models/pollinations.js b/src/models/pollinations.js index 5157bb5..57dc2a3 100644 --- a/src/models/pollinations.js +++ b/src/models/pollinations.js @@ -31,7 +31,7 @@ export class Pollinations { body: JSON.stringify(payload) }); if (!response.ok) { - console.error(`Failed to receive response. Status`, response.status, response.text); + console.error(`Failed to receive response. Status`, response.status, (await response.text())); res = "My brain disconnected, try again."; } else { const result = await response.json(); @@ -61,5 +61,54 @@ export class Pollinations { return this.sendRequest(imageMessages, systemMessage) } + + async sendAudioRequest(text, voice) { + const fallback = "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU5LjI3LjEwMAAAAAAAAAAAAAAA/+NAwAAAAAAAAAAAAEluZm8AAAAPAAAAAAAAANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAExhdmM1OS4zNwAAAAAAAAAAAAAAAAAAAAAAAAAAAADQAAAeowAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="; + // ^ 0 second silent audio clip + + const payload = { + model: this.model_name, + modalities: ["text", "audio"], + audio: { + voice: voice, + format: "mp3", + }, + messages: [ + { + role: "developer", + content: "You are an AI that echoes. Your sole function is to repeat back everything the user says to you exactly as it is written. This includes punctuation, grammar, language, and text formatting. Do not add, remove, or alter anything in the user's input in any way. Respond only with an exact duplicate of the user’s query." + // this is required because pollinations attempts to send an AI response to the text instead of just saying the text. + }, + { + role: "user", + content: text + } + ] + } + + let audioData = null; + + try { + const response = await fetch(this.url, { + method: "POST", + headers: { + "Content-Type": "application/json" + }, + body: JSON.stringify(payload) + }) + + if (!response.ok) { + console.error("Failed to get text transcription. Status", response.status, (await response.text())) + return fallback + } + + const result = await response.json(); + audioData = result.choices[0].message.audio.data; + return audioData; + } catch (err) { + console.error("TTS fetch failed:", err); + return fallback + } + } } From c540b7d92fcf8190079dd831aea68bf3f5a69dda Mon Sep 17 00:00:00 2001 From: uukelele-scratch Date: Sun, 20 Apr 2025 18:27:57 +0100 Subject: [PATCH 4/8] fix: renamed import --- src/agent/agent.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent/agent.js b/src/agent/agent.js index 300640f..cdea40b 100644 --- a/src/agent/agent.js +++ b/src/agent/agent.js @@ -15,7 +15,7 @@ import { addBrowserViewer } from './vision/browser_viewer.js'; import settings from '../../settings.js'; import { serverProxy } from './agent_proxy.js'; import { Task } from './tasks.js'; -import { Speaker } from './speak.js'; +import { say } from './speak.js'; export class Agent { async start(profile_fp, load_mem=false, init_message=null, count_id=0, task_path=null, task_id=null) { From 76b96f829e7dbd6d8800db105c58baa4988eb0b4 Mon Sep 17 00:00:00 2001 From: uukelele-scratch Date: Mon, 21 Apr 2025 08:06:19 +0100 Subject: [PATCH 5/8] minor cleanup --- src/agent/speak.js | 10 ++-- src/models/pollinations.js | 94 ++++++++++++++++++-------------------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/src/agent/speak.js b/src/agent/speak.js index 94332af..3d366d3 100644 --- a/src/agent/speak.js +++ b/src/agent/speak.js @@ -1,6 +1,6 @@ import { exec, spawn } from 'child_process'; import settings from '../../settings.js'; -import { Pollinations } from '../models/pollinations.js'; +import { sendAudioRequest } from '../models/pollinations.js'; let speakingQueue = []; let isSpeaking = false; @@ -20,7 +20,7 @@ async function processQueue() { const isWin = process.platform === 'win32'; const isMac = process.platform === 'darwin'; - const model = settings.speak_model || 'system'; + const model = settings.speak_model || 'pollinations/openai-audio/echo'; if (model === 'system') { // system TTS @@ -43,7 +43,11 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"` if (prov !== 'pollinations') throw new Error(`Unknown provider: ${prov}`); try { - const audioData = await new Pollinations(mdl).sendAudioRequest(txt, voice); + let audioData = await sendAudioRequest(txt, mdl, voice); + if (!audioData) { + audioData = "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU5LjI3LjEwMAAAAAAAAAAAAAAA/+NAwAAAAAAAAAAAAEluZm8AAAAPAAAAAAAAANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAExhdmM1OS4zNwAAAAAAAAAAAAAAAAAAAAAAAAAAAADQAAAeowAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="; + // ^ 0 second silent audio clip + } if (isWin) { const ps = ` diff --git a/src/models/pollinations.js b/src/models/pollinations.js index 57dc2a3..1fb89ed 100644 --- a/src/models/pollinations.js +++ b/src/models/pollinations.js @@ -61,54 +61,50 @@ export class Pollinations { return this.sendRequest(imageMessages, systemMessage) } - - async sendAudioRequest(text, voice) { - const fallback = "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU5LjI3LjEwMAAAAAAAAAAAAAAA/+NAwAAAAAAAAAAAAEluZm8AAAAPAAAAAAAAANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAExhdmM1OS4zNwAAAAAAAAAAAAAAAAAAAAAAAAAAAADQAAAeowAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="; - // ^ 0 second silent audio clip - - const payload = { - model: this.model_name, - modalities: ["text", "audio"], - audio: { - voice: voice, - format: "mp3", - }, - messages: [ - { - role: "developer", - content: "You are an AI that echoes. Your sole function is to repeat back everything the user says to you exactly as it is written. This includes punctuation, grammar, language, and text formatting. Do not add, remove, or alter anything in the user's input in any way. Respond only with an exact duplicate of the user’s query." - // this is required because pollinations attempts to send an AI response to the text instead of just saying the text. - }, - { - role: "user", - content: text - } - ] - } - - let audioData = null; - - try { - const response = await fetch(this.url, { - method: "POST", - headers: { - "Content-Type": "application/json" - }, - body: JSON.stringify(payload) - }) - - if (!response.ok) { - console.error("Failed to get text transcription. Status", response.status, (await response.text())) - return fallback - } - - const result = await response.json(); - audioData = result.choices[0].message.audio.data; - return audioData; - } catch (err) { - console.error("TTS fetch failed:", err); - return fallback - } - } } +export async function sendAudioRequest(text, model, voice) { + const payload = { + model: model, + modalities: ["text", "audio"], + audio: { + voice: voice, + format: "mp3", + }, + messages: [ + { + role: "developer", + content: "You are an AI that echoes. Your sole function is to repeat back everything the user says to you exactly as it is written. This includes punctuation, grammar, language, and text formatting. Do not add, remove, or alter anything in the user's input in any way. Respond only with an exact duplicate of the user’s query." + // this is required because pollinations attempts to send an AI response to the text instead of just saying the text. + }, + { + role: "user", + content: text + } + ] + } + + let audioData = null; + + try { + const response = await fetch("https://text.pollinations.ai/openai", { + method: "POST", + headers: { + "Content-Type": "application/json" + }, + body: JSON.stringify(payload) + }) + + if (!response.ok) { + console.error("Failed to get text transcription. Status", response.status, (await response.text())) + return null; + } + + const result = await response.json(); + audioData = result.choices[0].message.audio.data; + return audioData; + } catch (err) { + console.error("TTS fetch failed:", err); + return null; + } +} \ No newline at end of file From 9b3eb9a11b1db293a1ee56b80ac4e49a92565a08 Mon Sep 17 00:00:00 2001 From: uukelele-scratch Date: Mon, 21 Apr 2025 08:53:20 +0100 Subject: [PATCH 6/8] update README --- README.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8c4a4e8..f91c624 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,17 @@ Do not connect this bot to public servers with coding enabled. This project allo - [Minecraft Java Edition](https://www.minecraft.net/en-us/store/minecraft-java-bedrock-edition-pc) (up to v1.21.1, recommend v1.20.4) - [Node.js Installed](https://nodejs.org/) (at least v14) -- One of these: [OpenAI API Key](https://openai.com/blog/openai-api) | [Gemini API Key](https://aistudio.google.com/app/apikey) | [Anthropic API Key](https://docs.anthropic.com/claude/docs/getting-access-to-claude) | [Replicate API Key](https://replicate.com/) | [Hugging Face API Key](https://huggingface.co/) | [Groq API Key](https://console.groq.com/keys) | [Ollama Installed](https://ollama.com/download). | [Mistral API Key](https://docs.mistral.ai/getting-started/models/models_overview/) | [Qwen API Key [Intl.]](https://www.alibabacloud.com/help/en/model-studio/developer-reference/get-api-key)/[[cn]](https://help.aliyun.com/zh/model-studio/getting-started/first-api-call-to-qwen?) | [Novita AI API Key](https://novita.ai/settings?utm_source=github_mindcraft&utm_medium=github_readme&utm_campaign=link#key-management) | +- One of these: + - [OpenAI API Key](https://openai.com/blog/openai-api) + - [Gemini API Key](https://aistudio.google.com/app/apikey) + - [Anthropic API Key](https://docs.anthropic.com/claude/docs/getting-access-to-claude) + - [Replicate API Key](https://replicate.com/) + - [Hugging Face API Key](https://huggingface.co/) + - [Groq API Key](https://console.groq.com/keys) + - [Ollama Installed](https://ollama.com/download) + - [Mistral API Key](https://docs.mistral.ai/getting-started/models/models_overview/) + - [Qwen API Key [Intl.]](https://www.alibabacloud.com/help/en/model-studio/developer-reference/get-api-key)/[[cn]](https://help.aliyun.com/zh/model-studio/getting-started/first-api-call-to-qwen?) + - [Novita AI API Key](https://novita.ai/settings?utm_source=github_mindcraft&utm_medium=github_readme&utm_campaign=link#key-management) ## Install and Run @@ -53,6 +63,7 @@ You can configure the agent's name, model, and prompts in their profile like `an | `openrouter` | `OPENROUTER_API_KEY` | `openrouter/anthropic/claude-3.5-sonnet` | [docs](https://openrouter.ai/models) | | `glhf.chat` | `GHLF_API_KEY` | `glhf/hf:meta-llama/Llama-3.1-405B-Instruct` | [docs](https://glhf.chat/user-settings/api) | | `hyperbolic` | `HYPERBOLIC_API_KEY` | `hyperbolic/deepseek-ai/DeepSeek-V3` | [docs](https://docs.hyperbolic.xyz/docs/getting-started) | +| `pollinations` | n/a | `pollinations/openai-large` | [docs](https://github.com/pollinations/pollinations/blob/master/APIDOCS.md) | If you use Ollama, to install the models used by default (generation and embedding), execute the following terminal command: `ollama pull llama3.1 && ollama pull nomic-embed-text` From 0d7500e8985a519a924ce133e0fa30abe7cdf114 Mon Sep 17 00:00:00 2001 From: uukelele-scratch Date: Mon, 21 Apr 2025 09:06:30 +0100 Subject: [PATCH 7/8] moved speak model into profile.json + major cleanup --- README.md | 6 ++++++ profiles/defaults/_default.json | 2 ++ settings.js | 3 ++- src/agent/agent.js | 6 +++--- src/agent/speak.js | 22 +++++++++++++++------- src/models/pollinations.js | 4 ++-- 6 files changed, 30 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index f91c624..86cb192 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,12 @@ You can pass a string or an object for these fields. A model object must specify "api": "openai", "url": "https://api.openai.com/v1/", "model": "text-embedding-ada-002" +}, +"speak_model": { + "api": "pollinations", + "url": "https://text.pollinations.ai/openai", + "model": "openai-audio", + "voice": "echo" } ``` diff --git a/profiles/defaults/_default.json b/profiles/defaults/_default.json index bf31d22..a0be969 100644 --- a/profiles/defaults/_default.json +++ b/profiles/defaults/_default.json @@ -11,6 +11,8 @@ "image_analysis": "You are a Minecraft bot named $NAME that has been given a screenshot of your current view. Analyze and summarize the view; describe terrain, blocks, entities, structures, and notable features. Focus on details relevant to the conversation. Note: the sky is always blue regardless of weather or time, dropped items are small pink cubes, and blocks below y=0 do not render. Be extremely concise and correct, respond only with your analysis, not conversationally. $STATS", + "speak_model": "pollinations/openai-audio/echo", + "modes": { "self_preservation": true, "unstuck": true, diff --git a/settings.js b/settings.js index d5e8b13..c389f80 100644 --- a/settings.js +++ b/settings.js @@ -30,7 +30,8 @@ const settings = { "only_chat_with": [], // users that the bots listen to and send general messages to. if empty it will chat publicly "speak": true, - "speak_model": "pollinations/openai-audio/echo", // allows all bots to speak through text-to-speach. format: {provider}/{model}/{voice}. if set to "system" it will use system text-to-speech, which works on windows and mac, but on linux you need to `apt install espeak`. + // allows all bots to speak through text-to-speech. format: {provider}/{model}/{voice}. if set to "system" it will use system text-to-speech, which works on windows and mac, but on linux you need to `apt install espeak`. + // specify speech model inside each profile - so that you can have each bot with different voices ;) "language": "en", // translate to/from this language. NOT text-to-speech language. Supports these language names: https://cloud.google.com/translate/docs/languages "show_bot_views": false, // show bot's view in browser at localhost:3000, 3001... diff --git a/src/agent/agent.js b/src/agent/agent.js index cdea40b..5713b66 100644 --- a/src/agent/agent.js +++ b/src/agent/agent.js @@ -357,9 +357,9 @@ export class Agent { } } else { - if (settings.speak) { - say(to_translate); - } + if (settings.speak) { + say(to_translate, this.prompter.profile.speak_model); + } this.bot.chat(message); } } diff --git a/src/agent/speak.js b/src/agent/speak.js index 3d366d3..fcfa001 100644 --- a/src/agent/speak.js +++ b/src/agent/speak.js @@ -1,12 +1,11 @@ import { exec, spawn } from 'child_process'; -import settings from '../../settings.js'; import { sendAudioRequest } from '../models/pollinations.js'; let speakingQueue = []; let isSpeaking = false; -export function say(text) { - speakingQueue.push(text); +export function say(text, speak_model) { + speakingQueue.push([text, speak_model]); if (!isSpeaking) processQueue(); } @@ -16,11 +15,11 @@ async function processQueue() { return; } isSpeaking = true; - const txt = speakingQueue.shift(); + const [txt, speak_model] = speakingQueue.shift(); const isWin = process.platform === 'win32'; const isMac = process.platform === 'darwin'; - const model = settings.speak_model || 'pollinations/openai-audio/echo'; + const model = speak_model || 'pollinations/openai-audio/echo'; if (model === 'system') { // system TTS @@ -39,11 +38,20 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"` } else { // remote audio provider - const [prov, mdl, voice] = model.split('/'); + let prov, mdl, voice, url; + if (typeof model === "string") { + [prov, mdl, voice] = model.split('/'); + url = "https://text.pollinations.ai/openai"; + } else { + prov = model.provider; + mdl = model.model; + voice = model.voice; + url = model.url || "https://text.pollinations.ai/openai"; + } if (prov !== 'pollinations') throw new Error(`Unknown provider: ${prov}`); try { - let audioData = await sendAudioRequest(txt, mdl, voice); + let audioData = await sendAudioRequest(txt, mdl, voice, url); if (!audioData) { audioData = "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU5LjI3LjEwMAAAAAAAAAAAAAAA/+NAwAAAAAAAAAAAAEluZm8AAAAPAAAAAAAAANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAExhdmM1OS4zNwAAAAAAAAAAAAAAAAAAAAAAAAAAAADQAAAeowAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="; // ^ 0 second silent audio clip diff --git a/src/models/pollinations.js b/src/models/pollinations.js index 1fb89ed..0402f6c 100644 --- a/src/models/pollinations.js +++ b/src/models/pollinations.js @@ -63,7 +63,7 @@ export class Pollinations { } } -export async function sendAudioRequest(text, model, voice) { +export async function sendAudioRequest(text, model, voice, url) { const payload = { model: model, modalities: ["text", "audio"], @@ -87,7 +87,7 @@ export async function sendAudioRequest(text, model, voice) { let audioData = null; try { - const response = await fetch("https://text.pollinations.ai/openai", { + const response = await fetch(url, { method: "POST", headers: { "Content-Type": "application/json" From 5daee21e2aa3d9f800b5b7e3a43aed4d5ca9c44f Mon Sep 17 00:00:00 2001 From: uukelele Date: Mon, 21 Apr 2025 09:18:38 +0100 Subject: [PATCH 8/8] tiniest bug fix --- src/agent/speak.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent/speak.js b/src/agent/speak.js index fcfa001..22156f9 100644 --- a/src/agent/speak.js +++ b/src/agent/speak.js @@ -43,7 +43,7 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"` [prov, mdl, voice] = model.split('/'); url = "https://text.pollinations.ai/openai"; } else { - prov = model.provider; + prov = model.api; mdl = model.model; voice = model.voice; url = model.url || "https://text.pollinations.ai/openai";