mirror of
https://github.com/kolbytn/mindcraft.git
synced 2025-09-02 04:13:03 +02:00
add gemini TTS
This commit is contained in:
parent
b027f1e345
commit
3557215e29
4 changed files with 85 additions and 3 deletions
|
@ -11,6 +11,7 @@
|
||||||
"google-translate-api-x": "^10.7.1",
|
"google-translate-api-x": "^10.7.1",
|
||||||
"groq-sdk": "^0.15.0",
|
"groq-sdk": "^0.15.0",
|
||||||
"install": "^0.13.0",
|
"install": "^0.13.0",
|
||||||
|
"lamejs": "^1.2.1",
|
||||||
"minecraft-data": "^3.78.0",
|
"minecraft-data": "^3.78.0",
|
||||||
"mineflayer": "^4.29.0",
|
"mineflayer": "^4.29.0",
|
||||||
"mineflayer-armor-manager": "^2.0.1",
|
"mineflayer-armor-manager": "^2.0.1",
|
||||||
|
@ -21,7 +22,6 @@
|
||||||
"node-canvas-webgl": "PrismarineJS/node-canvas-webgl",
|
"node-canvas-webgl": "PrismarineJS/node-canvas-webgl",
|
||||||
"npm": "^11.5.2",
|
"npm": "^11.5.2",
|
||||||
"openai": "^4.4.0",
|
"openai": "^4.4.0",
|
||||||
"patch-package": "^8.0.0",
|
|
||||||
"prismarine-item": "^1.15.0",
|
"prismarine-item": "^1.15.0",
|
||||||
"prismarine-viewer": "^1.32.0",
|
"prismarine-viewer": "^1.32.0",
|
||||||
"replicate": "^0.29.4",
|
"replicate": "^0.29.4",
|
||||||
|
@ -40,6 +40,7 @@
|
||||||
"@eslint/js": "^9.13.0",
|
"@eslint/js": "^9.13.0",
|
||||||
"eslint": "^9.13.0",
|
"eslint": "^9.13.0",
|
||||||
"eslint-plugin-no-floating-promise": "^2.0.0",
|
"eslint-plugin-no-floating-promise": "^2.0.0",
|
||||||
"globals": "^15.11.0"
|
"globals": "^15.11.0",
|
||||||
|
"patch-package": "^8.0.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
21
patches/lamejs+1.2.1.patch
Normal file
21
patches/lamejs+1.2.1.patch
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
diff --git a/node_modules/lamejs/lame.all.js b/node_modules/lamejs/lame.all.js
|
||||||
|
index bfd3637..b905508 100644
|
||||||
|
--- a/node_modules/lamejs/lame.all.js
|
||||||
|
+++ b/node_modules/lamejs/lame.all.js
|
||||||
|
@@ -1,4 +1,3 @@
|
||||||
|
-function lamejs() {
|
||||||
|
function new_byte(count) {
|
||||||
|
return new Int8Array(count);
|
||||||
|
}
|
||||||
|
@@ -15511,8 +15510,9 @@ WavHeader.readHeader = function (dataView) {
|
||||||
|
|
||||||
|
L3Side.SFBMAX = (Encoder.SBMAX_s * 3);
|
||||||
|
//testFullLength();
|
||||||
|
+export var lamejs = {}
|
||||||
|
lamejs.Mp3Encoder = Mp3Encoder;
|
||||||
|
lamejs.WavHeader = WavHeader;
|
||||||
|
-}
|
||||||
|
+
|
||||||
|
//fs=require('fs');
|
||||||
|
-lamejs();
|
||||||
|
+//lamejs();
|
|
@ -1,6 +1,7 @@
|
||||||
import { exec, spawn } from 'child_process';
|
import { exec, spawn } from 'child_process';
|
||||||
import { TTSConfig as pollinationsTTSConfig } from '../models/pollinations.js';
|
import { TTSConfig as pollinationsTTSConfig } from '../models/pollinations.js';
|
||||||
import { TTSConfig as gptTTSConfig } from '../models/gpt.js';
|
import { TTSConfig as gptTTSConfig } from '../models/gpt.js';
|
||||||
|
import { TTSConfig as geminiTTSConfig } from '../models/gemini.js';
|
||||||
|
|
||||||
let speakingQueue = [];
|
let speakingQueue = [];
|
||||||
let isSpeaking = false;
|
let isSpeaking = false;
|
||||||
|
@ -44,6 +45,8 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"`
|
||||||
return pollinationsTTSConfig.baseUrl;
|
return pollinationsTTSConfig.baseUrl;
|
||||||
} else if (prov === 'openai') {
|
} else if (prov === 'openai') {
|
||||||
return gptTTSConfig.baseUrl;
|
return gptTTSConfig.baseUrl;
|
||||||
|
} else if (prov === 'google') {
|
||||||
|
return geminiTTSConfig.baseUrl;
|
||||||
} else {
|
} else {
|
||||||
// fallback
|
// fallback
|
||||||
return 'https://api.openai.com/v1'
|
return 'https://api.openai.com/v1'
|
||||||
|
@ -68,6 +71,8 @@ $s.Speak('${txt.replace(/'/g,"''")}'); $s.Dispose()"`
|
||||||
audioData = await pollinationsTTSConfig.sendAudioRequest(txt, mdl, voice, url);
|
audioData = await pollinationsTTSConfig.sendAudioRequest(txt, mdl, voice, url);
|
||||||
} else if (prov === "openai") {
|
} else if (prov === "openai") {
|
||||||
audioData = await gptTTSConfig.sendAudioRequest(txt, mdl, voice, url);
|
audioData = await gptTTSConfig.sendAudioRequest(txt, mdl, voice, url);
|
||||||
|
} else if (prov === "google") {
|
||||||
|
audioData = await geminiTTSConfig.sendAudioRequest(txt, mdl, voice, url);
|
||||||
} else {
|
} else {
|
||||||
throw new Error(`TTS Provider ${prov} is not supported.`);
|
throw new Error(`TTS Provider ${prov} is not supported.`);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
import { GoogleGenAI } from '@google/genai';
|
import { GoogleGenAI, VideoCompressionQuality } from '@google/genai';
|
||||||
import { toSinglePrompt, strictFormat } from '../utils/text.js';
|
import { toSinglePrompt, strictFormat } from '../utils/text.js';
|
||||||
import { getKey } from '../utils/keys.js';
|
import { getKey } from '../utils/keys.js';
|
||||||
|
|
||||||
|
import { lamejs } from 'lamejs/lame.all.js';
|
||||||
|
|
||||||
|
|
||||||
export class Gemini {
|
export class Gemini {
|
||||||
constructor(model, url, params) {
|
constructor(model, url, params) {
|
||||||
this.model = model || "gemini-2.5-flash";
|
this.model = model || "gemini-2.5-flash";
|
||||||
|
@ -116,3 +119,55 @@ export class Gemini {
|
||||||
return result.embeddings;
|
return result.embeddings;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Synthesizes speech for `text` with a Gemini model and returns it as MP3.
 *
 * The inline audio payload of the first response part is decoded from base64,
 * interpreted as 16-bit PCM samples (24 kHz, mono) and re-encoded to MP3 with
 * lamejs.
 *
 * @param {string} text - Text to be spoken.
 * @param {string} model - Model name forwarded to `generateContent`.
 * @param {string} voice - Prebuilt voice name.
 * @param {string} [url] - Unused here; accepted so the call shape matches the
 *   other TTS providers' `sendAudioRequest(txt, mdl, voice, url)`.
 * @returns {Promise<string>} Base64-encoded MP3 audio.
 * @throws {Error} If the response carries no inline audio data.
 */
const sendAudioRequest = async (text, model, voice, url) => {
    const ai = new GoogleGenAI({ apiKey: getKey('GEMINI_API_KEY') });

    const response = await ai.models.generateContent({
        model: model,
        contents: [{ parts: [{ text: text }] }],
        config: {
            responseModalities: ['AUDIO'],
            speechConfig: {
                voiceConfig: {
                    prebuiltVoiceConfig: { voiceName: voice },
                },
            },
        },
    });

    // data is base64-encoded pcm
    const data = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
    if (!data) {
        // Fail with a clear message instead of the TypeError that
        // Buffer.from(undefined, 'base64') would raise.
        throw new Error('Gemini TTS response did not contain audio data.');
    }

    // convert pcm to mp3
    const SAMPLE_RATE = 24000;
    const CHANNELS = 1;
    const BITRATE_KBPS = 128;
    const BYTES_PER_SAMPLE = 2;
    const pcmBuffer = Buffer.from(data, 'base64');

    // Copy the bytes into a freshly allocated Int16Array rather than creating
    // a view over pcmBuffer.buffer: a view throws a RangeError when the
    // Buffer's byteOffset is not 2-byte aligned or when the byte count is odd.
    // A trailing odd byte cannot form a sample and is dropped.
    const sampleCount = Math.floor(pcmBuffer.length / BYTES_PER_SAMPLE);
    const pcmInt16Array = new Int16Array(sampleCount);
    new Uint8Array(pcmInt16Array.buffer).set(
        pcmBuffer.subarray(0, sampleCount * BYTES_PER_SAMPLE)
    );

    const mp3encoder = new lamejs.Mp3Encoder(CHANNELS, SAMPLE_RATE, BITRATE_KBPS);
    const sampleBlockSize = 1152; // Standard for MPEG audio
    const mp3Data = [];
    for (let i = 0; i < pcmInt16Array.length; i += sampleBlockSize) {
        const sampleChunk = pcmInt16Array.subarray(i, i + sampleBlockSize);
        const encodedChunk = mp3encoder.encodeBuffer(sampleChunk);
        if (encodedChunk.length > 0) {
            mp3Data.push(Buffer.from(encodedChunk));
        }
    }

    // Drain whatever the encoder is still holding after the last block.
    const encodedTail = mp3encoder.flush();
    if (encodedTail.length > 0) {
        mp3Data.push(Buffer.from(encodedTail));
    }
    const finalBuffer = Buffer.concat(mp3Data);
    // finished converting

    return finalBuffer.toString('base64');
};
|
||||||
|
|
||||||
|
/**
 * Text-to-speech configuration exported for the Gemini provider.
 * It exposes a single member, the `sendAudioRequest` function.
 */
export const TTSConfig = {
    sendAudioRequest,
};
|
Loading…
Add table
Reference in a new issue