use word-overlap for skill docs embed if unsupported

This commit is contained in:
MaxRobinsonTheGreat 2025-02-17 13:13:45 -06:00
parent 7970397517
commit 138a9838ae
7 changed files with 73 additions and 36 deletions

View file

@ -35,9 +35,9 @@ export class Coder {
while ((match = skillRegex.exec(code)) !== null) {
skills.push(match[1]);
}
const allDocs = await this.agent.prompter.skill_libary.getRelevantSkillDocs();
//lint if the function exists
const missingSkills = skills.filter(skill => !allDocs.includes(skill));
const allDocs = await this.agent.prompter.skill_libary.getAllSkillDocs();
// check function exists
const missingSkills = skills.filter(skill => !!allDocs[skill]);
if (missingSkills.length > 0) {
result += 'These functions do not exist. Please modify the correct function name and try again.\n';
result += '### FUNCTIONS NOT FOUND ###\n';
@ -163,7 +163,6 @@ export class Coder {
for (let i=0; i<5; i++) {
if (this.agent.bot.interrupt_code)
return interrupt_return;
console.log(messages)
let res = await this.agent.prompter.promptCoding(JSON.parse(JSON.stringify(messages)));
if (this.agent.bot.interrupt_code)
return interrupt_return;

View file

@ -33,8 +33,10 @@ export const actionsList = [
},
perform: async function (agent, prompt) {
// just ignore prompt - it is now in context in chat history
if (!settings.allow_insecure_coding)
if (!settings.allow_insecure_coding) {
agent.openChat('newAction is disabled. Enable with allow_insecure_coding=true in settings.js');
return 'newAction not allowed! Code writing is disabled in settings. Notify the user.';
}
return await agent.coder.generateCode(agent.history);
}
},

View file

@ -1,34 +1,53 @@
import { cosineSimilarity } from '../../utils/math.js';
import { getSkillDocs } from './index.js';
import { wordOverlapScore } from '../../utils/text.js';
export class SkillLibrary {
constructor(agent,embedding_model) {
this.agent = agent;
this.embedding_model = embedding_model;
this.skill_docs_embeddings = {};
this.skill_docs = null;
}
async initSkillLibrary() {
const skillDocs = getSkillDocs();
const embeddingPromises = skillDocs.map((doc) => {
return (async () => {
let func_name_desc = doc.split('\n').slice(0, 2).join('');
this.skill_docs_embeddings[doc] = await this.embedding_model.embed(func_name_desc);
})();
});
await Promise.all(embeddingPromises);
this.skill_docs = skillDocs;
if (this.embedding_model) {
const embeddingPromises = skillDocs.map((doc) => {
return (async () => {
let func_name_desc = doc.split('\n').slice(0, 2).join('');
this.skill_docs_embeddings[doc] = await this.embedding_model.embed(func_name_desc);
})();
});
await Promise.all(embeddingPromises);
}
}
async getAllSkillDocs() {
return this.skill_docs;
}
async getRelevantSkillDocs(message, select_num) {
let latest_message_embedding = '';
if(message) //message is not empty, get the relevant skill docs, else return all skill docs
latest_message_embedding = await this.embedding_model.embed(message);
let skill_doc_similarities = Object.keys(this.skill_docs_embeddings)
if(!message) // use filler message if none is provided
message = '(no message)';
let skill_doc_similarities = [];
if (!this.embedding_model) {
skill_doc_similarities = Object.keys(this.skill_docs)
.map(doc_key => ({
doc_key,
similarity_score: wordOverlapScore(message, this.skill_docs[doc_key])
}))
.sort((a, b) => b.similarity_score - a.similarity_score);
}
else {
let latest_message_embedding = '';
skill_doc_similarities = Object.keys(this.skill_docs_embeddings)
.map(doc_key => ({
doc_key,
similarity_score: cosineSimilarity(latest_message_embedding, this.skill_docs_embeddings[doc_key])
}))
.sort((a, b) => b.similarity_score - a.similarity_score);
}
let length = skill_doc_similarities.length;
if (typeof select_num !== 'number' || isNaN(select_num) || select_num < 0) {
@ -42,6 +61,4 @@ export class SkillLibrary {
relevant_skill_docs += selected_docs.map(doc => `${doc.doc_key}`).join('\n### ');
return relevant_skill_docs;
}
}

View file

@ -111,6 +111,18 @@ export async function craftRecipe(bot, itemName, num=1) {
return true;
}
export async function wait(seconds) {
    /**
     * Waits for the given number of seconds.
     * @param {number} seconds, the number of seconds to wait.
     * @returns {Promise<boolean>} true if the wait was successful, false otherwise.
     * @example
     * await skills.wait(10);
     **/
    // Timers expect milliseconds, so convert once up front.
    const delayMs = seconds * 1000;
    // Bare setTimeout is disabled to prevent unawaited code; wrapping the timer
    // in an awaited promise is the safe way to pause.
    await new Promise((done) => {
        setTimeout(done, delayMs);
    });
    return true;
}
export async function smeltItem(bot, itemName, num=1) {
/**

View file

@ -90,14 +90,21 @@ export class Prompter {
this.embedding_model = new Qwen(embedding.model, embedding.url);
else if (embedding.api === 'mistral')
this.embedding_model = new Mistral(embedding.model, embedding.url);
else if (embedding.api === 'huggingface')
this.embedding_model = new HuggingFace(embedding.model, embedding.url);
else if (embedding.api === 'groq')
this.embedding_model = new GroqCloudAPI(embedding.model, embedding.url);
else if (embedding.api === 'novita')
this.embedding_model = new Novita(embedding.model, embedding.url);
else {
this.embedding_model = null;
console.log('Unknown embedding: ', embedding ? embedding.api : '[NOT SPECIFIED]', '. Using word overlap.');
let embedding_name = embedding ? embedding.api : '[NOT SPECIFIED]'
console.warn('Unsupported embedding: ' + embedding_name + '. Using word-overlap instead, expect reduced performance. Recommend using a supported embedding model. See Readme.');
}
}
catch (err) {
console.log('Warning: Failed to initialize embedding model:', err.message);
console.log('Continuing anyway, using word overlap instead.');
console.warn('Warning: Failed to initialize embedding model:', err.message);
console.log('Continuing anyway, using word-overlap instead.');
this.embedding_model = null;
}
this.skill_libary = new SkillLibrary(agent, this.embedding_model);

View file

@ -1,5 +1,5 @@
import { cosineSimilarity } from './math.js';
import { stringifyTurns } from './text.js';
import { stringifyTurns, wordOverlapScore } from './text.js';
export class Examples {
constructor(model, select_num=2) {
@ -18,17 +18,6 @@ export class Examples {
return messages.trim();
}
getWords(text) {
return text.replace(/[^a-zA-Z ]/g, '').toLowerCase().split(' ');
}
wordOverlapScore(text1, text2) {
const words1 = this.getWords(text1);
const words2 = this.getWords(text2);
const intersection = words1.filter(word => words2.includes(word));
return intersection.length / (words1.length + words2.length - intersection.length);
}
async load(examples) {
this.examples = examples;
if (!this.model) return; // Early return if no embedding model
@ -68,8 +57,8 @@ export class Examples {
}
else {
this.examples.sort((a, b) =>
this.wordOverlapScore(turn_text, this.turnsToText(b)) -
this.wordOverlapScore(turn_text, this.turnsToText(a))
wordOverlapScore(turn_text, this.turnsToText(b)) -
wordOverlapScore(turn_text, this.turnsToText(a))
);
}
let selected = this.examples.slice(0, this.select_num);

View file

@ -26,6 +26,17 @@ export function toSinglePrompt(turns, system=null, stop_seq='***', model_nicknam
return prompt;
}
/**
 * Splits text into lowercase alphabetic words.
 *
 * Every character that is neither an ASCII letter nor whitespace is removed,
 * then the remainder is split on runs of whitespace. Newlines and tabs
 * therefore separate words (multi-line docs are not fused into one token),
 * and consecutive separators never yield empty-string tokens.
 *
 * @param {string} text - the text to tokenize.
 * @returns {string[]} the words in order of appearance; repeats are kept.
 */
function _getWords(text) {
    return text
        .toLowerCase()
        .replace(/[^a-z\s]/g, '')
        .split(/\s+/)
        .filter(word => word.length > 0);
}

/**
 * Scores the similarity of two texts as the Jaccard index of their word sets:
 * (distinct words in both) / (distinct words in either).
 *
 * Words are compared as sets, so repeating a word cannot inflate the score
 * and the result always lies in [0, 1].
 *
 * @param {string} text1 - first text to compare.
 * @param {string} text2 - second text to compare.
 * @returns {number} 1 when both texts contain the same set of words, 0 when
 *     they share none or when neither text contains any word.
 */
export function wordOverlapScore(text1, text2) {
    const words1 = new Set(_getWords(text1));
    const words2 = new Set(_getWords(text2));
    let shared = 0;
    for (const word of words1) {
        if (words2.has(word)) {
            shared++;
        }
    }
    const unionSize = words1.size + words2.size - shared;
    // Neither text has a word: report no overlap rather than dividing 0 by 0.
    return unionSize === 0 ? 0 : shared / unionSize;
}
// ensures stricter turn order and roles:
// - system messages are treated as user messages and prefixed with SYSTEM:
// - combines repeated messages from users