add fuzzy search and combine it with semantic search results, update readme, change license file extension

This commit is contained in:
2024-06-10 20:51:35 -04:00
parent 3242c3bc77
commit 3320683d10
13 changed files with 176 additions and 88 deletions

View File

@@ -1,40 +0,0 @@
// eslint-disable-next-line
import * as tf from '@tensorflow/tfjs-node';
import poemEmbeddings from '$lib/utils/poetry/embeddings.json';
import { json } from '@sveltejs/kit';
import { getModel, type Embedding, type SearchResult } from '$lib/utils/search';
// Search handler
export const GET = async ({ url }: { url: URL }) => {
const model = await getModel();
const searchQuery = url.searchParams.get('q');
if (!searchQuery) {
return { status: 400, body: { error: 'Query parameter "q" is required' } };
}
try {
// Generate embedding for the query
const queryEmbedding = await model.embed([searchQuery]);
const queryVec = queryEmbedding.arraySync()[0];
// Calculate similarities
const results = poemEmbeddings
.map((poem: Embedding) => ({
poem,
similarity: cosineSimilarity(queryVec, poem.vector)
}))
.sort((a: SearchResult, b: SearchResult) => b.similarity - a.similarity)
.slice(0, 10); // Top 10 results
return json(results);
} catch (error) {
return { status: 500, body: { error: (error as Error).message } };
}
};
function cosineSimilarity(vecA: number[], vecB: number[]) {
const dotProduct = vecA.reduce((acc, val, i) => acc + val * vecB[i], 0);
const magnitudeA = Math.sqrt(vecA.reduce((acc, val) => acc + val * val, 0));
const magnitudeB = Math.sqrt(vecB.reduce((acc, val) => acc + val * val, 0));
return dotProduct / (magnitudeA * magnitudeB);
}

View File

@@ -0,0 +1,88 @@
// eslint-disable-next-line
import * as tf from '@tensorflow/tfjs-node';
import postEmbeddings from '$lib/utils/poetry/embeddings.json';
import { json } from '@sveltejs/kit';
import { getModel, type Embedding, type SearchResult } from '$lib/utils/search';
import { fetchMarkdownPosts } from '$lib/utils';
import Fuse from 'fuse.js';
// Search handler
export const GET = async ({ url }: { url: URL }) => {
const searchQuery = url.searchParams.get('q');
if (!searchQuery) {
return { status: 400, body: { error: 'Query parameter "q" is required' } };
}
try {
const model = await getModel();
const { posts } = await fetchMarkdownPosts('all');
const fuse = new Fuse(posts, {
keys: ['content', 'meta.title', 'meta.tags'],
includeScore: true
});
// Fuzzy search
const fuzzyResults = fuse.search(searchQuery, { limit: 10 });
// Generate embedding for the query
const queryEmbedding = await model.embed([searchQuery]);
const queryVec = queryEmbedding.arraySync()[0];
// Calculate similarities
let semanticResults = postEmbeddings.map((post: Embedding) => ({
post,
similarity: cosineSimilarity(queryVec, post.vector)
}));
// add fuzzy results to semantic results
semanticResults = semanticResults.map((semanticResult) => {
const fuzzyResultIndex = fuzzyResults.findIndex(
(fuzzyResult) =>
fuzzyResult.item.section === semanticResult.post.section &&
fuzzyResult.item.filename === semanticResult.post.filename
);
if (fuzzyResultIndex > -1) {
const fuzzyResult = fuzzyResults.splice(fuzzyResultIndex, 1)[0];
if (fuzzyResult.score && 1 - fuzzyResult.score > 0.8) {
semanticResult.similarity = 1 - fuzzyResult.score / 2 + semanticResult.similarity;
}
}
return semanticResult;
});
// add rest of fuzzy results
semanticResults.push(
...fuzzyResults.map((fuzzyResult) => {
let similarity = 0;
if (fuzzyResult.score && 1 - fuzzyResult.score > 0.9) {
similarity = 1 - fuzzyResult.score / 2;
}
return {
post: {
id: fuzzyResult.item.id,
section: fuzzyResult.item.section,
filename: fuzzyResult.item.filename,
vector: [0, 0, 0]
},
similarity: similarity
};
})
);
semanticResults = semanticResults
.sort((a: SearchResult, b: SearchResult) => b.similarity - a.similarity)
.slice(0, 10);
return json(semanticResults);
} catch (error) {
return { status: 500, body: { error: (error as Error).message } };
}
};
function cosineSimilarity(vecA: number[], vecB: number[]) {
const dotProduct = vecA.reduce((acc, val, i) => acc + val * vecB[i], 0);
const magnitudeA = Math.sqrt(vecA.reduce((acc, val) => acc + val * val, 0));
const magnitudeB = Math.sqrt(vecB.reduce((acc, val) => acc + val * val, 0));
return dotProduct / (magnitudeA * magnitudeB);
}