add semantic search

This commit is contained in:
Silas 2024-05-31 01:31:37 -04:00
parent 53635f0d59
commit de9cccabda
Failed to generate hash of commit
19 changed files with 1398 additions and 105 deletions

1104
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -11,13 +11,16 @@
"test": "vitest",
"lint": "prettier --check . && eslint .",
"format": "prettier --write .",
"model-pipeline:run": "node scripts/model-pipeline.js"
"model-pipeline:run": "node scripts/model-pipeline.js",
"generate-embeddings": "node scripts/generate-embeddings.js"
},
"devDependencies": {
"@sveltejs/adapter-auto": "^3.0.0",
"@sveltejs/kit": "^2.0.0",
"@sveltejs/vite-plugin-svelte": "^3.0.0",
"@tailwindcss/typography": "^0.5.13",
"@tensorflow-models/universal-sentence-encoder": "^1.3.3",
"@tensorflow/tfjs-node": "^4.19.0",
"@theatre/core": "^0.7.1",
"@theatre/studio": "^0.7.1",
"@threlte/theatre": "^2.1.7",
@ -30,6 +33,8 @@
"eslint": "^8.56.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-svelte": "^2.35.1",
"fs-extra": "^11.2.0",
"marked": "^12.0.2",
"postcss": "^8.4.38",
"prettier": "^3.1.1",
"prettier-plugin-svelte": "^3.1.2",
@ -44,6 +49,7 @@
"type": "module",
"dependencies": {
"@dimforge/rapier3d-compat": "^0.11.2",
"@tensorflow/tfjs": "^4.19.0",
"@threlte/core": "^7.3.0",
"@threlte/extras": "^8.11.2",
"@threlte/rapier": "^2.0.0",

View File

@ -0,0 +1,39 @@
// eslint-disable-next-line @typescript-eslint/no-unused-vars
import * as tf from '@tensorflow/tfjs-node';
import use from '@tensorflow-models/universal-sentence-encoder';
import fs from 'fs-extra';
import glob from 'glob';
import path from 'path';
import { marked } from 'marked';
/**
 * Reads a markdown file and reduces it to plain text suitable for embedding.
 *
 * The file is rendered with `marked`, the generated HTML tags are stripped, and
 * the basic HTML entities are decoded so the encoder sees e.g. `didn't` rather
 * than `didn&#39;t`.
 *
 * @param {string} filePath Path of the markdown file to read (decoded as UTF-8).
 * @returns {Promise<string>} The rendered text with tags removed and entities decoded.
 */
async function extractTextFromMarkdown(filePath) {
	const markdown = await fs.readFile(filePath, 'utf8');
	// `await` is a no-op for a plain string and keeps this correct should marked return a promise.
	const html = await marked(markdown);

	// NOTE(review): assumes marked HTML-escapes &, <, >, " and ' in text — confirm against the
	// installed marked version. Decoding is done in a single pass so "&amp;lt;" becomes "&lt;",
	// not "<".
	const entities = { '&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"', '&#39;': "'" };

	return html
		.replace(/<[^>]*>/g, '') // Strip HTML tags generated by marked
		.replace(/&(?:amp|lt|gt|quot|#39);/g, (entity) => entities[entity]);
}
/**
 * Generates a Universal Sentence Encoder embedding for every `*.md` file directly
 * inside `directoryPath` and writes them to a JSON file as an array of
 * `{ id, vector }` objects, where `id` is the file name without the `.md` extension.
 *
 * @param {string} directoryPath Directory containing the markdown files.
 * @param {string} [outputPath='embeddings.json'] Where the JSON file is written.
 * @returns {Promise<void>} Resolves once the JSON file has been written.
 */
async function generateEmbeddingsForDirectory(directoryPath, outputPath = 'embeddings.json') {
	// Get all markdown files in directory
	const files = glob.sync(`${directoryPath}/*.md`);

	// Extract texts from markdown files
	const poems = await Promise.all(
		files.map(async (file) => ({
			id: path.basename(file, '.md'), // Use filename as ID
			text: await extractTextFromMarkdown(file)
		}))
	);

	// Load the Universal Sentence Encoder model
	const model = await use.load();

	// Embed one poem at a time and release each tensor as soon as its values are copied out,
	// so tensor memory does not grow with the number of poems.
	const poemEmbeddings = [];
	for (const poem of poems) {
		const embedding = await model.embed([poem.text]);
		try {
			poemEmbeddings.push({
				id: poem.id,
				vector: embedding.arraySync()[0] // Extract the vector
			});
		} finally {
			embedding.dispose();
		}
	}

	// Save embeddings to JSON file; awaited so write failures reject this function's promise.
	await fs.writeJson(outputPath, poemEmbeddings);
}
// Script entry point. Failures are reported and turned into a non-zero exit code
// instead of surfacing as an unhandled promise rejection.
generateEmbeddingsForDirectory('src/posts/poetry').catch((error) => {
	console.error('Failed to generate embeddings:', error);
	process.exitCode = 1;
}); // Update path accordingly

10
src/hooks.server.ts Normal file
View File

@ -0,0 +1,10 @@
import { getModel } from '$lib/utils/search';
import { building } from '$app/environment';
// Start loading the model when the server starts (skipped while `building` is set) so the
// load is already under way before the first search request arrives.
if (!building) {
	// Fire-and-forget: nothing is logged here on success because `getModel()` returns a promise
	// that has not resolved yet at this point; only failures are reported.
	getModel().catch((error) => {
		console.error('Failed to load the TensorFlow model at startup:', error);
	});
}

View File

@ -0,0 +1,36 @@
<script lang="ts">
import { searchResults } from '$lib/store';
import type { SearchResult } from '$lib/utils/search';
// Mirror the shared search results into a local variable. The `$searchResults`
// auto-subscription is released by Svelte when the component is destroyed, whereas a manual
// `subscribe()` without a matching unsubscribe keeps its callback registered forever.
let results: SearchResult[] = [];
$: results = $searchResults ?? [];
/**
 * Turns a slug into a display title: hyphens become spaces and the first
 * character of every word is upper-cased (the rest of the word is untouched).
 *
 * @param slug Hyphen-separated identifier, e.g. `autumn-synesthesia`.
 * @returns The title-cased text, e.g. `Autumn Synesthesia`.
 */
function slugToTitle(slug: string) {
	const capitalise = (word: string) => word.substring(0, 1).toUpperCase() + word.substring(1);

	// Splitting on either separator is equivalent to replacing hyphens with spaces first.
	const words = slug.split(/[- ]/);
	return words.map(capitalise).join(' ');
}
</script>
<!-- Renders nothing when there are no results; otherwise a linked list of matching poems. -->
{#if results.length > 0}
	<div class="container mx-auto flex flex-col items-center">
		<div class="prose">
			<h1 class="py-6">Search results:</h1>
		</div>
		<ul>
			<!-- Keyed by poem id so list items stay matched to their poem as results change. -->
			{#each results as result (result.poem.id)}
				<li class="py-4">
					<h3 class="pb-1">
						<a class="link" href="/poetry/{result.poem.id}">{slugToTitle(result.poem.id)}</a>
					</h3>
					<!-- Outside the heading: a <p> is not valid content for <h3>. -->
					<p class="text-sm">(Relevance: {(result.similarity * 100).toFixed(3)}%)</p>
				</li>
			{/each}
		</ul>
	</div>
{/if}

5
src/lib/store.ts Normal file
View File

@ -0,0 +1,5 @@
import { writable } from 'svelte/store';
import type { SearchResult } from './utils/search';
/** Shared writable store holding the current list of search results; starts out empty. */
export const searchResults = writable<SearchResult[]>([]);

File diff suppressed because one or more lines are too long

24
src/lib/utils/search.ts Normal file
View File

@ -0,0 +1,24 @@
// src/lib/utils/search.ts
import use, { UniversalSentenceEncoder } from '@tensorflow-models/universal-sentence-encoder';
// eslint-disable-next-line @typescript-eslint/no-unused-vars
import * as tf from '@tensorflow/tfjs-node';
/** A stored embedding: an identifier paired with its numeric vector. */
export interface Embedding {
	/** Identifier of the embedded item. */
	id: string;
	/** The embedding vector components. */
	vector: number[];
}

/** One search hit: the matched embedding and its similarity score. */
export interface SearchResult {
	/** The embedding that was matched. */
	poem: Embedding;
	/** Similarity score between the query and `poem`. */
	similarity: number;
}
// The in-flight or completed load. Caching the promise rather than the resolved model means
// callers that arrive while the first load is still running share it instead of each
// starting another `use.load()`.
let modelPromise: Promise<UniversalSentenceEncoder> | null = null;

/** Loads the encoder once, logging on success and clearing the cache on failure. */
async function loadModel(): Promise<UniversalSentenceEncoder> {
	try {
		const loaded = await use.load();
		console.log('Model loaded successfully!');
		return loaded;
	} catch (error) {
		// Forget the failed attempt so a later call can retry instead of re-throwing forever.
		modelPromise = null;
		throw error;
	}
}

/**
 * Returns the shared Universal Sentence Encoder instance, loading it on first use.
 *
 * @returns A promise for the loaded model; all callers receive the same instance.
 * @throws Whatever `use.load()` rejects with; the next call will attempt the load again.
 */
export function getModel(): Promise<UniversalSentenceEncoder> {
	if (!modelPromise) {
		modelPromise = loadModel();
	}
	return modelPromise;
}

View File

@ -5,8 +5,9 @@ date: 2014-11-10 00:00:00 +0000
tags:
- Fall
title: Autumn Synesthesia
layout: poetry
---
The trees branch out their reds and yellows.
Their last battle cry before the frost.
The further north, the more pronounced

View File

@ -8,8 +8,9 @@ tags:
- Love
- Relationships
title: Breakfast Blue Pt. 2
layout: poetry
---
Oh, my good friend, I didn't mean to leave.
It wasn't your fault, so no longer grieve!
If I could, I would run back to your side.

View File

@ -6,6 +6,7 @@ tags:
- Humor
- Muffins
title: Breakfast Blues
layout: poetry
---
Lost my reason to wake in the morning

View File

@ -5,7 +5,6 @@ date: 2019-03-06 12:00:00 +0000
tags:
- Christian
title: Djinn
year: 2019
layout: poetry
---
@ -18,4 +17,3 @@ I let out a chuckle
And a sigh of relief.
"He didn't holy ghost us
Despite such unbelief."

View File

@ -6,7 +6,7 @@ tags:
- Wisdom
- Lessons
title: I've Seen
year: 2018
layout: poetry
---
I've seen tears fall

View File

@ -8,6 +8,7 @@ tags:
- Relationships
- Introspective
title: Infrared Homing
layout: poetry
---
In a moment

View File

@ -6,7 +6,6 @@ tags:
- Chess
- Lessons
title: King's Pin
year: 2018
---
You placed my king

View File

@ -1,5 +1,21 @@
<script>
<script lang="ts">
import '../../app.css';
import { searchResults } from '$lib/store';
import type { SearchResult } from '$lib/utils/search';
import SearchResults from '$lib/components/SearchResults.svelte';
let searchQuery = '';
// Id of the most recently started search. Searches run on every keystroke, so responses can
// arrive out of order; only the newest one is allowed to update the store.
let latestSearchId = 0;

/**
 * Fetches search results for the current `searchQuery` and publishes them to the
 * `searchResults` store.
 *
 * A blank query clears the results without calling the API. A failed request
 * (non-OK status, network error or unparsable body) is logged and clears the results.
 */
async function handleSearch() {
	const searchId = ++latestSearchId;
	const query = searchQuery.trim();

	if (!query) {
		searchResults.set([]);
		return;
	}

	try {
		const response = await fetch(`/api/poetry/search?q=${encodeURIComponent(query)}`);
		if (searchId !== latestSearchId) return; // superseded by a newer keystroke

		if (!response.ok) {
			console.error('Failed to fetch search results');
			searchResults.set([]);
			return;
		}

		const data: SearchResult[] = await response.json();
		if (searchId !== latestSearchId) return; // superseded while reading the body
		searchResults.set(data);
	} catch (error) {
		if (searchId !== latestSearchId) return;
		console.error('Failed to fetch search results', error);
		searchResults.set([]);
	}
}
</script>
<div class="flex flex-col h-screen">
@ -35,7 +51,27 @@
</div>
<a class="link-primary text-xl" href="/">silentsilas</a>
</div>
<div class="navbar-end lg:hidden">
<div class="form-control">
<input
type="text"
placeholder="Search"
class="input input-bordered md:w-auto"
bind:value={searchQuery}
on:input={handleSearch}
/>
</div>
</div>
<div class="navbar-end hidden lg:flex">
<div class="form-control">
<input
type="text"
placeholder="Search"
class="input input-bordered md:w-auto"
bind:value={searchQuery}
on:input={handleSearch}
/>
</div>
<ul class="menu menu-horizontal px-1">
<li><a href="/thoughts">Thoughts</a></li>
<li><a href="/poetry">Poetry</a></li>
@ -48,5 +84,6 @@
<div class="flex flex-col items-center flex-1 overflow-auto">
<slot />
<SearchResults />
</div>
</div>

View File

@ -1,6 +1,14 @@
<script lang="ts">
import { onMount } from 'svelte';
import { fade } from 'svelte/transition';
import { searchResults } from '$lib/store';
import type { SearchResult } from '$lib/utils/search';
// Mirror the shared search results into a local variable. The `$searchResults`
// auto-subscription is released by Svelte when the component is destroyed, whereas a manual
// `subscribe()` without a matching unsubscribe keeps its callback registered after the page
// is left.
let results: SearchResult[] = [];
$: results = $searchResults ?? [];
type Greeting = {
greeting: string;
@ -59,12 +67,11 @@
}
</script>
{#if results.length <= 0}
<div class="container mx-auto flex flex-col justify-center items-center flex-1">
<div class="justify-center items-center text-center m-10">
{#if visible && currentGreeting}
<div
transition:fade={{ duration: 1200 }}
>
<div transition:fade={{ duration: 1200 }}>
<span class="font-bold">{currentGreeting.greeting}</span>
{#if currentGreeting.romanisation}
<span class="text-gray-500">( {currentGreeting.romanisation} )</span>
@ -88,8 +95,8 @@
</p>
<p>
Here you can browse my shower <a href="/thoughts" class="link">thoughts</a> and bad
<a href="/poetry" class="link">poetry</a>. Opinions are personally mine and not endorsed by my
employer.
<a href="/poetry" class="link">poetry</a>. Opinions are personally mine and not endorsed by
my employer.
</p>
<p>
I tend to start a lot of <a href="/projects" class="link">projects</a>, but I'm trying to
@ -104,3 +111,4 @@
<p>Shalom.</p>
</div>
</div>
{/if}

View File

@ -1,9 +1,17 @@
<script lang="ts">
import { goto } from '$app/navigation';
import { page } from '$app/stores';
import { searchResults } from '$lib/store';
import type { SearchResult } from '$lib/utils/search';
import type { PageData } from '../poetry/$types';
export let data: PageData;
// Mirror the shared search results into a local variable. The `$searchResults`
// auto-subscription is released by Svelte when the component is destroyed, whereas a manual
// `subscribe()` without a matching unsubscribe keeps its callback registered after the page
// is left.
let results: SearchResult[] = [];
$: results = $searchResults ?? [];
const formatDate = (date: string) => {
return new Date(date).toLocaleDateString(undefined, {
year: 'numeric',
@ -35,6 +43,7 @@
}
</script>
{#if results.length <= 0}
<div class="container mx-auto flex flex-col items-center">
<div class="prose">
<h1 class="py-6">Poetry</h1>
@ -68,3 +77,4 @@
>
</nav>
{/if}
{/if}

View File

@ -0,0 +1,40 @@
// eslint-disable-next-line
import * as tf from '@tensorflow/tfjs-node';
import poemEmbeddings from '$lib/utils/poetry/embeddings.json';
import { json } from '@sveltejs/kit';
import { getModel, type Embedding, type SearchResult } from '$lib/utils/search';
// Search handler
/**
 * Search handler: embeds the `q` query parameter and returns the ten stored poem
 * embeddings most similar to it, ordered from most to least similar.
 *
 * Responses:
 * - 200 with a JSON array of `{ poem, similarity }` objects.
 * - 400 with `{ error }` when `q` is missing or empty.
 * - 500 with `{ error }` when loading the model or embedding the query fails.
 */
export const GET = async ({ url }: { url: URL }) => {
	// Validate before touching the model so bad requests are rejected cheaply.
	const searchQuery = url.searchParams.get('q');
	if (!searchQuery) {
		// Endpoints must return a Response; `json` builds one with the given status.
		return json({ error: 'Query parameter "q" is required' }, { status: 400 });
	}

	try {
		const model = await getModel();

		// Generate embedding for the query
		const queryEmbedding = await model.embed([searchQuery]);
		let queryVec: number[];
		try {
			queryVec = queryEmbedding.arraySync()[0];
		} finally {
			// Release the tensor explicitly so its memory is not held after each request.
			queryEmbedding.dispose();
		}

		// Calculate similarities
		const results = poemEmbeddings
			.map((poem: Embedding) => ({
				poem,
				similarity: cosineSimilarity(queryVec, poem.vector)
			}))
			.sort((a: SearchResult, b: SearchResult) => b.similarity - a.similarity)
			.slice(0, 10); // Top 10 results

		return json(results);
	} catch (error) {
		const message = error instanceof Error ? error.message : String(error);
		return json({ error: message }, { status: 500 });
	}
};
/**
 * Computes the cosine similarity of two vectors: their dot product divided by the
 * product of their Euclidean magnitudes.
 *
 * @param vecA First vector.
 * @param vecB Second vector; expected to have the same length as `vecA`.
 * @returns The similarity, or `0` when either vector has zero magnitude (including
 *   empty vectors), where the ratio would otherwise be `NaN`.
 */
function cosineSimilarity(vecA: number[], vecB: number[]): number {
	let dotProduct = 0;
	let sumSquaresA = 0;
	for (let i = 0; i < vecA.length; i++) {
		dotProduct += vecA[i] * vecB[i];
		sumSquaresA += vecA[i] * vecA[i];
	}

	let sumSquaresB = 0;
	for (const component of vecB) {
		sumSquaresB += component * component;
	}

	const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
	// A zero vector has no direction; report "no similarity" rather than 0/0 = NaN, which
	// would make sort comparisons meaningless and serialise to `null` in JSON.
	if (denominator === 0) {
		return 0;
	}
	return dotProduct / denominator;
}