add semantic search

This commit is contained in:
Silas 2024-05-31 01:31:37 -04:00
parent 53635f0d59
commit de9cccabda
Failed to generate hash of commit
19 changed files with 1398 additions and 105 deletions

1104
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -11,13 +11,16 @@
"test": "vitest", "test": "vitest",
"lint": "prettier --check . && eslint .", "lint": "prettier --check . && eslint .",
"format": "prettier --write .", "format": "prettier --write .",
"model-pipeline:run": "node scripts/model-pipeline.js" "model-pipeline:run": "node scripts/model-pipeline.js",
"generate-embeddings": "node scripts/generate-embeddings.js"
}, },
"devDependencies": { "devDependencies": {
"@sveltejs/adapter-auto": "^3.0.0", "@sveltejs/adapter-auto": "^3.0.0",
"@sveltejs/kit": "^2.0.0", "@sveltejs/kit": "^2.0.0",
"@sveltejs/vite-plugin-svelte": "^3.0.0", "@sveltejs/vite-plugin-svelte": "^3.0.0",
"@tailwindcss/typography": "^0.5.13", "@tailwindcss/typography": "^0.5.13",
"@tensorflow-models/universal-sentence-encoder": "^1.3.3",
"@tensorflow/tfjs-node": "^4.19.0",
"@theatre/core": "^0.7.1", "@theatre/core": "^0.7.1",
"@theatre/studio": "^0.7.1", "@theatre/studio": "^0.7.1",
"@threlte/theatre": "^2.1.7", "@threlte/theatre": "^2.1.7",
@ -30,6 +33,8 @@
"eslint": "^8.56.0", "eslint": "^8.56.0",
"eslint-config-prettier": "^9.1.0", "eslint-config-prettier": "^9.1.0",
"eslint-plugin-svelte": "^2.35.1", "eslint-plugin-svelte": "^2.35.1",
"fs-extra": "^11.2.0",
"marked": "^12.0.2",
"postcss": "^8.4.38", "postcss": "^8.4.38",
"prettier": "^3.1.1", "prettier": "^3.1.1",
"prettier-plugin-svelte": "^3.1.2", "prettier-plugin-svelte": "^3.1.2",
@ -44,6 +49,7 @@
"type": "module", "type": "module",
"dependencies": { "dependencies": {
"@dimforge/rapier3d-compat": "^0.11.2", "@dimforge/rapier3d-compat": "^0.11.2",
"@tensorflow/tfjs": "^4.19.0",
"@threlte/core": "^7.3.0", "@threlte/core": "^7.3.0",
"@threlte/extras": "^8.11.2", "@threlte/extras": "^8.11.2",
"@threlte/rapier": "^2.0.0", "@threlte/rapier": "^2.0.0",

View File

@ -0,0 +1,39 @@
// eslint-disable-next-line @typescript-eslint/no-unused-vars
import * as tf from '@tensorflow/tfjs-node';
import use from '@tensorflow-models/universal-sentence-encoder';
import fs from 'fs-extra';
import glob from 'glob';
import path from 'path';
import { marked } from 'marked';
/**
 * Reads a markdown file and returns its content as tag-free text.
 *
 * The file is rendered with `marked`, then every `<...>` tag in the
 * rendered output is removed with a regular expression.
 *
 * @param filePath Path of the markdown file to read (decoded as UTF-8).
 * @returns The rendered text with HTML tags removed.
 */
async function extractTextFromMarkdown(filePath) {
	const source = await fs.readFile(filePath, 'utf8');
	const rendered = marked(source);
	// Strip HTML tags generated by marked
	const tagPattern = /<[^>]*>/g;
	return rendered.replace(tagPattern, '');
}
/**
 * Builds one embedding per markdown file in a directory and writes the
 * result to `embeddings.json` in the current working directory.
 *
 * Each output entry has the shape `{ id, vector }`, where `id` is the file
 * name without its `.md` extension and `vector` is the first row of the
 * Universal Sentence Encoder output for that file's text.
 *
 * NOTE(review): the search endpoint imports its embeddings from
 * `$lib/utils/poetry/embeddings.json`, while this script writes to the
 * working directory. Confirm how the generated file is moved into place.
 *
 * @param directoryPath Directory whose `*.md` files are embedded (non-recursive).
 * @returns A promise that settles only after the JSON file has been written.
 */
async function generateEmbeddingsForDirectory(directoryPath) {
	// Get all markdown files in directory
	const files = glob.sync(`${directoryPath}/*.md`);

	// Extract texts from markdown files
	const poems = await Promise.all(
		files.map(async (file) => ({
			id: path.basename(file, '.md'), // Use filename as ID
			text: await extractTextFromMarkdown(file)
		}))
	);

	// Load the Universal Sentence Encoder model
	const model = await use.load();

	// Embed one poem at a time and release each tensor as soon as its values
	// have been copied out, so tensor memory does not accumulate.
	const poemEmbeddings = [];
	for (const poem of poems) {
		const embedding = await model.embed([poem.text]);
		try {
			poemEmbeddings.push({
				id: poem.id,
				vector: embedding.arraySync()[0] // Extract the vector
			});
		} finally {
			embedding.dispose();
		}
	}

	// Save embeddings to JSON file. Awaited so that callers observe write
	// failures and the promise does not resolve before the file exists.
	await fs.writeJson('embeddings.json', poemEmbeddings);
}
// Script entry point. Update path accordingly.
// Failures are reported explicitly and reflected in the exit code instead of
// being left as an unhandled promise rejection.
generateEmbeddingsForDirectory('src/posts/poetry').catch((error) => {
	console.error('Failed to generate embeddings:', error);
	process.exitCode = 1;
});

10
src/hooks.server.ts Normal file
View File

@ -0,0 +1,10 @@
import { getModel } from '$lib/utils/search';
import { building } from '$app/environment';
// Start loading the model when the server starts (skipped while building)
// so that it is already in flight before the first search request arrives.
// The call is intentionally not awaited. No success message is logged here:
// at this point the load has only been started, and getModel() logs its own
// message once the model has actually finished loading.
if (!building) {
	getModel().catch((error) => {
		console.error('Failed to load the TensorFlow model at startup:', error);
	});
}

View File

@ -0,0 +1,36 @@
<script lang="ts">
import { searchResults } from '$lib/store';
import type { SearchResult } from '$lib/utils/search';
// Current search results, mirrored from the shared store.
// `$searchResults` is Svelte's store auto-subscription: the component
// subscribes on creation and unsubscribes when it is destroyed, so no
// subscription is left behind (a bare `.subscribe()` call is never released).
let results: SearchResult[] = [];
$: results = $searchResults ?? [];
/**
 * Converts a hyphenated slug into a display title.
 *
 * Hyphens become word separators and the first character of every word is
 * upper-cased; the remaining characters are left unchanged.
 *
 * @param slug Slug such as `autumn-synesthesia`.
 * @returns Title such as `Autumn Synesthesia`.
 */
function slugToTitle(slug: string) {
	const titled: string[] = [];
	// Splitting on either a hyphen or a space is the same as first replacing
	// hyphens with spaces and then splitting on spaces.
	for (const word of slug.split(/[- ]/)) {
		const head = word.charAt(0).toUpperCase();
		const tail = word.slice(1);
		titled.push(head + tail);
	}
	return titled.join(' ');
}
</script>
<!-- Rendered only while there is at least one search result. -->
{#if results.length > 0}
	<div class="container mx-auto flex flex-col items-center">
		<div class="prose">
			<h1 class="py-6">Search results:</h1>
		</div>
		<ul>
			{#each results as result}
				<li class="py-4">
					<h3 class="pb-1">
						<a class="link" href="/poetry/{result.poem.id}">{slugToTitle(result.poem.id)}</a>
					</h3>
					<!-- Kept outside the heading: a paragraph is not valid heading content.
					     The percent sign belongs inside the parentheses, next to the number. -->
					<p class="text-sm">(Relevance: {(result.similarity * 100).toFixed(3)}%)</p>
				</li>
			{/each}
		</ul>
	</div>
{/if}

5
src/lib/store.ts Normal file
View File

@ -0,0 +1,5 @@
import { writable } from 'svelte/store';
import type { SearchResult } from './utils/search';
/**
 * Shared store holding the current list of search results.
 * It starts out as an empty array.
 */
export const searchResults = writable<SearchResult[]>([]);

File diff suppressed because one or more lines are too long

24
src/lib/utils/search.ts Normal file
View File

@ -0,0 +1,24 @@
// src/lib/utils/search.ts
import use, { UniversalSentenceEncoder } from '@tensorflow-models/universal-sentence-encoder';
// eslint-disable-next-line @typescript-eslint/no-unused-vars
import * as tf from '@tensorflow/tfjs-node';
/**
 * One stored embedding entry.
 */
export type Embedding = {
// Identifier of the embedded item; the result list uses it to build the
// `/poetry/{id}` link and the displayed title.
id: string;
// Embedding vector for the item's text.
vector: number[];
};
/**
 * One search hit: a stored embedding paired with its score for the query.
 */
export type SearchResult = {
// The stored entry that was matched.
poem: Embedding;
// Similarity score between the query and `poem`; the search endpoint
// computes it as a cosine similarity and sorts results by it, descending.
similarity: number;
};
// In-flight or completed model load. Caching the promise (rather than the
// resolved model) lets callers that arrive while the first load is still
// running share it instead of each starting their own load.
let modelPromise: Promise<UniversalSentenceEncoder> | null = null;

/**
 * Returns the shared Universal Sentence Encoder instance, loading it on the
 * first call.
 *
 * All callers share a single load. If that load fails, the cached promise is
 * cleared so that a later call can try again.
 *
 * @returns The loaded model.
 * @throws Whatever `use.load()` rejects with.
 */
export async function getModel(): Promise<UniversalSentenceEncoder> {
	if (!modelPromise) {
		modelPromise = use
			.load()
			.then((loaded) => {
				console.log('Model loaded successfully!');
				return loaded;
			})
			.catch((error: unknown) => {
				// Do not cache a failed load forever.
				modelPromise = null;
				throw error;
			});
	}
	return modelPromise;
}

View File

@ -5,8 +5,9 @@ date: 2014-11-10 00:00:00 +0000
tags: tags:
- Fall - Fall
title: Autumn Synesthesia title: Autumn Synesthesia
layout: poetry
--- ---
The trees branch out their reds and yellows. The trees branch out their reds and yellows.
Their last battle cry before the frost. Their last battle cry before the frost.
The further north, the more pronounced The further north, the more pronounced

View File

@ -8,8 +8,9 @@ tags:
- Love - Love
- Relationships - Relationships
title: Breakfast Blue Pt. 2 title: Breakfast Blue Pt. 2
layout: poetry
--- ---
Oh, my good friend, I didn't mean to leave. Oh, my good friend, I didn't mean to leave.
It wasn't your fault, so no longer grieve! It wasn't your fault, so no longer grieve!
If I could, I would run back to your side. If I could, I would run back to your side.

View File

@ -6,6 +6,7 @@ tags:
- Humor - Humor
- Muffins - Muffins
title: Breakfast Blues title: Breakfast Blues
layout: poetry
--- ---
Lost my reason to wake in the morning Lost my reason to wake in the morning

View File

@ -5,7 +5,6 @@ date: 2019-03-06 12:00:00 +0000
tags: tags:
- Christian - Christian
title: Djinn title: Djinn
year: 2019
layout: poetry layout: poetry
--- ---
@ -18,4 +17,3 @@ I let out a chuckle
And a sigh of relief. And a sigh of relief.
"He didn't holy ghost us "He didn't holy ghost us
Despite such unbelief." Despite such unbelief."

View File

@ -6,7 +6,7 @@ tags:
- Wisdom - Wisdom
- Lessons - Lessons
title: I've Seen title: I've Seen
year: 2018 layout: poetry
--- ---
I've seen tears fall I've seen tears fall

View File

@ -8,6 +8,7 @@ tags:
- Relationships - Relationships
- Introspective - Introspective
title: Infrared Homing title: Infrared Homing
layout: poetry
--- ---
In a moment In a moment

View File

@ -6,7 +6,6 @@ tags:
- Chess - Chess
- Lessons - Lessons
title: King's Pin title: King's Pin
year: 2018
--- ---
You placed my king You placed my king

View File

@ -1,5 +1,21 @@
<script> <script lang="ts">
import '../../app.css'; import '../../app.css';
import { searchResults } from '$lib/store';
import type { SearchResult } from '$lib/utils/search';
import SearchResults from '$lib/components/SearchResults.svelte';
let searchQuery = '';
// Identifies the most recent search. Responses are applied only if they
// belong to the latest request, so a slow reply for an older query cannot
// overwrite the results of a newer one.
let latestSearchId = 0;

/**
 * Runs a search for the current `searchQuery` and publishes the outcome to
 * the `searchResults` store.
 *
 * - A blank query clears the results without contacting the server.
 * - A failed request (network error or non-OK status) is logged and clears
 *   the results.
 * - Responses that arrive after a newer search has started are ignored.
 */
async function handleSearch() {
	const requestId = ++latestSearchId;

	if (searchQuery.trim() === '') {
		searchResults.set([]);
		return;
	}

	try {
		const response = await fetch(`/api/poetry/search?q=${encodeURIComponent(searchQuery)}`);
		if (requestId !== latestSearchId) return;

		if (!response.ok) {
			console.error('Failed to fetch search results');
			searchResults.set([]);
			return;
		}

		const data: SearchResult[] = await response.json();
		if (requestId !== latestSearchId) return;
		searchResults.set(data);
	} catch (error) {
		if (requestId !== latestSearchId) return;
		console.error('Failed to fetch search results', error);
		searchResults.set([]);
	}
}
</script> </script>
<div class="flex flex-col h-screen"> <div class="flex flex-col h-screen">
@ -35,7 +51,27 @@
</div> </div>
<a class="link-primary text-xl" href="/">silentsilas</a> <a class="link-primary text-xl" href="/">silentsilas</a>
</div> </div>
<div class="navbar-end lg:hidden">
<div class="form-control">
<input
type="text"
placeholder="Search"
class="input input-bordered md:w-auto"
bind:value={searchQuery}
on:input={handleSearch}
/>
</div>
</div>
<div class="navbar-end hidden lg:flex"> <div class="navbar-end hidden lg:flex">
<div class="form-control">
<input
type="text"
placeholder="Search"
class="input input-bordered md:w-auto"
bind:value={searchQuery}
on:input={handleSearch}
/>
</div>
<ul class="menu menu-horizontal px-1"> <ul class="menu menu-horizontal px-1">
<li><a href="/thoughts">Thoughts</a></li> <li><a href="/thoughts">Thoughts</a></li>
<li><a href="/poetry">Poetry</a></li> <li><a href="/poetry">Poetry</a></li>
@ -48,5 +84,6 @@
<div class="flex flex-col items-center flex-1 overflow-auto"> <div class="flex flex-col items-center flex-1 overflow-auto">
<slot /> <slot />
<SearchResults />
</div> </div>
</div> </div>

View File

@ -1,6 +1,14 @@
<script lang="ts"> <script lang="ts">
import { onMount } from 'svelte'; import { onMount } from 'svelte';
import { fade } from 'svelte/transition'; import { fade } from 'svelte/transition';
import { searchResults } from '$lib/store';
import type { SearchResult } from '$lib/utils/search';
// Current search results, mirrored from the shared store.
// `$searchResults` is Svelte's store auto-subscription: it is released when
// this page component is destroyed, whereas a bare `.subscribe()` call would
// leave a subscription behind each time the page is created.
let results: SearchResult[] = [];
$: results = $searchResults ?? [];
type Greeting = { type Greeting = {
greeting: string; greeting: string;
@ -59,12 +67,11 @@
} }
</script> </script>
{#if results.length <= 0}
<div class="container mx-auto flex flex-col justify-center items-center flex-1"> <div class="container mx-auto flex flex-col justify-center items-center flex-1">
<div class="justify-center items-center text-center m-10"> <div class="justify-center items-center text-center m-10">
{#if visible && currentGreeting} {#if visible && currentGreeting}
<div <div transition:fade={{ duration: 1200 }}>
transition:fade={{ duration: 1200 }}
>
<span class="font-bold">{currentGreeting.greeting}</span> <span class="font-bold">{currentGreeting.greeting}</span>
{#if currentGreeting.romanisation} {#if currentGreeting.romanisation}
<span class="text-gray-500">( {currentGreeting.romanisation} )</span> <span class="text-gray-500">( {currentGreeting.romanisation} )</span>
@ -88,8 +95,8 @@
</p> </p>
<p> <p>
Here you can browse my shower <a href="/thoughts" class="link">thoughts</a> and bad Here you can browse my shower <a href="/thoughts" class="link">thoughts</a> and bad
<a href="/poetry" class="link">poetry</a>. Opinions are personally mine and not endorsed by my <a href="/poetry" class="link">poetry</a>. Opinions are personally mine and not endorsed by
employer. my employer.
</p> </p>
<p> <p>
I tend to start a lot of <a href="/projects" class="link">projects</a>, but I'm trying to I tend to start a lot of <a href="/projects" class="link">projects</a>, but I'm trying to
@ -104,3 +111,4 @@
<p>Shalom.</p> <p>Shalom.</p>
</div> </div>
</div> </div>
{/if}

View File

@ -1,9 +1,17 @@
<script lang="ts"> <script lang="ts">
import { goto } from '$app/navigation'; import { goto } from '$app/navigation';
import { page } from '$app/stores'; import { page } from '$app/stores';
import { searchResults } from '$lib/store';
import type { SearchResult } from '$lib/utils/search';
import type { PageData } from '../poetry/$types'; import type { PageData } from '../poetry/$types';
export let data: PageData; export let data: PageData;
// Current search results, mirrored from the shared store.
// `$searchResults` is Svelte's store auto-subscription: it is released when
// this page component is destroyed, whereas a bare `.subscribe()` call would
// leave a subscription behind each time the page is created.
let results: SearchResult[] = [];
$: results = $searchResults ?? [];
const formatDate = (date: string) => { const formatDate = (date: string) => {
return new Date(date).toLocaleDateString(undefined, { return new Date(date).toLocaleDateString(undefined, {
year: 'numeric', year: 'numeric',
@ -35,6 +43,7 @@
} }
</script> </script>
{#if results.length <= 0}
<div class="container mx-auto flex flex-col items-center"> <div class="container mx-auto flex flex-col items-center">
<div class="prose"> <div class="prose">
<h1 class="py-6">Poetry</h1> <h1 class="py-6">Poetry</h1>
@ -68,3 +77,4 @@
> >
</nav> </nav>
{/if} {/if}
{/if}

View File

@ -0,0 +1,40 @@
// eslint-disable-next-line
import * as tf from '@tensorflow/tfjs-node';
import poemEmbeddings from '$lib/utils/poetry/embeddings.json';
import { json } from '@sveltejs/kit';
import { getModel, type Embedding, type SearchResult } from '$lib/utils/search';
/**
 * Search handler.
 *
 * Embeds the `q` query parameter, scores it against every stored poem
 * embedding with cosine similarity, and responds with the ten best matches,
 * highest similarity first.
 *
 * Responses:
 * - 200: JSON array of `{ poem, similarity }`.
 * - 400: `{ error }` when `q` is missing or empty.
 * - 500: `{ error }` when loading the model or embedding the query fails.
 *
 * Every path returns a `Response` built with `json()`; a plain
 * `{ status, body }` object is not a valid return value for a SvelteKit
 * endpoint.
 */
export const GET = async ({ url }: { url: URL }) => {
	// Validate first so that a bad request never waits on the model.
	const searchQuery = url.searchParams.get('q');
	if (!searchQuery) {
		return json({ error: 'Query parameter "q" is required' }, { status: 400 });
	}

	try {
		const model = await getModel();

		// Generate embedding for the query
		const queryEmbedding = await model.embed([searchQuery]);
		let queryVec: number[];
		try {
			queryVec = queryEmbedding.arraySync()[0];
		} finally {
			// Tensors are not garbage collected; release it explicitly so
			// memory does not grow with every request.
			queryEmbedding.dispose();
		}

		// Calculate similarities
		const results = poemEmbeddings
			.map((poem: Embedding) => ({
				poem,
				similarity: cosineSimilarity(queryVec, poem.vector)
			}))
			.sort((a: SearchResult, b: SearchResult) => b.similarity - a.similarity)
			.slice(0, 10); // Top 10 results

		return json(results);
	} catch (error) {
		const message = error instanceof Error ? error.message : String(error);
		return json({ error: message }, { status: 500 });
	}
};
/**
 * Computes the cosine similarity of two equal-length vectors.
 *
 * @param vecA First vector.
 * @param vecB Second vector; must have the same length as `vecA`.
 * @returns The cosine of the angle between the vectors, or `0` when either
 *   vector has zero magnitude (the similarity is undefined there, and `0`
 *   keeps NaN out of sorting and JSON output).
 * @throws RangeError If the vectors differ in length.
 */
function cosineSimilarity(vecA: number[], vecB: number[]): number {
	if (vecA.length !== vecB.length) {
		throw new RangeError(
			`Cannot compare vectors of different lengths (${vecA.length} vs ${vecB.length})`
		);
	}

	const dotProduct = vecA.reduce((acc, val, i) => acc + val * vecB[i], 0);
	const magnitudeA = Math.sqrt(vecA.reduce((acc, val) => acc + val * val, 0));
	const magnitudeB = Math.sqrt(vecB.reduce((acc, val) => acc + val * val, 0));

	// Guard the division: a zero vector would otherwise produce NaN.
	if (magnitudeA === 0 || magnitudeB === 0) {
		return 0;
	}
	return dotProduct / (magnitudeA * magnitudeB);
}