add semantic search
This commit is contained in:
parent
53635f0d59
commit
de9cccabda
File diff suppressed because it is too large
Load Diff
|
@ -11,13 +11,16 @@
|
||||||
"test": "vitest",
|
"test": "vitest",
|
||||||
"lint": "prettier --check . && eslint .",
|
"lint": "prettier --check . && eslint .",
|
||||||
"format": "prettier --write .",
|
"format": "prettier --write .",
|
||||||
"model-pipeline:run": "node scripts/model-pipeline.js"
|
"model-pipeline:run": "node scripts/model-pipeline.js",
|
||||||
|
"generate-embeddings": "node scripts/generate-embeddings.js"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@sveltejs/adapter-auto": "^3.0.0",
|
"@sveltejs/adapter-auto": "^3.0.0",
|
||||||
"@sveltejs/kit": "^2.0.0",
|
"@sveltejs/kit": "^2.0.0",
|
||||||
"@sveltejs/vite-plugin-svelte": "^3.0.0",
|
"@sveltejs/vite-plugin-svelte": "^3.0.0",
|
||||||
"@tailwindcss/typography": "^0.5.13",
|
"@tailwindcss/typography": "^0.5.13",
|
||||||
|
"@tensorflow-models/universal-sentence-encoder": "^1.3.3",
|
||||||
|
"@tensorflow/tfjs-node": "^4.19.0",
|
||||||
"@theatre/core": "^0.7.1",
|
"@theatre/core": "^0.7.1",
|
||||||
"@theatre/studio": "^0.7.1",
|
"@theatre/studio": "^0.7.1",
|
||||||
"@threlte/theatre": "^2.1.7",
|
"@threlte/theatre": "^2.1.7",
|
||||||
|
@ -30,6 +33,8 @@
|
||||||
"eslint": "^8.56.0",
|
"eslint": "^8.56.0",
|
||||||
"eslint-config-prettier": "^9.1.0",
|
"eslint-config-prettier": "^9.1.0",
|
||||||
"eslint-plugin-svelte": "^2.35.1",
|
"eslint-plugin-svelte": "^2.35.1",
|
||||||
|
"fs-extra": "^11.2.0",
|
||||||
|
"marked": "^12.0.2",
|
||||||
"postcss": "^8.4.38",
|
"postcss": "^8.4.38",
|
||||||
"prettier": "^3.1.1",
|
"prettier": "^3.1.1",
|
||||||
"prettier-plugin-svelte": "^3.1.2",
|
"prettier-plugin-svelte": "^3.1.2",
|
||||||
|
@ -44,6 +49,7 @@
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@dimforge/rapier3d-compat": "^0.11.2",
|
"@dimforge/rapier3d-compat": "^0.11.2",
|
||||||
|
"@tensorflow/tfjs": "^4.19.0",
|
||||||
"@threlte/core": "^7.3.0",
|
"@threlte/core": "^7.3.0",
|
||||||
"@threlte/extras": "^8.11.2",
|
"@threlte/extras": "^8.11.2",
|
||||||
"@threlte/rapier": "^2.0.0",
|
"@threlte/rapier": "^2.0.0",
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
||||||
|
import * as tf from '@tensorflow/tfjs-node';
|
||||||
|
import use from '@tensorflow-models/universal-sentence-encoder';
|
||||||
|
import fs from 'fs-extra';
|
||||||
|
import glob from 'glob';
|
||||||
|
import path from 'path';
|
||||||
|
import { marked } from 'marked';
|
||||||
|
|
||||||
|
// Entities that remain in the text once the tags themselves are removed.
const BASIC_HTML_ENTITIES = {
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
  '&amp;': '&'
};

/**
 * Decode the basic HTML entities in a single pass.
 * A single pass matters: decoding `&amp;` first and then re-scanning would
 * turn a literal `&amp;lt;` into `<`.
 */
function decodeBasicEntities(text) {
  return text.replace(/&(?:lt|gt|quot|#39|amp);/g, (entity) => BASIC_HTML_ENTITIES[entity]);
}

/**
 * Read a markdown file and return its content as plain text.
 *
 * The markdown is rendered to HTML with `marked`, the tags are stripped, and
 * the escaped entities are decoded so that words such as "didn't" reach the
 * encoder as written rather than as "didn&#39;t".
 *
 * @param {string} filePath Path of the markdown file to read.
 * @returns {Promise<string>} The text content without HTML tags.
 */
async function extractTextFromMarkdown(filePath) {
  const markdown = await fs.readFile(filePath, 'utf8');
  // `await` is harmless for a plain string and covers a Promise result as well.
  const html = await marked(markdown);
  const withoutTags = html.replace(/<[^>]*>/g, ''); // Strip HTML tags generated by marked
  return decodeBasicEntities(withoutTags);
}
|
||||||
|
|
||||||
|
/**
 * Generate one Universal Sentence Encoder embedding per markdown file found
 * directly inside `directoryPath` and write them all to `embeddings.json`
 * (relative to the current working directory).
 *
 * Each output entry is `{ id, vector }`, where `id` is the file name without
 * its `.md` extension.
 *
 * @param {string} directoryPath Directory containing the `*.md` files.
 * @returns {Promise<void>} Resolves once the JSON file has been written.
 */
async function generateEmbeddingsForDirectory(directoryPath) {
  // Get all markdown files in directory
  const files = glob.sync(`${directoryPath}/*.md`);

  // Extract texts from markdown files
  const poems = await Promise.all(
    files.map(async (file) => ({
      id: path.basename(file, '.md'), // Use filename as ID
      text: await extractTextFromMarkdown(file)
    }))
  );

  // Load the Universal Sentence Encoder model
  const model = await use.load();

  // Embed each poem and copy the vector out of the tensor.
  const poemEmbeddings = await Promise.all(
    poems.map(async (poem) => {
      const embedding = await model.embed([poem.text]);
      try {
        return { id: poem.id, vector: embedding.arraySync()[0] };
      } finally {
        // Tensors are not garbage collected; release the backing memory explicitly.
        embedding.dispose();
      }
    })
  );

  // Save embeddings to JSON file. Awaited so that a failed write rejects this
  // function instead of becoming an unhandled rejection.
  await fs.writeJson('embeddings.json', poemEmbeddings);
}
|
||||||
|
|
||||||
|
// Script entry point. Update path accordingly.
// Failures are reported and reflected in the exit code so that callers
// (npm scripts, CI) can detect them.
generateEmbeddingsForDirectory('src/posts/poetry').catch((error) => {
  console.error('Failed to generate embeddings:', error);
  process.exitCode = 1;
});
|
|
@ -0,0 +1,10 @@
|
||||||
|
import { getModel } from '$lib/utils/search';
|
||||||
|
import { building } from '$app/environment';
|
||||||
|
|
||||||
|
// Warm up the model at startup (skipped while `building` is set) so the first
// search request does not pay the loading cost.
if (!building) {
  getModel()
    .then(() => {
      // Logged only after the promise resolves; previously this line ran
      // synchronously, before loading finished and even when it failed.
      console.log('Model loaded successfully!');
    })
    .catch((error) => {
      console.error('Failed to load the TensorFlow model at startup:', error);
    });
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
<script lang="ts">
|
||||||
|
import { searchResults } from '$lib/store';
|
||||||
|
import type { SearchResult } from '$lib/utils/search';
|
||||||
|
|
||||||
|
let results: SearchResult[] = [];

// `$searchResults` auto-subscribes and is unsubscribed by Svelte when the
// component is destroyed; a manual `subscribe()` whose return value is
// discarded is never cleaned up.
$: results = $searchResults ?? [];
|
||||||
|
|
||||||
|
/**
 * Turn a slug such as `autumn-synesthesia` into a display title
 * (`Autumn Synesthesia`): hyphens become spaces and the first character of
 * every space-separated word is upper-cased.
 */
function slugToTitle(slug: string) {
  // Splitting on either separator is the same as replacing '-' with ' ' first.
  const words = slug.split(/[- ]/);
  const capitalised: string[] = [];
  for (const word of words) {
    capitalised.push(word.charAt(0).toUpperCase() + word.slice(1));
  }
  return capitalised.join(' ');
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<!-- Rendered only when there is at least one search result. -->
{#if results.length > 0}
  <div class="container mx-auto flex flex-col items-center">
    <div class="prose">
      <h1 class="py-6">Search results:</h1>
    </div>
    <ul>
      <!-- Keyed by poem id so list items are matched to results across updates. -->
      {#each results as result (result.poem.id)}
        <li class="py-4">
          <h3 class="pb-1">
            <a class="link" href="/poetry/{result.poem.id}">{slugToTitle(result.poem.id)}</a>
          </h3>
          <!-- Kept outside the heading: a <p> is not valid inside <h3>. -->
          <p class="text-sm">(Relevance: {(result.similarity * 100).toFixed(3)}%)</p>
        </li>
      {/each}
    </ul>
  </div>
{/if}
|
|
@ -0,0 +1,5 @@
|
||||||
|
import { writable } from 'svelte/store';
|
||||||
|
import type { SearchResult } from './utils/search';
|
||||||
|
|
||||||
|
/** Writable store holding the current list of search results; starts empty. */
export const searchResults = writable<SearchResult[]>([]);
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,24 @@
|
||||||
|
// src/lib/utils/search.ts
|
||||||
|
import use, { UniversalSentenceEncoder } from '@tensorflow-models/universal-sentence-encoder';
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
||||||
|
import * as tf from '@tensorflow/tfjs-node';
|
||||||
|
|
||||||
|
/** An identifier paired with its embedding vector. */
export type Embedding = {
  vector: Array<number>;
  id: string;
};

/** An embedding entry together with its similarity score. */
export type SearchResult = {
  similarity: number;
  poem: Embedding;
};
|
||||||
|
|
||||||
|
// The in-flight (or completed) load is cached rather than the resolved model,
// so callers that arrive while loading is still in progress share one load
// instead of each starting their own.
let modelPromise: Promise<UniversalSentenceEncoder> | null = null;

/** Load the encoder, logging on success and clearing the cache on failure. */
async function loadModel(): Promise<UniversalSentenceEncoder> {
  try {
    const loaded = await use.load();
    console.log('Model loaded successfully!');
    return loaded;
  } catch (error) {
    // Forget the failed attempt so a later call can retry.
    modelPromise = null;
    throw error;
  }
}

/**
 * Return the shared Universal Sentence Encoder instance, loading it on first use.
 *
 * @returns The loaded model; the same instance is returned to every caller.
 * @throws Whatever `use.load()` rejects with; the next call will retry.
 */
export async function getModel(): Promise<UniversalSentenceEncoder> {
  if (!modelPromise) {
    modelPromise = loadModel();
  }
  return modelPromise;
}
|
|
@ -5,8 +5,9 @@ date: 2014-11-10 00:00:00 +0000
|
||||||
tags:
|
tags:
|
||||||
- Fall
|
- Fall
|
||||||
title: Autumn Synesthesia
|
title: Autumn Synesthesia
|
||||||
|
layout: poetry
|
||||||
---
|
---
|
||||||
|
|
||||||
The trees branch out their reds and yellows.
|
The trees branch out their reds and yellows.
|
||||||
Their last battle cry before the frost.
|
Their last battle cry before the frost.
|
||||||
The further north, the more pronounced
|
The further north, the more pronounced
|
||||||
|
|
|
@ -8,8 +8,9 @@ tags:
|
||||||
- Love
|
- Love
|
||||||
- Relationships
|
- Relationships
|
||||||
title: Breakfast Blue Pt. 2
|
title: Breakfast Blue Pt. 2
|
||||||
|
layout: poetry
|
||||||
---
|
---
|
||||||
|
|
||||||
Oh, my good friend, I didn't mean to leave.
|
Oh, my good friend, I didn't mean to leave.
|
||||||
It wasn't your fault, so no longer grieve!
|
It wasn't your fault, so no longer grieve!
|
||||||
If I could, I would run back to your side.
|
If I could, I would run back to your side.
|
||||||
|
|
|
@ -6,6 +6,7 @@ tags:
|
||||||
- Humor
|
- Humor
|
||||||
- Muffins
|
- Muffins
|
||||||
title: Breakfast Blues
|
title: Breakfast Blues
|
||||||
|
layout: poetry
|
||||||
---
|
---
|
||||||
|
|
||||||
Lost my reason to wake in the morning
|
Lost my reason to wake in the morning
|
||||||
|
|
|
@ -5,7 +5,6 @@ date: 2019-03-06 12:00:00 +0000
|
||||||
tags:
|
tags:
|
||||||
- Christian
|
- Christian
|
||||||
title: Djinn
|
title: Djinn
|
||||||
year: 2019
|
|
||||||
layout: poetry
|
layout: poetry
|
||||||
---
|
---
|
||||||
|
|
||||||
|
@ -18,4 +17,3 @@ I let out a chuckle
|
||||||
And a sigh of relief.
|
And a sigh of relief.
|
||||||
"He didn't holy ghost us
|
"He didn't holy ghost us
|
||||||
Despite such unbelief."
|
Despite such unbelief."
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ tags:
|
||||||
- Wisdom
|
- Wisdom
|
||||||
- Lessons
|
- Lessons
|
||||||
title: I've Seen
|
title: I've Seen
|
||||||
year: 2018
|
layout: poetry
|
||||||
---
|
---
|
||||||
|
|
||||||
I've seen tears fall
|
I've seen tears fall
|
||||||
|
|
|
@ -8,6 +8,7 @@ tags:
|
||||||
- Relationships
|
- Relationships
|
||||||
- Introspective
|
- Introspective
|
||||||
title: Infrared Homing
|
title: Infrared Homing
|
||||||
|
layout: poetry
|
||||||
---
|
---
|
||||||
|
|
||||||
In a moment
|
In a moment
|
||||||
|
|
|
@ -6,7 +6,6 @@ tags:
|
||||||
- Chess
|
- Chess
|
||||||
- Lessons
|
- Lessons
|
||||||
title: King's Pin
|
title: King's Pin
|
||||||
year: 2018
|
|
||||||
---
|
---
|
||||||
|
|
||||||
You placed my king
|
You placed my king
|
||||||
|
|
|
@ -1,5 +1,21 @@
|
||||||
<script>
|
<script lang="ts">
|
||||||
import '../../app.css';
|
import '../../app.css';
|
||||||
|
import { searchResults } from '$lib/store';
|
||||||
|
import type { SearchResult } from '$lib/utils/search';
|
||||||
|
import SearchResults from '$lib/components/SearchResults.svelte';
|
||||||
|
|
||||||
|
let searchQuery = '';
|
||||||
|
|
||||||
|
// Incremented per call so that a slow, older response cannot overwrite the
// results of a newer query (the handler runs on every input event).
let latestSearchRequest = 0;

/**
 * Query the search endpoint with the current `searchQuery` and publish the
 * results to the `searchResults` store.
 *
 * An empty query clears the results without contacting the server. Any HTTP or
 * network failure is logged and also clears the results.
 */
async function handleSearch() {
  const requestId = ++latestSearchRequest;
  const query = searchQuery.trim();

  if (!query) {
    // The endpoint rejects an empty `q`; clearing locally avoids that round trip.
    searchResults.set([]);
    return;
  }

  try {
    const response = await fetch(`/api/poetry/search?q=${encodeURIComponent(query)}`);
    if (requestId !== latestSearchRequest) return; // superseded by a newer query

    if (!response.ok) {
      console.error('Failed to fetch search results');
      searchResults.set([]);
      return;
    }

    const data: SearchResult[] = await response.json();
    if (requestId !== latestSearchRequest) return; // superseded while parsing
    searchResults.set(data);
  } catch (error) {
    if (requestId !== latestSearchRequest) return;
    console.error('Failed to fetch search results', error);
    searchResults.set([]);
  }
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="flex flex-col h-screen">
|
<div class="flex flex-col h-screen">
|
||||||
|
@ -35,7 +51,27 @@
|
||||||
</div>
|
</div>
|
||||||
<a class="link-primary text-xl" href="/">silentsilas</a>
|
<a class="link-primary text-xl" href="/">silentsilas</a>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="navbar-end lg:hidden">
|
||||||
|
<div class="form-control">
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
placeholder="Search"
|
||||||
|
class="input input-bordered md:w-auto"
|
||||||
|
bind:value={searchQuery}
|
||||||
|
on:input={handleSearch}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div class="navbar-end hidden lg:flex">
|
<div class="navbar-end hidden lg:flex">
|
||||||
|
<div class="form-control">
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
placeholder="Search"
|
||||||
|
class="input input-bordered md:w-auto"
|
||||||
|
bind:value={searchQuery}
|
||||||
|
on:input={handleSearch}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
<ul class="menu menu-horizontal px-1">
|
<ul class="menu menu-horizontal px-1">
|
||||||
<li><a href="/thoughts">Thoughts</a></li>
|
<li><a href="/thoughts">Thoughts</a></li>
|
||||||
<li><a href="/poetry">Poetry</a></li>
|
<li><a href="/poetry">Poetry</a></li>
|
||||||
|
@ -48,5 +84,6 @@
|
||||||
|
|
||||||
<div class="flex flex-col items-center flex-1 overflow-auto">
|
<div class="flex flex-col items-center flex-1 overflow-auto">
|
||||||
<slot />
|
<slot />
|
||||||
|
<SearchResults />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -1,6 +1,14 @@
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { onMount } from 'svelte';
|
import { onMount } from 'svelte';
|
||||||
import { fade } from 'svelte/transition';
|
import { fade } from 'svelte/transition';
|
||||||
|
import { searchResults } from '$lib/store';
|
||||||
|
import type { SearchResult } from '$lib/utils/search';
|
||||||
|
|
||||||
|
let results: SearchResult[] = [];

// `$searchResults` auto-subscribes and is unsubscribed by Svelte when the
// component is destroyed; a manual `subscribe()` whose return value is
// discarded would keep a callback registered for every past instance.
$: results = $searchResults ?? [];
|
||||||
|
|
||||||
type Greeting = {
|
type Greeting = {
|
||||||
greeting: string;
|
greeting: string;
|
||||||
|
@ -59,12 +67,11 @@
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
{#if results.length <= 0}
|
||||||
<div class="container mx-auto flex flex-col justify-center items-center flex-1">
|
<div class="container mx-auto flex flex-col justify-center items-center flex-1">
|
||||||
<div class="justify-center items-center text-center m-10">
|
<div class="justify-center items-center text-center m-10">
|
||||||
{#if visible && currentGreeting}
|
{#if visible && currentGreeting}
|
||||||
<div
|
<div transition:fade={{ duration: 1200 }}>
|
||||||
transition:fade={{ duration: 1200 }}
|
|
||||||
>
|
|
||||||
<span class="font-bold">{currentGreeting.greeting}</span>
|
<span class="font-bold">{currentGreeting.greeting}</span>
|
||||||
{#if currentGreeting.romanisation}
|
{#if currentGreeting.romanisation}
|
||||||
<span class="text-gray-500">( {currentGreeting.romanisation} )</span>
|
<span class="text-gray-500">( {currentGreeting.romanisation} )</span>
|
||||||
|
@ -88,8 +95,8 @@
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
Here you can browse my shower <a href="/thoughts" class="link">thoughts</a> and bad
|
Here you can browse my shower <a href="/thoughts" class="link">thoughts</a> and bad
|
||||||
<a href="/poetry" class="link">poetry</a>. Opinions are personally mine and not endorsed by my
|
<a href="/poetry" class="link">poetry</a>. Opinions are personally mine and not endorsed by
|
||||||
employer.
|
my employer.
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
I tend to start a lot of <a href="/projects" class="link">projects</a>, but I'm trying to
|
I tend to start a lot of <a href="/projects" class="link">projects</a>, but I'm trying to
|
||||||
|
@ -104,3 +111,4 @@
|
||||||
<p>Shalom.</p>
|
<p>Shalom.</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
|
@ -1,9 +1,17 @@
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { goto } from '$app/navigation';
|
import { goto } from '$app/navigation';
|
||||||
import { page } from '$app/stores';
|
import { page } from '$app/stores';
|
||||||
|
import { searchResults } from '$lib/store';
|
||||||
|
import type { SearchResult } from '$lib/utils/search';
|
||||||
import type { PageData } from '../poetry/$types';
|
import type { PageData } from '../poetry/$types';
|
||||||
export let data: PageData;
|
export let data: PageData;
|
||||||
|
|
||||||
|
let results: SearchResult[] = [];

// `$searchResults` auto-subscribes and is unsubscribed by Svelte when the
// component is destroyed; a manual `subscribe()` whose return value is
// discarded would keep a callback registered for every past instance.
$: results = $searchResults ?? [];
|
||||||
|
|
||||||
const formatDate = (date: string) => {
|
const formatDate = (date: string) => {
|
||||||
return new Date(date).toLocaleDateString(undefined, {
|
return new Date(date).toLocaleDateString(undefined, {
|
||||||
year: 'numeric',
|
year: 'numeric',
|
||||||
|
@ -35,6 +43,7 @@
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
{#if results.length <= 0}
|
||||||
<div class="container mx-auto flex flex-col items-center">
|
<div class="container mx-auto flex flex-col items-center">
|
||||||
<div class="prose">
|
<div class="prose">
|
||||||
<h1 class="py-6">Poetry</h1>
|
<h1 class="py-6">Poetry</h1>
|
||||||
|
@ -68,3 +77,4 @@
|
||||||
>
|
>
|
||||||
</nav>
|
</nav>
|
||||||
{/if}
|
{/if}
|
||||||
|
{/if}
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
// eslint-disable-next-line
|
||||||
|
import * as tf from '@tensorflow/tfjs-node';
|
||||||
|
import poemEmbeddings from '$lib/utils/poetry/embeddings.json';
|
||||||
|
import { json } from '@sveltejs/kit';
|
||||||
|
import { getModel, type Embedding, type SearchResult } from '$lib/utils/search';
|
||||||
|
|
||||||
|
// Search handler
|
||||||
|
/**
 * Search handler: embeds the `q` query parameter and ranks the pre-computed
 * poem embeddings by cosine similarity to it.
 *
 * Responses (all JSON):
 * - 200: up to 10 `{ poem, similarity }` entries, most similar first.
 * - 400: `{ error }` when `q` is missing or empty.
 * - 500: `{ error }` when loading the model or embedding the query fails.
 */
export const GET = async ({ url }: { url: URL }) => {
  // Validate first so a bad request never waits on the model.
  const searchQuery = url.searchParams.get('q');
  if (!searchQuery) {
    // Endpoints must return a Response; `json()` builds one with the given status.
    return json({ error: 'Query parameter "q" is required' }, { status: 400 });
  }

  try {
    const model = await getModel();

    // Generate embedding for the query
    const queryEmbedding = await model.embed([searchQuery]);
    let queryVec: number[];
    try {
      queryVec = queryEmbedding.arraySync()[0];
    } finally {
      // Tensors are not garbage collected; release it once the values are copied out.
      queryEmbedding.dispose();
    }

    // Calculate similarities
    const results = poemEmbeddings
      .map((poem: Embedding) => ({
        poem,
        similarity: cosineSimilarity(queryVec, poem.vector)
      }))
      .sort((a: SearchResult, b: SearchResult) => b.similarity - a.similarity)
      .slice(0, 10); // Top 10 results

    return json(results);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return json({ error: message }, { status: 500 });
  }
};
|
||||||
|
|
||||||
|
/**
 * Cosine similarity of two vectors: dot(a, b) / (|a| * |b|).
 *
 * @param vecA First vector.
 * @param vecB Second vector; expected to have the same length as `vecA`.
 * @returns A value in [-1, 1], or 0 when either vector has zero magnitude
 *          (the similarity is undefined there, and NaN would corrupt a
 *          numeric sort of the results).
 */
function cosineSimilarity(vecA: number[], vecB: number[]): number {
  const dotProduct = vecA.reduce((acc, val, i) => acc + val * vecB[i], 0);
  const magnitudeA = Math.sqrt(vecA.reduce((acc, val) => acc + val * val, 0));
  const magnitudeB = Math.sqrt(vecB.reduce((acc, val) => acc + val * val, 0));

  const denominator = magnitudeA * magnitudeB;
  if (denominator === 0) {
    return 0;
  }
  return dotProduct / denominator;
}
|
Loading…
Reference in New Issue