diff --git a/LICENSE.md b/LICENSE similarity index 100% rename from LICENSE.md rename to LICENSE diff --git a/README.md b/README.md index c8ef11e..8307b90 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,6 @@ It will traverse through every `*.md` under `src/posts/poetry` and generate the ## License -This project is licensed under the [MIT License](src/branch/main/LICENSE.md). This means that you can do pretty much anything you want with this code as long as you include the original copyright and license notice in your project. +This project is licensed under the [MIT License](src/branch/main/LICENSE). This means that you can do pretty much anything you want with this code as long as you include the original copyright and license notice in your project. -Content contained in the `src/posts` folder is under the [CC BY-NC-SA-4.0](src/branch/main/src/posts/LICENSE.md) license. You're free to modify/distribute the posts contained in the `src/posts` folder so long as it's not for commercial purposes, you give attribution, and point out any modifications you've made. +Content contained in the `src/posts` folder is under the [CC BY-NC-SA-4.0](src/branch/main/src/posts/LICENSE) license. You're free to modify/distribute the posts contained in the `src/posts` folder so long as it's not for commercial purposes, you give attribution, and point out any modifications you've made. diff --git a/package-lock.json b/package-lock.json index 97865be..6864dd6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,6 +14,7 @@ "@threlte/core": "^7.3.0", "@threlte/extras": "^8.11.2", "@threlte/rapier": "^2.0.0", + "fuse.js": "^7.0.0", "marked": "^12.0.2", "mdsvex": "^0.11.0", "three": "^0.159.0" @@ -3427,6 +3428,14 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/fuse.js": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz", + "integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==", + "engines": { + "node": ">=10" + } + }, "node_modules/gauge": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/gauge/-/gauge-3.0.2.tgz", diff --git a/package.json b/package.json index 1931c4e..cf83eb4 100644 --- a/package.json +++ b/package.json @@ -52,6 +52,7 @@ "@threlte/core": "^7.3.0", "@threlte/extras": "^8.11.2", "@threlte/rapier": "^2.0.0", + "fuse.js": "^7.0.0", "marked": "^12.0.2", "mdsvex": "^0.11.0", "three": "^0.159.0" diff --git a/scripts/generate-embeddings.js b/scripts/generate-embeddings.js index f4b39c2..645e24d 100644 --- a/scripts/generate-embeddings.js +++ b/scripts/generate-embeddings.js @@ -8,32 +8,42 @@ import { marked } from 'marked'; async function extractTextFromMarkdown(filePath) { const markdown = await fs.readFile(filePath, 'utf8'); - return marked(markdown).replace(/<[^>]*>/g, ''); // Strip HTML tags generated by marked + // remove yaml frontmatter metadata + const result = markdown.replace(/---[\s\S]*?---/gm, ''); + // remove html tags + const text = marked(result).replace(/<[^>]*>/g, ''); + return text; } async function generateEmbeddingsForDirectory(directoryPath) { // Get all markdown files in directory - const files = glob.sync(`${directoryPath}/*.md`); + const files = glob.sync(`${directoryPath}/**/*.md`, { + ignore: [`${directoryPath}/LICENSE.md`] + }); // Extract texts from markdown files // eslint-disable-next-line @typescript-eslint/no-unused-vars - const poems = await Promise.all(files.map(async (file, _index) => ({ - id: path.basename(file, '.md'), // Use filename as ID - text: await extractTextFromMarkdown(file) + const posts = await Promise.all(files.map(async (file, _index) => ({ + id: path.basename(file, '.md'), // Use filename as ID, + text: await extractTextFromMarkdown(file), + section: path.basename(path.dirname(file)), + filename: path.basename(file) }))); // Load the Universal Sentence Encoder model const model = await use.load(); - const embeddings = await Promise.all(poems.map(poem => model.embed([poem.text]))); + const embeddings = await Promise.all(posts.map(post => model.embed([post.text]))); - // Map embeddings back to poem objects - const poemEmbeddings = poems.map((poem, index) => ({ - id: poem.id, - vector: embeddings[index].arraySync()[0] // Extract the vector + // Map embeddings back to post objects + const poemEmbeddings = posts.map((post, index) => ({ + id: post.id, + vector: embeddings[index].arraySync()[0], // Extract the vector + section: post.section, + filename: post.id })); // Save embeddings to JSON file fs.writeJson('embeddings.json', poemEmbeddings); } -generateEmbeddingsForDirectory('src/posts/poetry'); // Update path accordingly +generateEmbeddingsForDirectory('src/posts'); // Update path accordingly diff --git a/src/lib/components/NavBar.svelte b/src/lib/components/NavBar.svelte index 7de4368..67f7632 100644 --- a/src/lib/components/NavBar.svelte +++ b/src/lib/components/NavBar.svelte @@ -2,18 +2,22 @@ import type { SearchResult } from '$lib/utils/search'; import { searchResults } from '$lib/store'; - let searchQuery = ''; + let timer: NodeJS.Timeout | undefined; - async function handleSearch() { - // const section = window.location.pathname.split('/')[1]; - const response = await fetch(`/api/poetry/search?q=${encodeURIComponent(searchQuery)}`); - if (response.ok) { - const data: SearchResult[] = await response.json(); - searchResults.set(data); - } else { - console.error('Failed to fetch search results'); - searchResults.set([]); - } + async function handleSearch({ target }: Event) { + const { value } = target as HTMLInputElement; + clearTimeout(timer); + timer = setTimeout(async () => { + // const section = window.location.pathname.split('/')[1]; + const response = await fetch(`/api/search?q=${encodeURIComponent(value)}`); + if (response.ok) { + const data: SearchResult[] = await response.json(); + searchResults.set(data); + } else { + console.error('Failed to fetch search results'); + searchResults.set([]); + } + }, 300); } @@ -54,20 +58,13 @@ type="text" placeholder="Search" class="input w-24 md:w-auto" - bind:value={searchQuery} - on:input={handleSearch} + on:keyup={handleSearch} />
(Relevance: {(result.similarity * 100).toFixed(3)}%)
+ {slugToTitle(result.post.id)} ++ (Relevance: {(result.similarity * 100).toFixed(2)}%, Section: {result.post.section}) +