get it working with text files 🎉

2023-04-09 15:33:46 -04:00
parent 68e04af5df
commit deb6052de8
10 changed files with 626 additions and 56 deletions
--- a/src/lib/env.ts
+++ b/src/lib/env.ts
@@ -0,0 +1,5 @@
+import * as dotenv from 'dotenv';
+dotenv.config(); // Loads .env into process.env; the returned object nests values under `parsed`, so keys are not read off it directly.
+const OPENAI_API_KEY = process.env.OPENAI_API_KEY; // OpenAI API key from the environment; undefined when the variable is not set.
+
+export { OPENAI_API_KEY };
--- a/src/lib/shared/stores/prompt.ts
+++ b/src/lib/shared/stores/prompt.ts
@@ -1,5 +0,0 @@
-import { writable } from 'svelte/store';
-
-const prompt = writable<string>(JSON.stringify([]));
-
-export default prompt;
--- a/src/lib/shared/stores/summary.ts
+++ b/src/lib/shared/stores/summary.ts
@@ -0,0 +1,5 @@
+import { writable } from 'svelte/store';
+// Writable store whose value is a JSON-encoded string; it is seeded with the serialized empty array "[]".
+const summary = writable<string>(JSON.stringify([])); // NOTE(review): the chat page in this commit parses the value into a `string` variable, so an empty-string seed may be the intent — confirm.
+
+export default summary;
--- a/src/lib/types/env.d.ts
+++ b/src/lib/types/env.d.ts
@@ -0,0 +1,3 @@
+declare module 'dotenv' { // Ambient module declaration supplying a type for dotenv's `config` function.
+	function config(options?: DotenvParseOptions): { [name: string]: string }; // NOTE(review): DotenvParseOptions is not declared or imported here, and dotenv's documented config() result appears to nest variables under `parsed` rather than returning a flat map — confirm this shape.
+}
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -1,6 +1,6 @@
 <script lang="ts">
 	import { goto } from '$app/navigation';
-	import prompt from '$lib/shared/stores/prompt';
+	import summary from '$lib/shared/stores/summary';

 	let files: FileList;
 	let isDisabled = true;
@@ -26,11 +26,13 @@
 		try {
 			if (files && files.length >= 1) {
 				const result = await submitFile(files[0]);
-				prompt.set(JSON.stringify(result));
+				console.log(result);
+				summary.set(JSON.stringify(result.response));
 				goto('chat');
 			}
 		} catch (error) {
-			console.error('Error:', error);
+			alert((error as App.Error).message);
+			console.error(`Error: ${JSON.stringify(error)}`);
 		}
 	};
 </script>
--- a/src/routes/+server.ts
+++ b/src/routes/+server.ts
@@ -1,35 +1,37 @@
 import { fail } from '@sveltejs/kit';
 import type { RequestHandler } from './$types';
-
-const parseFile = async (file: Blob) => {
-	const fileSize = file.size;
-	const chunkSize = 4000;
-	const chunks = Math.ceil(fileSize / chunkSize);
-	const convoArray: string[] = [];
-	const decoder = new TextDecoder();
-
-	for (let i = 0; i < chunks; i++) {
-		const start = i * chunkSize;
-		const end = Math.min(start + chunkSize, fileSize);
-		const buffer = await file.slice(start, end).arrayBuffer();
-		const text = decoder.decode(buffer);
-		convoArray.push(text);
-	}
-	return convoArray;
-};
+import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
+import { OpenAI } from 'langchain/llms';
+import { loadSummarizationChain } from 'langchain/chains';

 export const POST = (async ({ request, url }) => {
 	try {
-		const data = Object.fromEntries(await request.formData());
-		const file = data.file as Blob;
-		const conversation = await parseFile(file);
+		const form = await request.formData();

-		return new Response(JSON.stringify(conversation), {
+		const file = form.get('file');
+		if (!file) throw new Error('No prompt found in the request.');
+		const text = await (file as Blob).text();
+
+		const model = new OpenAI({ temperature: 0 });
+		const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
+		const docs = await textSplitter.createDocuments([text]);
+
+		// This convenience function creates a document chain prompted to summarize a set of documents.
+		const chain = loadSummarizationChain(model);
+		const response = await chain.call({
+			input_documents: docs
+		});
+
+		return new Response(JSON.stringify({ response: response.text }), {
 			headers: {
 				'Content-Type': 'application/json'
 			}
 		});
 	} catch (err) {
+		console.log(err);
+
+		const appError = err as App.Error;
+		console.log(appError.message);
 		throw fail(500, { err: err });
 	}
 }) satisfies RequestHandler;
--- a/src/routes/about/+page.svelte
+++ b/src/routes/about/+page.svelte
@@ -3,11 +3,25 @@
 >
 	<h1 class="self-center">what dis?</h1>
 	<p class="font-sans my-4">
-		ChatGPT has a limit of ~4k characters per prompt, so to ask questions about a large body of text
-		would require that you split the text in 4k chunks.
+		ChatGPT has a character limit per prompt, so asking questions about a large body of text
+		requires you to split the text across several prompts and tell ChatGPT to wait for the final
+		chunk before generating a summary.
+	</p>
+	<p class="font-sans my-4">
+		ChatGPT's web interface also can't be adjusted to lower the probability of the AI spouting
+		unrelated nonsense. The technical term is "temperature", and the higher the temperature, the
+		more randomness in the output.
 	</p>
 	<p class="font-sans">
-		With Sumi, you can simply upload the entire document as a text file, and it will handle
-		everything else.
+		With Sumi, you can simply enter a URL or upload the entire document as a text file, and it will
+		handle everything else.
 	</p>
+	<a
+		class="btn variant-ringed-tertiary my-12"
+		href="https://git.silentsilas.com/silentsilas/Sumi"
+		target="_blank"
+		rel="noreferrer"
+	>
+		Source code
+	</a>
 </div>
--- a/src/routes/chat/+page.svelte
+++ b/src/routes/chat/+page.svelte
@@ -1,14 +1,16 @@
 <script lang="ts">
-	import prompt from '$lib/shared/stores/prompt';
-	let currentPrompt: string[];
+	import summary from '$lib/shared/stores/summary';
+	let currentSummary: string;
 
-	prompt.subscribe((val) => {
-		currentPrompt = JSON.parse(val);
-	});
+	// The store holds the summary as a JSON-encoded string. Reading it through `$summary`
+	// lets Svelte subscribe and unsubscribe with the component's lifecycle, so no
+	// subscription outlives the page.
+	$: currentSummary = JSON.parse($summary);
 </script>

 <div
 	class="container h-full mx-auto flex flex-col justify-center items-center text-base max-w-prose px-4"
 >
-	<div>WIP</div>
+	<h2 class="my-4">Summary</h2>
+	<div>{currentSummary}</div>
 </div>