Migrate away from deprecated Svelte 4 syntax, let the AI chatbot use the user's own keys (which are never sent to the server), get threlte/studio working, and refactor search results
This commit is contained in:
28
scripts/rag/createVectoreStore.ts
Normal file
28
scripts/rag/createVectoreStore.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import 'cheerio';
|
||||
import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio';
|
||||
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
||||
import { HNSWLib } from '@langchain/community/vectorstores/hnswlib';
|
||||
import { OpenAIEmbeddings } from '@langchain/openai';
|
||||
import * as path from 'path';
|
||||
|
||||
// Path handed to vectorStore.save() in generateEmbeddings(); it is relative, so it
// resolves against the working directory the script is run from.
const VECTOR_STORE_PATH = 'hex_docs_vector_store';
|
||||
// Path to `.hex/docs/hexpm` under the user's home directory ($HOME).
// NOTE(review): not referenced anywhere in the visible code — presumably intended as
// the future document source; confirm or remove.
// NOTE(review): path.join() does not expand '~', so when HOME is unset (or empty) this
// yields the literal relative path '~/.hex/docs/hexpm'.
const DOCS_DIR = path.join(process.env.HOME || '~', '.hex/docs/hexpm');
|
||||
|
||||
/**
 * Builds an HNSWLib vector store from a single web page and saves it.
 *
 * Pipeline:
 *  1. Load the page with CheerioWebBaseLoader.
 *  2. Split the loaded documents with RecursiveCharacterTextSplitter
 *     (chunkSize 1000, chunkOverlap 200).
 *  3. Embed the resulting chunks with OpenAIEmbeddings (default configuration)
 *     while building the HNSWLib store.
 *  4. Save the store at VECTOR_STORE_PATH and log a success message.
 *
 * There is no local error handling: a failure in any step rejects the returned
 * promise and is left to the caller.
 */
async function generateEmbeddings() {
  const sourceUrl = 'https://lilianweng.github.io/posts/2023-06-23-agent/';
  const pages = await new CheerioWebBaseLoader(sourceUrl).load();

  const splitterOptions = { chunkSize: 1000, chunkOverlap: 200 };
  const chunks = await new RecursiveCharacterTextSplitter(splitterOptions).splitDocuments(pages);

  // The embeddings client is created only after splitting, as in the original flow.
  const store = await HNSWLib.fromDocuments(chunks, new OpenAIEmbeddings());
  await store.save(VECTOR_STORE_PATH);

  console.log('Embeddings generated and saved successfully.');
}
|
||||
|
||||
// Script entry point. On failure, log the error and mark the process as failed so
// that shells and CI see a non-zero exit status instead of a reported success.
generateEmbeddings().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
Reference in New Issue
Block a user