playground/scripts/rag/createVectoreStore.ts

29 lines
1.0 KiB
TypeScript

import 'cheerio';
import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { HNSWLib } from '@langchain/community/vectorstores/hnswlib';
import { OpenAIEmbeddings } from '@langchain/openai';
import * as os from 'os';
import * as path from 'path';
/**
 * Directory handed to `vectorStore.save`. It is a relative path, so it
 * resolves against the process's current working directory.
 */
const VECTOR_STORE_PATH = 'hex_docs_vector_store';

/**
 * `.hex/docs/hexpm` directory under the user's home directory.
 *
 * `HOME` is still preferred when set. The fallback is `os.homedir()` rather
 * than a literal '~': Node never expands '~', so joining it would yield a
 * relative path containing a directory literally named "~".
 */
const DOCS_DIR = path.join(process.env.HOME || os.homedir(), '.hex/docs/hexpm');
/**
 * Builds an HNSWLib vector store from a single web page and saves it to disk.
 *
 * Pipeline: load the page with `CheerioWebBaseLoader`, split the loaded
 * documents with `RecursiveCharacterTextSplitter` (chunk size 1000, overlap
 * 200), embed the chunks with `OpenAIEmbeddings`, and persist the resulting
 * index under `VECTOR_STORE_PATH`.
 *
 * @returns A promise that resolves once the store has been saved.
 * @throws Error when the page produces no text chunks. Rejections from the
 *   loader, the embeddings client, or the save step propagate unchanged.
 */
async function generateEmbeddings(): Promise<void> {
  // NOTE(review): this URL is a blog post, while VECTOR_STORE_PATH and DOCS_DIR
  // are named after Hex docs — confirm this is the intended input.
  const sourceUrl = 'https://lilianweng.github.io/posts/2023-06-23-agent/';

  const loader = new CheerioWebBaseLoader(sourceUrl);
  const docs = await loader.load();

  const textSplitter = new RecursiveCharacterTextSplitter({
    chunkSize: 1000,
    chunkOverlap: 200,
  });
  const splits = await textSplitter.splitDocuments(docs);

  // Fail early with a descriptive message: with nothing to index, the store
  // would otherwise fail later at save time with a far less helpful error.
  if (splits.length === 0) {
    throw new Error(`No content to embed: ${sourceUrl} produced no text chunks.`);
  }

  // Default options; presumably the API key comes from the environment — confirm.
  const embeddings = new OpenAIEmbeddings();
  const vectorStore = await HNSWLib.fromDocuments(splits, embeddings);
  await vectorStore.save(VECTOR_STORE_PATH);

  console.log('Embeddings generated and saved successfully.');
}
// Script entry point: run the pipeline, log any failure, and mark the process
// as failed so shells and CI do not mistake an error for a successful run.
generateEmbeddings().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});