from pathlib import Path

from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

from backend.ai.embedding import watson_embedding


def main(
    data_dir: str = "data",
    persist_directory: str = "./vectordb",
    chunk_size: int = 300,
    chunk_overlap: int = 30,
) -> None:
    """Index the ``*.txt`` files of *data_dir* into a persisted Chroma store.

    Each text file is read as UTF-8 and wrapped in a ``Document`` whose
    ``source`` metadata is the file name. The documents are split into
    overlapping chunks and handed to ``Chroma.from_documents`` together with
    ``watson_embedding``.

    Args:
        data_dir: Directory that is searched (non-recursively) for ``*.txt``.
        persist_directory: Directory passed to Chroma as its persist location.
        chunk_size: ``chunk_size`` given to the text splitter.
        chunk_overlap: ``chunk_overlap`` given to the text splitter.
    """
    source_dir = Path(data_dir)

    # Sort the matches: glob order depends on the filesystem, and a stable
    # order keeps repeated runs reproducible.
    documents = [
        Document(
            page_content=text_file.read_text(encoding="utf-8"),
            metadata={"source": text_file.name},
        )
        for text_file in sorted(source_dir.glob("*.txt"))
    ]

    # Nothing to index: skip the embedding / vector-store call instead of
    # invoking it with an empty document list.
    if not documents:
        print(f"No .txt files found in {source_dir}; nothing to index.")
        return

    # Split the documents into overlapping chunks.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    split_docs = splitter.split_documents(documents)

    # Build the vector index and persist it under persist_directory.
    # NOTE(review): re-running against an existing persist directory
    # presumably adds the same chunks again -- confirm, and clear the
    # directory first if duplicates are not wanted.
    Chroma.from_documents(
        documents=split_docs,
        embedding=watson_embedding,
        persist_directory=persist_directory,
    )


if __name__ == "__main__":
    main()