Files
Source/project/CALLCENTER_APP/backend/scripts/build_vector_db.py
T
cooney 5f5edb1e6d callcenter 프로젝트 완료
- 상담 기록 분석 및 요약
- 상담 기록 db 저장
- 상담 평가
- 상담 평가 db 저장
2026-06-18 13:13:31 +09:00

27 lines
906 B
Python

from langchain_chroma import Chroma
from backend.ai.embedding import watson_embedding
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pathlib import Path
def main(
    *,
    data_dir: "str | Path" = "data",
    persist_directory: str = "./vectordb",
    chunk_size: int = 300,
    chunk_overlap: int = 30,
) -> None:
    """Build the Chroma vector index from the ``*.txt`` files in *data_dir*.

    Each text file becomes one ``Document`` whose ``source`` metadata is the
    file name. The documents are split into overlapping chunks, embedded with
    ``watson_embedding`` and handed to ``Chroma.from_documents`` together with
    *persist_directory*.

    Args:
        data_dir: Directory scanned (non-recursively) for ``*.txt`` files.
            A relative path is interpreted against the current working
            directory.
        persist_directory: Directory passed to Chroma as ``persist_directory``.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to the splitter.

    Raises:
        FileNotFoundError: If *data_dir* is not an existing directory or
            contains no ``*.txt`` files.
        ValueError: If the files yield no chunks after splitting.
    """
    source_dir = Path(data_dir)
    # Fail loudly instead of handing an empty document list to Chroma, which
    # is what happens when the script is started from the wrong directory.
    if not source_dir.is_dir():
        raise FileNotFoundError(
            f"Data directory not found: {source_dir.resolve()}"
        )

    # Sorted so the build order does not depend on filesystem listing order;
    # directories that happen to match the pattern are skipped.
    text_files = sorted(
        path for path in source_dir.glob("*.txt") if path.is_file()
    )
    if not text_files:
        raise FileNotFoundError(
            f"No .txt files found in {source_dir.resolve()}"
        )

    # One Document per file, tagged with the file name as its source.
    documents = [
        Document(
            page_content=path.read_text(encoding="utf-8"),
            metadata={"source": path.name},
        )
        for path in text_files
    ]

    # Split the documents into chunks.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    split_docs = splitter.split_documents(documents)
    if not split_docs:
        raise ValueError(
            f"No text chunks produced from {len(text_files)} file(s) in "
            f"{source_dir.resolve()}"
        )

    # Build the index (vector DB) in persist_directory.
    # NOTE(review): re-running against an existing persist_directory
    # presumably adds to the stored collection rather than replacing it --
    # confirm whether the directory should be cleared before a rebuild.
    Chroma.from_documents(
        documents=split_docs,
        embedding=watson_embedding,
        persist_directory=persist_directory,
    )
# Script entry point: build the vector DB only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()