From 0d2054473c320a9c05f53e503bb55add4ea48271 Mon Sep 17 00:00:00 2001 From: PromptEngineer <134474669+PromtEngineer@users.noreply.github.com> Date: Wed, 16 Aug 2023 22:54:30 -0700 Subject: [PATCH] Update ingest.py Changed the Python splitter's chunk size to 880 from 1000; the original size seems to be causing some issues for larger files. The best way is to play around with it. --- ingest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingest.py b/ingest.py index f3b90a33..12dde5f7 100644 --- a/ingest.py +++ b/ingest.py @@ -123,7 +123,7 @@ def main(device_type): text_documents, python_documents = split_documents(documents) text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) python_splitter = RecursiveCharacterTextSplitter.from_language( - language=Language.PYTHON, chunk_size=1000, chunk_overlap=200 + language=Language.PYTHON, chunk_size=880, chunk_overlap=200 ) texts = text_splitter.split_documents(text_documents) texts.extend(python_splitter.split_documents(python_documents))