debug(search.py): increase batch size for bulk indexing

Stepan Vladovskiy 2025-03-21 17:48:26 -03:00
parent f1d9f4e036
commit fb820f67fd


@@ -96,7 +96,7 @@ class SearchService:
         start_time = time.time()
         logger.info(f"Starting bulk indexing of {len(shouts)} documents")

-        MAX_TEXT_LENGTH = 4000  # Maximum text length to send in a single request
+        MAX_TEXT_LENGTH = 8000  # Maximum text length to send in a single request
         max_batch_size = MAX_BATCH_SIZE
         total_indexed = 0
         total_skipped = 0
@@ -176,17 +176,17 @@ class SearchService:

         # Process small documents (larger batches)
         if small_docs:
-            batch_size = min(max_batch_size, 15)
+            batch_size = min(max_batch_size, 25)
             await self._process_document_batches(small_docs, batch_size, "small")

         # Process medium documents (medium batches)
         if medium_docs:
-            batch_size = min(max_batch_size, 10)
+            batch_size = min(max_batch_size, 15)
             await self._process_document_batches(medium_docs, batch_size, "medium")

         # Process large documents (small batches)
         if large_docs:
-            batch_size = min(max_batch_size, 3)
+            batch_size = min(max_batch_size, 5)
             await self._process_document_batches(large_docs, batch_size, "large")

         elapsed = time.time() - start_time
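For context, the commit only raises the per-bucket caps (15/10/3 to 25/15/5) and the text cap (4000 to 8000); the bucketing strategy itself is unchanged. Below is a minimal runnable sketch of that strategy, assuming illustrative size thresholds (2000/5000) and a stand-in for _process_document_batches, whose body is not part of this diff:

import asyncio

MAX_TEXT_LENGTH = 8000  # per-document text cap, as raised by this commit
MAX_BATCH_SIZE = 25     # assumed global cap; its real value is not in the diff

def split_by_size(docs):
    # Bucket documents by text length; the 2000/5000 thresholds are
    # illustrative assumptions, not values taken from the repository.
    small, medium, large = [], [], []
    for doc in docs:
        n = len(doc["text"])
        if n < 2000:
            small.append(doc)
        elif n < 5000:
            medium.append(doc)
        else:
            large.append(doc)
    return small, medium, large

async def process_document_batches(docs, batch_size, size_label):
    # Stand-in for SearchService._process_document_batches: index the
    # documents in slices of batch_size.
    for i in range(0, len(docs), batch_size):
        batch = docs[i:i + batch_size]
        # the real code would send `batch` to the search backend here
        await asyncio.sleep(0)
        print(f"indexed {len(batch)} {size_label} documents")

async def bulk_index(docs):
    # Truncate oversized text, then use larger batches for small documents
    # and smaller batches for large ones (25 / 15 / 5 after this commit).
    for doc in docs:
        doc["text"] = doc["text"][:MAX_TEXT_LENGTH]
    small, medium, large = split_by_size(docs)
    if small:
        await process_document_batches(small, min(MAX_BATCH_SIZE, 25), "small")
    if medium:
        await process_document_batches(medium, min(MAX_BATCH_SIZE, 15), "medium")
    if large:
        await process_document_batches(large, min(MAX_BATCH_SIZE, 5), "large")

asyncio.run(bulk_index([{"text": "hello world " * 100}]))

The design choice mirrored here is the usual trade-off in bulk indexing: small documents can be packed many to a request without exceeding payload limits, while large documents need smaller batches to keep request sizes bounded.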