debug(search.py): add more logging when checking search index sync
All checks were successful
Deploy on push / deploy (push) Successful in 1m3s
All checks were successful
Deploy on push / deploy (push) Successful in 1m3s
This commit is contained in:
parent
1fd623a660
commit
f089a32394
|
@ -17,7 +17,7 @@ if __name__ == "__main__":
|
||||||
address="0.0.0.0",
|
address="0.0.0.0",
|
||||||
port=PORT,
|
port=PORT,
|
||||||
interface=Interfaces.ASGI,
|
interface=Interfaces.ASGI,
|
||||||
workers=4,
|
workers=1,
|
||||||
websockets=False,
|
websockets=False,
|
||||||
log_level=LogLevels.debug,
|
log_level=LogLevels.debug,
|
||||||
backlog=2048,
|
backlog=2048,
|
||||||
|
|
|
@ -451,6 +451,24 @@ async def initialize_search_index(shouts_data):
|
||||||
index_stats = info.get("index_stats", {})
|
index_stats = info.get("index_stats", {})
|
||||||
indexed_doc_count = index_stats.get("document_count", 0)
|
indexed_doc_count = index_stats.get("document_count", 0)
|
||||||
|
|
||||||
|
# Log database document summary
|
||||||
|
db_ids = [str(shout.id) for shout in shouts_data]
|
||||||
|
logger.info(f"Database contains {len(shouts_data)} documents. Sample IDs: {', '.join(db_ids[:5])}...")
|
||||||
|
|
||||||
|
# Calculate summary by ID range to understand the coverage
|
||||||
|
try:
|
||||||
|
# Parse numeric IDs where possible to analyze coverage
|
||||||
|
numeric_ids = [int(sid) for sid in db_ids if sid.isdigit()]
|
||||||
|
if numeric_ids:
|
||||||
|
min_id = min(numeric_ids)
|
||||||
|
max_id = max(numeric_ids)
|
||||||
|
id_range = max_id - min_id + 1
|
||||||
|
coverage_pct = (len(numeric_ids) / id_range) * 100 if id_range > 0 else 0
|
||||||
|
logger.info(f"ID range analysis: min_id={min_id}, max_id={max_id}, range={id_range}, "
|
||||||
|
f"coverage={coverage_pct:.1f}% ({len(numeric_ids)}/{id_range})")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Could not analyze ID ranges: {e}")
|
||||||
|
|
||||||
# If counts are significantly different, do verification
|
# If counts are significantly different, do verification
|
||||||
if abs(indexed_doc_count - len(shouts_data)) > 10:
|
if abs(indexed_doc_count - len(shouts_data)) > 10:
|
||||||
logger.info(f"Document count mismatch: {indexed_doc_count} in index vs {len(shouts_data)} in database. Verifying...")
|
logger.info(f"Document count mismatch: {indexed_doc_count} in index vs {len(shouts_data)} in database. Verifying...")
|
||||||
|
@ -469,6 +487,7 @@ async def initialize_search_index(shouts_data):
|
||||||
missing_ids = verification.get("missing", [])
|
missing_ids = verification.get("missing", [])
|
||||||
if missing_ids:
|
if missing_ids:
|
||||||
logger.info(f"Found {len(missing_ids)} documents missing from index. Indexing them...")
|
logger.info(f"Found {len(missing_ids)} documents missing from index. Indexing them...")
|
||||||
|
logger.info(f"Sample missing IDs: {', '.join(missing_ids[:10])}...")
|
||||||
missing_docs = [shout for shout in shouts_data if str(shout.id) in missing_ids]
|
missing_docs = [shout for shout in shouts_data if str(shout.id) in missing_ids]
|
||||||
await search_service.bulk_index(missing_docs)
|
await search_service.bulk_index(missing_docs)
|
||||||
else:
|
else:
|
||||||
|
@ -476,6 +495,22 @@ async def initialize_search_index(shouts_data):
|
||||||
else:
|
else:
|
||||||
logger.info(f"Search index appears to be in sync ({indexed_doc_count} documents indexed).")
|
logger.info(f"Search index appears to be in sync ({indexed_doc_count} documents indexed).")
|
||||||
|
|
||||||
|
# Optional sample verification (can be slow with large document sets)
|
||||||
|
# Uncomment if you want to periodically check a random sample even when counts match
|
||||||
|
"""
|
||||||
|
sample_size = 10
|
||||||
|
if len(db_ids) > sample_size:
|
||||||
|
sample_ids = random.sample(db_ids, sample_size)
|
||||||
|
logger.info(f"Performing random sample verification on {sample_size} documents...")
|
||||||
|
verification = await search_service.verify_docs(sample_ids)
|
||||||
|
if verification.get("missing"):
|
||||||
|
missing_count = len(verification.get("missing", []))
|
||||||
|
logger.warning(f"Random verification found {missing_count}/{sample_size} missing docs "
|
||||||
|
f"despite count match. Consider full verification.")
|
||||||
|
else:
|
||||||
|
logger.info("Random document sample verification passed.")
|
||||||
|
"""
|
||||||
|
|
||||||
# Verify with test query
|
# Verify with test query
|
||||||
try:
|
try:
|
||||||
test_query = "test"
|
test_query = "test"
|
||||||
|
@ -484,6 +519,15 @@ async def initialize_search_index(shouts_data):
|
||||||
|
|
||||||
if test_results:
|
if test_results:
|
||||||
logger.info(f"Search verification successful: found {len(test_results)} results")
|
logger.info(f"Search verification successful: found {len(test_results)} results")
|
||||||
|
# Log categories covered by search results
|
||||||
|
categories = set()
|
||||||
|
for result in test_results:
|
||||||
|
result_id = result.get("id")
|
||||||
|
matching_shouts = [s for s in shouts_data if str(s.id) == result_id]
|
||||||
|
if matching_shouts and hasattr(matching_shouts[0], 'category'):
|
||||||
|
categories.add(getattr(matching_shouts[0], 'category', 'unknown'))
|
||||||
|
if categories:
|
||||||
|
logger.info(f"Search results cover categories: {', '.join(categories)}")
|
||||||
else:
|
else:
|
||||||
logger.warning("Search verification returned no results. Index may be empty or not working.")
|
logger.warning("Search verification returned no results. Index may be empty or not working.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user