debug: more logs, and analyze precache.py
All checks were successful
Deploy on push / deploy (push) Successful in 44s

Stepan Vladovskiy
2025-05-19 16:10:35 -03:00
parent f0d63e28f8
commit 1690ed63aa
3 changed files with 176 additions and 52 deletions

cache/precache.py (vendored, 57 lines changed)

@@ -111,21 +111,48 @@ async def precache_data():
logger.info(f"{len(topics)} topics and their followings precached")
# authors
authors = get_with_stat(select(Author).where(Author.user.is_not(None)))
logger.info(f"{len(authors)} authors found in database")
for author in authors:
if isinstance(author, Author):
profile = author.dict()
author_id = profile.get("id")
user_id = profile.get("user", "").strip()
if author_id and user_id:
await cache_author(profile)
await asyncio.gather(
precache_authors_followers(author_id, session), precache_authors_follows(author_id, session)
)
else:
logger.error(f"fail caching {author}")
logger.info(f"{len(authors)} authors and their followings precached")
try:
authors = get_with_stat(select(Author).where(Author.user.is_not(None)))
logger.info(f"{len(authors)} authors found in database")
# Process authors in smaller batches to avoid long-running operations
batch_size = 50
total_processed = 0
# Create batches
author_batches = [authors[i:i + batch_size] for i in range(0, len(authors), batch_size)]
logger.info(f"Processing authors in {len(author_batches)} batches of {batch_size}")
for batch_idx, author_batch in enumerate(author_batches):
batch_tasks = []
for author in author_batch:
if isinstance(author, Author):
profile = author.dict()
author_id = profile.get("id")
user_id = profile.get("user", "").strip()
if author_id and user_id:
# Add task to the batch
cache_task = cache_author(profile)
follower_task = precache_authors_followers(author_id, session)
follows_task = precache_authors_follows(author_id, session)
batch_tasks.extend([cache_task, follower_task, follows_task])
else:
logger.error(f"fail caching {author}")
# Run all tasks for this batch with timeout
if batch_tasks:
try:
await asyncio.wait_for(asyncio.gather(*batch_tasks), timeout=30)
total_processed += len(author_batch)
logger.info(f"Processed batch {batch_idx+1}/{len(author_batches)} ({total_processed}/{len(authors)} authors)")
except asyncio.TimeoutError:
logger.error(f"Timeout processing author batch {batch_idx+1}, continuing with next batch")
logger.info(f"{total_processed} authors and their followings precached (out of {len(authors)} total)")
except Exception as author_exc:
import traceback
logger.error(f"Error processing authors: {author_exc}")
logger.error(traceback.format_exc())
except Exception as exc:
import traceback
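
The change replaces the per-author await loop with a batch-and-timeout pattern: collect the coroutines for each batch of authors, then bound the whole batch with asyncio.wait_for wrapped around asyncio.gather, so a stuck batch is logged and skipped instead of hanging the precache. A minimal standalone sketch of that pattern follows; process_one, the item type, and the sizes are illustrative stand-ins, not code from this repository:

```python
import asyncio


async def process_one(item: int) -> None:
    # Stand-in for per-item async work (e.g., a cache write)
    await asyncio.sleep(0.01)


async def process_in_batches(items: list[int], batch_size: int = 50, timeout: float = 30.0) -> int:
    """Process items in fixed-size batches; a slow batch is skipped, not fatal."""
    processed = 0
    batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]
    for idx, batch in enumerate(batches):
        tasks = [process_one(item) for item in batch]
        try:
            # On timeout, wait_for cancels the gather (and its pending tasks),
            # so one stuck batch cannot stall the whole run.
            await asyncio.wait_for(asyncio.gather(*tasks), timeout=timeout)
            processed += len(batch)
        except asyncio.TimeoutError:
            print(f"batch {idx + 1}/{len(batches)} timed out, skipping")
    return processed


if __name__ == "__main__":
    print(asyncio.run(process_in_batches(list(range(200)))))  # prints 200
```

One subtlety of this design: when wait_for times out, cancelling the gather also cancels the batch's still-pending tasks, so their partial work is abandoned rather than left running in the background, which is why the diff counts only fully completed batches in total_processed.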