import asyncio
import json
import time
import traceback

from sqlalchemy import and_, join, select

from cache.cache import cache_author, cache_topic
from orm.author import Author, AuthorFollower
from orm.shout import Shout, ShoutAuthor, ShoutReactionsFollower, ShoutTopic
from orm.topic import Topic, TopicFollower
from resolvers.stat import get_with_stat
from services.db import local_session
from services.redis import redis
from utils.encoders import CustomJSONEncoder
from utils.logger import root_logger as logger


# Precache an author's followers
async def precache_authors_followers(author_id, session):
    authors_followers = set()
    followers_query = select(AuthorFollower.follower).where(AuthorFollower.author == author_id)
    result = session.execute(followers_query)
    authors_followers.update(row[0] for row in result if row[0])

    followers_payload = json.dumps(list(authors_followers), cls=CustomJSONEncoder)
    await redis.execute("SET", f"author:followers:{author_id}", followers_payload)


# Precache an author's follows (topics, authors, shouts)
async def precache_authors_follows(author_id, session):
    follows_topics_query = select(TopicFollower.topic).where(TopicFollower.follower == author_id)
    follows_authors_query = select(AuthorFollower.author).where(AuthorFollower.follower == author_id)
    follows_shouts_query = select(ShoutReactionsFollower.shout).where(ShoutReactionsFollower.follower == author_id)

    follows_topics = {row[0] for row in session.execute(follows_topics_query) if row[0]}
    follows_authors = {row[0] for row in session.execute(follows_authors_query) if row[0]}
    follows_shouts = {row[0] for row in session.execute(follows_shouts_query) if row[0]}

    topics_payload = json.dumps(list(follows_topics), cls=CustomJSONEncoder)
    authors_payload = json.dumps(list(follows_authors), cls=CustomJSONEncoder)
    shouts_payload = json.dumps(list(follows_shouts), cls=CustomJSONEncoder)

    await asyncio.gather(
        redis.execute("SET", f"author:follows-topics:{author_id}", topics_payload),
        redis.execute("SET", f"author:follows-authors:{author_id}", authors_payload),
        redis.execute("SET", f"author:follows-shouts:{author_id}", shouts_payload),
    )


# Precache a topic's authors
async def precache_topics_authors(topic_id: int, session):
    topic_authors_query = (
        select(ShoutAuthor.author)
        .select_from(join(ShoutTopic, Shout, ShoutTopic.shout == Shout.id))
        .join(ShoutAuthor, ShoutAuthor.shout == Shout.id)
        .filter(
            and_(
                ShoutTopic.topic == topic_id,
                Shout.published_at.is_not(None),
                Shout.deleted_at.is_(None),
            )
        )
    )
    topic_authors = {row[0] for row in session.execute(topic_authors_query) if row[0]}

    authors_payload = json.dumps(list(topic_authors), cls=CustomJSONEncoder)
    await redis.execute("SET", f"topic:authors:{topic_id}", authors_payload)


# Precache a topic's followers
async def precache_topics_followers(topic_id: int, session):
    followers_query = select(TopicFollower.follower).where(TopicFollower.topic == topic_id)
    topic_followers = {row[0] for row in session.execute(followers_query) if row[0]}

    followers_payload = json.dumps(list(topic_followers), cls=CustomJSONEncoder)
    await redis.execute("SET", f"topic:followers:{topic_id}", followers_payload)


async def precache_data():
    logger.info("precaching...")
    try:
        key = "authorizer_env"  # cache reset
        value = await redis.execute("HGETALL", key)
        await redis.execute("FLUSHDB")
        logger.info("redis: FLUSHDB")

        # Convert the preserved hash back into arguments for HSET
        if value:
            # If the value is a dict, flatten it into field/value pairs for HSET
            if isinstance(value, dict):
                flattened = []
                for field, val in value.items():
                    flattened.extend([field, val])
                await redis.execute("HSET", key, *flattened)
            else:
                # Assume the value is already a flat list
                await redis.execute("HSET", key, *value)
            logger.info(f"redis hash '{key}' was restored")

        # Set a start time to track total execution time
        start_time = time.time()

        with local_session() as session:
            # topics
            q = select(Topic).where(Topic.community == 1)
            topics = get_with_stat(q)
            for topic in topics:
                topic_dict = topic.dict() if hasattr(topic, "dict") else topic
                await cache_topic(topic_dict)
                await asyncio.gather(
                    precache_topics_followers(topic_dict["id"], session),
                    precache_topics_authors(topic_dict["id"], session),
                )
            logger.info(f"{len(topics)} topics and their followings precached")

            # authors
            try:
                authors = get_with_stat(select(Author).where(Author.user.is_not(None)))
                logger.info(f"{len(authors)} authors found in database")

                # Process authors in smaller batches to avoid long-running operations
                batch_size = 50
                total_processed = 0

                # Create batches
                author_batches = [authors[i : i + batch_size] for i in range(0, len(authors), batch_size)]
                logger.info(f"Processing authors in {len(author_batches)} batches of {batch_size}")

                for batch_idx, author_batch in enumerate(author_batches):
                    batch_tasks = []
                    for author in author_batch:
                        if isinstance(author, Author):
                            profile = author.dict()
                            author_id = profile.get("id")
                            user_id = profile.get("user", "").strip()
                            if author_id and user_id:
                                # Add this author's tasks to the batch
                                cache_task = cache_author(profile)
                                follower_task = precache_authors_followers(author_id, session)
                                follows_task = precache_authors_follows(author_id, session)
                                batch_tasks.extend([cache_task, follower_task, follows_task])
                        else:
                            logger.error(f"failed to cache author {author}")

                    # Run all tasks for this batch with a timeout
                    if batch_tasks:
                        try:
                            await asyncio.wait_for(asyncio.gather(*batch_tasks), timeout=30)
                            total_processed += len(author_batch)
                            logger.info(
                                f"Processed batch {batch_idx + 1}/{len(author_batches)} "
                                f"({total_processed}/{len(authors)} authors)"
                            )
                        except asyncio.TimeoutError:
                            logger.error(f"Timeout processing author batch {batch_idx + 1}, continuing with next batch")

                logger.info(f"{total_processed} authors and their followings precached (out of {len(authors)} total)")
            except Exception as author_exc:
                logger.error(f"Error processing authors: {author_exc}")
                logger.error(traceback.format_exc())

        # Calculate total execution time and log it
        end_time = time.time()
        execution_time = end_time - start_time
        logger.info(f"Total precache execution time: {execution_time:.2f} seconds")

        # Double-check that we're actually returning and not getting stuck somewhere
        logger.info("Precache operation complete - returning to caller")
        return True
    except Exception as exc:
        traceback.print_exc()
        logger.error(f"Error in precache_data: {exc}")
        return False
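# A minimal invocation sketch, assuming this module can be exercised standalone with the
# services above (redis, local_session) already configured; in the application itself
# precache_data() is presumably awaited from the server startup path rather than run directly.
if __name__ == "__main__":
    asyncio.run(precache_data())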