diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2c12cf79..1d5d57ad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,9 @@
+#### [0.4.19] - 2025-04-14
+- dropped `Shout.description` and `Draft.description` in favor of the auto-generated `seo` field
+- use Redis to initialize view counters after the migrator
+
 #### [0.4.18] - 2025-04-10
+- Fixed `Topic.stat.authors` and `Topic.stat.comments`
 - Fixed unique constraint violation for empty slug values:
   - Modified `update_draft` resolver to handle empty slug values
   - Modified `create_draft` resolver to prevent empty slug values
diff --git a/orm/draft.py b/orm/draft.py
index c29794c5..1c669f02 100644
--- a/orm/draft.py
+++ b/orm/draft.py
@@ -31,6 +31,7 @@ class Draft(Base):
     # required
     created_at: int = Column(Integer, nullable=False, default=lambda: int(time.time()))
     created_by: int = Column(ForeignKey("author.id"), nullable=False)
+    community: int = Column(ForeignKey("community.id"), nullable=False, default=1)
 
     # optional
     layout: str = Column(String, nullable=True, default="article")
@@ -38,7 +39,6 @@ class Draft(Base):
     title: str = Column(String, nullable=True)
     subtitle: str | None = Column(String, nullable=True)
     lead: str | None = Column(String, nullable=True)
-    description: str | None = Column(String, nullable=True)
     body: str = Column(String, nullable=False, comment="Body")
     media: dict | None = Column(JSON, nullable=True)
     cover: str | None = Column(String, nullable=True, comment="Cover image url")
diff --git a/orm/shout.py b/orm/shout.py
index 37734aca..d74e84d4 100644
--- a/orm/shout.py
+++ b/orm/shout.py
@@ -91,7 +91,6 @@ class Shout(Base):
     cover: str | None = Column(String, nullable=True, comment="Cover image url")
     cover_caption: str | None = Column(String, nullable=True, comment="Cover image alt caption")
     lead: str | None = Column(String, nullable=True)
-    description: str | None = Column(String, nullable=True)
     title: str = Column(String, nullable=False)
     subtitle: str | None = Column(String, nullable=True)
     layout: str = Column(String, nullable=False, default="article")
diff --git a/requirements.txt b/requirements.txt
index 0f4d2f7f..08f492e5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,4 +18,5 @@ granian
 httpx
 orjson
-pydantic
\ No newline at end of file
+pydantic
+trafilatura
\ No newline at end of file
diff --git a/resolvers/draft.py b/resolvers/draft.py
index 2971ec4c..2f47e01a 100644
--- a/resolvers/draft.py
+++ b/resolvers/draft.py
@@ -1,6 +1,7 @@
 import time
 from operator import or_
 
+import trafilatura
 from sqlalchemy.sql import and_
 
 from cache.cache import (
@@ -30,7 +31,6 @@ def create_shout_from_draft(session, draft, author_id):
         cover=draft.cover,
         cover_caption=draft.cover_caption,
         lead=draft.lead,
-        description=draft.description,
         title=draft.title,
         subtitle=draft.subtitle,
         layout=draft.layout,
@@ -104,7 +104,7 @@ async def create_draft(_, info, draft_input):
         if "title" not in draft_input or not draft_input["title"]:
             draft_input["title"] = ""  # Empty string instead of NULL
-
+
         # The slug must either be non-empty or not be passed at all
         if "slug" in draft_input and (draft_input["slug"] is None or draft_input["slug"] == ""):
             # When creating a draft, drop an empty slug from the input
@@ -116,6 +116,10 @@ async def create_draft(_, info, draft_input):
         if "id" in draft_input:
             del draft_input["id"]
 
+        if "seo" not in draft_input or not draft_input["seo"]:
+            body_teaser = "\n".join(draft_input.get("body", "")[:300].split("\n")[:-1])
+            draft_input["seo"] = draft_input.get("lead", body_teaser)
+
         # Add the current creation time
         draft_input["created_at"] = int(time.time())
@@ -161,12 +165,20 @@ async def update_draft(_, info, draft_id: int, draft_input):
         if not draft:
             return {"error": "Draft not found"}
 
+        if "seo" not in draft_input and not draft.seo:
+            body_src = draft_input["body"] if "body" in draft_input else draft.body
+            body_text = trafilatura.extract(body_src) or ""
+            lead_src = draft_input["lead"] if "lead" in draft_input else draft.lead
+            lead_text = trafilatura.extract(lead_src) if lead_src else ""
+            body_teaser = ".\n".join(body_text[:300].split(". ")[:-1])
+            draft_input["seo"] = lead_text or body_teaser
+
         Draft.update(draft, draft_input)
         # Set updated_at and updated_by from the authenticated user
         current_time = int(time.time())
         draft.updated_at = current_time
         draft.updated_by = author_id
-
+
         session.commit()
 
         return {"draft": draft}
@@ -267,7 +279,6 @@ async def publish_shout(_, info, shout_id: int):
             shout.cover = draft.cover
             shout.cover_caption = draft.cover_caption
             shout.lead = draft.lead
-            shout.description = draft.description
             shout.layout = draft.layout
             shout.media = draft.media
             shout.lang = draft.lang
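
Note on the two `seo` fallback blocks above: both derive the same kind of teaser, and the list-slicing/`join` pattern is easy to get backwards (`join` is a `str` method, not a `list` method). A minimal sketch of the shared logic as a standalone helper — hypothetical, not part of this patch; it assumes HTML-ish input and guards against `trafilatura.extract()` returning `None`, which it does when nothing can be parsed:

    import trafilatura

    def build_seo_teaser(body: str, lead: str | None = None, limit: int = 300) -> str:
        """Prefer the lead text; otherwise build a teaser from the body."""
        lead_text = trafilatura.extract(lead) if lead else None
        if lead_text:
            return lead_text
        body_text = trafilatura.extract(body) or ""
        # Truncate first, then drop the trailing (possibly cut-off) sentence fragment
        return ".\n".join(body_text[:limit].split(". ")[:-1])
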
draft_input["created_at"] = int(time.time()) @@ -161,12 +165,20 @@ async def update_draft(_, info, draft_id: int, draft_input): if not draft: return {"error": "Draft not found"} + if "seo" not in draft_input and not draft.seo: + body_src = draft_input["body"] if "body" in draft_input else draft.body + body_text = trafilatura.extract(body_src) + lead_src = draft_input["lead"] if "lead" in draft_input else draft.lead + lead_text = trafilatura.extract(lead_src) + body_teaser = body_text[:300].split(". ")[:-1].join(".\n") + draft_input["seo"] = lead_text or body_teaser + Draft.update(draft, draft_input) # Set updated_at and updated_by from the authenticated user current_time = int(time.time()) draft.updated_at = current_time draft.updated_by = author_id - + session.commit() return {"draft": draft} @@ -267,7 +279,6 @@ async def publish_shout(_, info, shout_id: int): shout.cover = draft.cover shout.cover_caption = draft.cover_caption shout.lead = draft.lead - shout.description = draft.description shout.layout = draft.layout shout.media = draft.media shout.lang = draft.lang diff --git a/resolvers/editor.py b/resolvers/editor.py index 1efc40cc..6d0b396f 100644 --- a/resolvers/editor.py +++ b/resolvers/editor.py @@ -1,6 +1,7 @@ import time import orjson +import trafilatura from sqlalchemy import and_, desc, select from sqlalchemy.orm import joinedload from sqlalchemy.sql.functions import coalesce @@ -176,9 +177,16 @@ async def create_shout(_, info, inp): logger.info(f"Creating shout with input: {inp}") # Создаем публикацию без topics + body = inp.get("body", "") + lead = inp.get("lead", "") + body_text = trafilatura.extract(body) + lead_text = trafilatura.extract(lead) + seo = inp.get("seo", lead_text or body_text[:300].split(". ")[:-1].join(". ")) new_shout = Shout( slug=slug, - body=inp.get("body", ""), + body=body, + seo=seo, + lead=lead, layout=inp.get("layout", "article"), title=inp.get("title", ""), created_by=author_id, @@ -380,7 +388,7 @@ def patch_topics(session, shout, topics_input): # @login_required async def update_shout(_, info, shout_id: int, shout_input=None, publish=False): logger.info(f"Starting update_shout with id={shout_id}, publish={publish}") - logger.debug(f"Full shout_input: {shout_input}") + logger.debug(f"Full shout_input: {shout_input}") # DraftInput user_id = info.context.get("user_id") roles = info.context.get("roles", []) diff --git a/resolvers/topic.py b/resolvers/topic.py index da46734d..4ecf241b 100644 --- a/resolvers/topic.py +++ b/resolvers/topic.py @@ -6,7 +6,7 @@ from cache.cache import ( get_cached_topic_authors, get_cached_topic_by_slug, get_cached_topic_followers, - invalidate_cache_by_prefix + invalidate_cache_by_prefix, ) from orm.author import Author from orm.topic import Topic @@ -126,7 +126,7 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None) GROUP BY topic """ followers_stats = {row[0]: row[1] for row in session.execute(text(followers_stats_query))} - + # Запрос на получение статистики авторов для выбранных тем authors_stats_query = f""" SELECT st.topic, COUNT(DISTINCT sa.author) as authors_count @@ -149,7 +149,6 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None) """ comments_stats = {row[0]: row[1] for row in session.execute(text(comments_stats_query))} - # Формируем результат с добавлением статистики result = [] for topic in topics: @@ -158,7 +157,7 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None) "shouts": shouts_stats.get(topic.id, 0), "followers": 
diff --git a/resolvers/topic.py b/resolvers/topic.py
index da46734d..4ecf241b 100644
--- a/resolvers/topic.py
+++ b/resolvers/topic.py
@@ -6,7 +6,7 @@ from cache.cache import (
     get_cached_topic_authors,
     get_cached_topic_by_slug,
     get_cached_topic_followers,
-    invalidate_cache_by_prefix
+    invalidate_cache_by_prefix,
 )
 from orm.author import Author
 from orm.topic import Topic
@@ -126,7 +126,7 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None):
             GROUP BY topic
         """
         followers_stats = {row[0]: row[1] for row in session.execute(text(followers_stats_query))}
-
+
         # Query author stats for the selected topics
         authors_stats_query = f"""
             SELECT st.topic, COUNT(DISTINCT sa.author) as authors_count
@@ -149,7 +149,6 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None):
         """
         comments_stats = {row[0]: row[1] for row in session.execute(text(comments_stats_query))}
 
-
         # Build the result with the stats attached
         result = []
         for topic in topics:
@@ -158,7 +157,7 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None):
                 "shouts": shouts_stats.get(topic.id, 0),
                 "followers": followers_stats.get(topic.id, 0),
                 "authors": authors_stats.get(topic.id, 0),
-                "comments": comments_stats.get(topic.id, 0)
+                "comments": comments_stats.get(topic.id, 0),
             }
             result.append(topic_dict)
diff --git a/schema/input.graphql b/schema/input.graphql
index ff3fa4dd..c1637723 100644
--- a/schema/input.graphql
+++ b/schema/input.graphql
@@ -33,7 +33,6 @@ input DraftInput {
   main_topic_id: Int # Changed from main_topic: Topic
   media: [MediaItemInput] # Changed to use MediaItemInput
   lead: String
-  description: String
   subtitle: String
   lang: String
   seo: String
diff --git a/schema/type.graphql b/schema/type.graphql
index ff82b4bc..eb8e2770 100644
--- a/schema/type.graphql
+++ b/schema/type.graphql
@@ -80,7 +80,6 @@ type Shout {
   layout: String!
 
   lead: String
-  description: String
   subtitle: String
   lang: String
   cover: String
@@ -100,6 +99,7 @@ type Shout {
   featured_at: Int
   deleted_at: Int
 
+  seo: String # generated if not set
   version_of: Shout # TODO: use version_of somewhere
   draft: Draft
   media: [MediaItem]
@@ -111,13 +111,12 @@ type Draft {
   id: Int!
   created_at: Int!
   created_by: Author!
-
+  community: Community!
   layout: String
   slug: String
   title: String
   subtitle: String
   lead: String
-  description: String
   body: String
   media: [MediaItem]
   cover: String
diff --git a/services/viewed.py b/services/viewed.py
index a388ea0b..a9ddeed1 100644
--- a/services/viewed.py
+++ b/services/viewed.py
@@ -2,9 +2,7 @@ import asyncio
 import os
 import time
 from datetime import datetime, timedelta, timezone
-from typing import Dict
-
-import orjson
+from typing import Dict, Optional
 
 # ga
 from google.analytics.data_v1beta import BetaAnalyticsDataClient
@@ -20,33 +18,39 @@ from orm.author import Author
 from orm.shout import Shout, ShoutAuthor, ShoutTopic
 from orm.topic import Topic
 from services.db import local_session
+from services.redis import redis
 from utils.logger import root_logger as logger
 
 GOOGLE_KEYFILE_PATH = os.environ.get("GOOGLE_KEYFILE_PATH", "/dump/google-service.json")
 GOOGLE_PROPERTY_ID = os.environ.get("GOOGLE_PROPERTY_ID", "")
-VIEWS_FILEPATH = "/dump/views.json"
 
 
 class ViewedStorage:
+    """
+    Stores and provides access to view-count data.
+    Uses Redis as the primary store and Google Analytics to collect fresh data.
+    """
+
     lock = asyncio.Lock()
-    precounted_by_slug = {}
     views_by_shout = {}
     shouts_by_topic = {}
     shouts_by_author = {}
     views = None
     period = 60 * 60  # every hour
-    analytics_client: BetaAnalyticsDataClient | None = None
+    analytics_client: Optional[BetaAnalyticsDataClient] = None
    auth_result = None
     running = False
+    redis_views_key = None
+    last_update_timestamp = 0
     start_date = datetime.now().strftime("%Y-%m-%d")
 
     @staticmethod
     async def init():
-        """Connect to the Google Analytics client using authentication"""
+        """Connect to the Google Analytics client and load view data from Redis"""
         self = ViewedStorage
         async with self.lock:
-            # Load precounted views from the views.json file
-            self.load_precounted_views()
+            # Load precounted views from Redis
+            await self.load_views_from_redis()
 
             os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", GOOGLE_KEYFILE_PATH)
             if GOOGLE_KEYFILE_PATH and os.path.isfile(GOOGLE_KEYFILE_PATH):
@@ -62,40 +66,54 @@ class ViewedStorage:
             self.running = False
 
     @staticmethod
-    def load_precounted_views():
-        """Load precounted views from the views.json file"""
+    async def load_views_from_redis():
+        """Load precounted views from Redis"""
         self = ViewedStorage
-        viewfile_path = VIEWS_FILEPATH
-        if not os.path.exists(viewfile_path):
-            viewfile_path = os.path.join(os.path.curdir, "views.json")
-            if not os.path.exists(viewfile_path):
-                logger.warning(" * views.json not found")
-                return
-        logger.info(f" * loading views from {viewfile_path}")
-        try:
-            start_date_int = os.path.getmtime(viewfile_path)
-            start_date_str = datetime.fromtimestamp(start_date_int).strftime("%Y-%m-%d")
-            self.start_date = start_date_str
+
+        # Connect to Redis if the connection has not been established yet
+        if not redis._client:
+            await redis.connect()
+
+        # List all migrated_views_* keys and find the most recent one
+        keys = await redis.execute("KEYS", "migrated_views_*")
+        if not keys:
+            logger.warning(" * No migrated_views keys found in Redis")
+            return
+
+        # Keep only timestamp-formatted keys (exclude migrated_views_slugs)
+        timestamp_keys = [k for k in keys if k != "migrated_views_slugs"]
+        if not timestamp_keys:
+            logger.warning(" * No migrated_views timestamp keys found in Redis")
+            return
+
+        # Sort by the creation time embedded in the key name and take the latest
+        timestamp_keys.sort()
+        latest_key = timestamp_keys[-1]
+        self.redis_views_key = latest_key
+
+        # Read the creation timestamp to set start_date
+        timestamp = await redis.execute("HGET", latest_key, "_timestamp")
+        if timestamp:
+            self.last_update_timestamp = int(timestamp)
+            timestamp_dt = datetime.fromtimestamp(int(timestamp))
+            self.start_date = timestamp_dt.strftime("%Y-%m-%d")
+
+        # If the data is from today, consider it up to date
         now_date = datetime.now().strftime("%Y-%m-%d")
-
         if now_date == self.start_date:
-            logger.info(" * views data is up to date!")
+            logger.info(" * Views data is up to date!")
         else:
-            logger.warn(f" * {viewfile_path} is too old: {self.start_date}")
+            logger.warning(f" * Views data is from {self.start_date}, may need update")
 
-            with open(viewfile_path, "r") as file:
-                precounted_views = orjson.loads(file.read())
-                self.precounted_by_slug.update(precounted_views)
-                logger.info(f" * {len(precounted_views)} shouts with views was loaded.")
-
-        except Exception as e:
-            logger.error(f"precounted views loading error: {e}")
+        # Log how many entries were loaded
+        total_entries = await redis.execute("HGET", latest_key, "_total")
+        if total_entries:
+            logger.info(f" * {total_entries} shouts with views loaded from Redis key: {latest_key}")
 
     # noinspection PyTypeChecker
     @staticmethod
     async def update_pages():
-        """Запрос всех страниц от Google Analytics, отсортрованных по количеству просмотров"""
+        """Request all pages from Google Analytics, sorted by view count"""
         self = ViewedStorage
         logger.info(" ⎧ views update from Google Analytics ---")
         if self.running:
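
The loader above expects the migrator to have written one hash per run, named `migrated_views_<timestamp>`, holding per-shout-id counters plus the service fields `_timestamp` and `_total`, along with a companion `migrated_views_slugs` hash keyed by slug. A hypothetical writer-side sketch — the function name is invented and the `redis.execute()` signature is assumed from the reader code above; not part of this patch:

    import time

    async def store_migrated_views(redis, views_by_id: dict[int, int], views_by_slug: dict[str, int]):
        now = int(time.time())
        key = f"migrated_views_{now}"
        # Per-shout counters, keyed by id
        for shout_id, views in views_by_id.items():
            await redis.execute("HSET", key, str(shout_id), views)
        # Service fields the loader reads: creation time and entry count
        await redis.execute("HSET", key, "_timestamp", now)
        await redis.execute("HSET", key, "_total", len(views_by_id))
        # Fallback hash for lookups by slug
        for slug, views in views_by_slug.items():
            await redis.execute("HSET", "migrated_views_slugs", slug, views)
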
+ """ + lock = asyncio.Lock() - precounted_by_slug = {} views_by_shout = {} shouts_by_topic = {} shouts_by_author = {} views = None period = 60 * 60 # каждый час - analytics_client: BetaAnalyticsDataClient | None = None + analytics_client: Optional[BetaAnalyticsDataClient] = None auth_result = None running = False + redis_views_key = None + last_update_timestamp = 0 start_date = datetime.now().strftime("%Y-%m-%d") @staticmethod async def init(): - """Подключение к клиенту Google Analytics с использованием аутентификации""" + """Подключение к клиенту Google Analytics и загрузка данных о просмотрах из Redis""" self = ViewedStorage async with self.lock: - # Загрузка предварительно подсчитанных просмотров из файла JSON - self.load_precounted_views() + # Загрузка предварительно подсчитанных просмотров из Redis + await self.load_views_from_redis() os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", GOOGLE_KEYFILE_PATH) if GOOGLE_KEYFILE_PATH and os.path.isfile(GOOGLE_KEYFILE_PATH): @@ -62,40 +66,54 @@ class ViewedStorage: self.running = False @staticmethod - def load_precounted_views(): - """Загрузка предварительно подсчитанных просмотров из файла JSON""" + async def load_views_from_redis(): + """Загрузка предварительно подсчитанных просмотров из Redis""" self = ViewedStorage - viewfile_path = VIEWS_FILEPATH - if not os.path.exists(viewfile_path): - viewfile_path = os.path.join(os.path.curdir, "views.json") - if not os.path.exists(viewfile_path): - logger.warning(" * views.json not found") - return - logger.info(f" * loading views from {viewfile_path}") - try: - start_date_int = os.path.getmtime(viewfile_path) - start_date_str = datetime.fromtimestamp(start_date_int).strftime("%Y-%m-%d") - self.start_date = start_date_str + # Подключаемся к Redis если соединение не установлено + if not redis._client: + await redis.connect() + + # Получаем список всех ключей migrated_views_* и находим самый последний + keys = await redis.execute("KEYS", "migrated_views_*") + if not keys: + logger.warning(" * No migrated_views keys found in Redis") + return + + # Фильтруем только ключи timestamp формата (исключаем migrated_views_slugs) + timestamp_keys = [k for k in keys if k != "migrated_views_slugs"] + if not timestamp_keys: + logger.warning(" * No migrated_views timestamp keys found in Redis") + return + + # Сортируем по времени создания (в названии ключа) и берем последний + timestamp_keys.sort() + latest_key = timestamp_keys[-1] + self.redis_views_key = latest_key + + # Получаем метку времени создания для установки start_date + timestamp = await redis.execute("HGET", latest_key, "_timestamp") + if timestamp: + self.last_update_timestamp = int(timestamp) + timestamp_dt = datetime.fromtimestamp(int(timestamp)) + self.start_date = timestamp_dt.strftime("%Y-%m-%d") + + # Если данные сегодняшние, считаем их актуальными now_date = datetime.now().strftime("%Y-%m-%d") - if now_date == self.start_date: - logger.info(" * views data is up to date!") + logger.info(" * Views data is up to date!") else: - logger.warn(f" * {viewfile_path} is too old: {self.start_date}") + logger.warning(f" * Views data is from {self.start_date}, may need update") - with open(viewfile_path, "r") as file: - precounted_views = orjson.loads(file.read()) - self.precounted_by_slug.update(precounted_views) - logger.info(f" * {len(precounted_views)} shouts with views was loaded.") - - except Exception as e: - logger.error(f"precounted views loading error: {e}") + # Выводим информацию о количестве загруженных записей + total_entries = await 
redis.execute("HGET", latest_key, "_total") + if total_entries: + logger.info(f" * {total_entries} shouts with views loaded from Redis key: {latest_key}") # noinspection PyTypeChecker @staticmethod async def update_pages(): - """Запрос всех страниц от Google Analytics, отсортрованных по количеству просмотров""" + """Запрос всех страниц от Google Analytics, отсортированных по количеству просмотров""" self = ViewedStorage logger.info(" ⎧ views update from Google Analytics ---") if self.running: @@ -140,15 +158,40 @@ class ViewedStorage: self.running = False @staticmethod - def get_shout(shout_slug="", shout_id=0) -> int: - """Получение метрики просмотров shout по slug или id.""" + async def get_shout(shout_slug="", shout_id=0) -> int: + """ + Получение метрики просмотров shout по slug или id. + + Args: + shout_slug: Slug публикации + shout_id: ID публикации + + Returns: + int: Количество просмотров + """ self = ViewedStorage + + # Получаем данные из Redis для новой схемы хранения + if not redis._client: + await redis.connect() + fresh_views = self.views_by_shout.get(shout_slug, 0) - precounted_views = self.precounted_by_slug.get(shout_slug, 0) - return fresh_views + precounted_views + + # Если есть id, пытаемся получить данные из Redis по ключу migrated_views_ + if shout_id and self.redis_views_key: + precounted_views = await redis.execute("HGET", self.redis_views_key, str(shout_id)) + if precounted_views: + return fresh_views + int(precounted_views) + + # Если нет id или данных, пытаемся получить по slug из отдельного хеша + precounted_views = await redis.execute("HGET", "migrated_views_slugs", shout_slug) + if precounted_views: + return fresh_views + int(precounted_views) + + return fresh_views @staticmethod - def get_shout_media(shout_slug) -> Dict[str, int]: + async def get_shout_media(shout_slug) -> Dict[str, int]: """Получение метрики воспроизведения shout по slug.""" self = ViewedStorage @@ -157,23 +200,29 @@ class ViewedStorage: return self.views_by_shout.get(shout_slug, 0) @staticmethod - def get_topic(topic_slug) -> int: + async def get_topic(topic_slug) -> int: """Получение суммарного значения просмотров темы.""" self = ViewedStorage - return sum(self.views_by_shout.get(shout_slug, 0) for shout_slug in self.shouts_by_topic.get(topic_slug, [])) + views_count = 0 + for shout_slug in self.shouts_by_topic.get(topic_slug, []): + views_count += await self.get_shout(shout_slug=shout_slug) + return views_count @staticmethod - def get_author(author_slug) -> int: + async def get_author(author_slug) -> int: """Получение суммарного значения просмотров автора.""" self = ViewedStorage - return sum(self.views_by_shout.get(shout_slug, 0) for shout_slug in self.shouts_by_author.get(author_slug, [])) + views_count = 0 + for shout_slug in self.shouts_by_author.get(author_slug, []): + views_count += await self.get_shout(shout_slug=shout_slug) + return views_count @staticmethod def update_topics(shout_slug): """Обновление счетчиков темы по slug shout""" self = ViewedStorage with local_session() as session: - # Определение вспомогательной функции для избежа��ия повторения кода + # Определение вспомогательной функции для избежания повторения кода def update_groups(dictionary, key, value): dictionary[key] = list(set(dictionary.get(key, []) + [value]))