Merge branch 'dev' into feat/sv-searching-txtai
All checks were successful
Deploy on push / deploy (push) Successful in 6s

:
This commit is contained in:
Stepan Vladovskiy 2025-04-15 19:20:48 -03:00
commit e382cc1ea5
10 changed files with 130 additions and 60 deletions

View File

@ -1,4 +1,9 @@
#### [0.4.19] - 2025-04-14
- Dropped `Shout.description` and `Draft.description` so that descriptions are UX-generated
- Views counters are now initialized from Redis after the migrator runs
#### [0.4.18] - 2025-04-10 #### [0.4.18] - 2025-04-10
- Fixed `Topic.stat.authors` and `Topic.stat.comments`
- Fixed unique constraint violation for empty slug values: - Fixed unique constraint violation for empty slug values:
- Modified `update_draft` resolver to handle empty slug values - Modified `update_draft` resolver to handle empty slug values
- Modified `create_draft` resolver to prevent empty slug values - Modified `create_draft` resolver to prevent empty slug values

View File

@ -31,6 +31,7 @@ class Draft(Base):
# required # required
created_at: int = Column(Integer, nullable=False, default=lambda: int(time.time())) created_at: int = Column(Integer, nullable=False, default=lambda: int(time.time()))
created_by: int = Column(ForeignKey("author.id"), nullable=False) created_by: int = Column(ForeignKey("author.id"), nullable=False)
community: int = Column(ForeignKey("community.id"), nullable=False, default=1)
# optional # optional
layout: str = Column(String, nullable=True, default="article") layout: str = Column(String, nullable=True, default="article")
@ -38,7 +39,6 @@ class Draft(Base):
title: str = Column(String, nullable=True) title: str = Column(String, nullable=True)
subtitle: str | None = Column(String, nullable=True) subtitle: str | None = Column(String, nullable=True)
lead: str | None = Column(String, nullable=True) lead: str | None = Column(String, nullable=True)
description: str | None = Column(String, nullable=True)
body: str = Column(String, nullable=False, comment="Body") body: str = Column(String, nullable=False, comment="Body")
media: dict | None = Column(JSON, nullable=True) media: dict | None = Column(JSON, nullable=True)
cover: str | None = Column(String, nullable=True, comment="Cover image url") cover: str | None = Column(String, nullable=True, comment="Cover image url")

View File

@ -91,7 +91,6 @@ class Shout(Base):
cover: str | None = Column(String, nullable=True, comment="Cover image url") cover: str | None = Column(String, nullable=True, comment="Cover image url")
cover_caption: str | None = Column(String, nullable=True, comment="Cover image alt caption") cover_caption: str | None = Column(String, nullable=True, comment="Cover image alt caption")
lead: str | None = Column(String, nullable=True) lead: str | None = Column(String, nullable=True)
description: str | None = Column(String, nullable=True)
title: str = Column(String, nullable=False) title: str = Column(String, nullable=False)
subtitle: str | None = Column(String, nullable=True) subtitle: str | None = Column(String, nullable=True)
layout: str = Column(String, nullable=False, default="article") layout: str = Column(String, nullable=False, default="article")

View File

@ -18,4 +18,5 @@ granian
httpx httpx
orjson orjson
pydantic pydantic
trafilatura

View File

@ -1,6 +1,7 @@
import time import time
from operator import or_ from operator import or_
import trafilatura
from sqlalchemy.sql import and_ from sqlalchemy.sql import and_
from cache.cache import ( from cache.cache import (
@ -30,7 +31,6 @@ def create_shout_from_draft(session, draft, author_id):
cover=draft.cover, cover=draft.cover,
cover_caption=draft.cover_caption, cover_caption=draft.cover_caption,
lead=draft.lead, lead=draft.lead,
description=draft.description,
title=draft.title, title=draft.title,
subtitle=draft.subtitle, subtitle=draft.subtitle,
layout=draft.layout, layout=draft.layout,
@ -104,7 +104,7 @@ async def create_draft(_, info, draft_input):
if "title" not in draft_input or not draft_input["title"]: if "title" not in draft_input or not draft_input["title"]:
draft_input["title"] = "" # Пустая строка вместо NULL draft_input["title"] = "" # Пустая строка вместо NULL
# Проверяем slug - он должен быть или не пустым, или не передаваться вообще # Проверяем slug - он должен быть или не пустым, или не передаваться вообще
if "slug" in draft_input and (draft_input["slug"] is None or draft_input["slug"] == ""): if "slug" in draft_input and (draft_input["slug"] is None or draft_input["slug"] == ""):
# При создании черновика удаляем пустой slug из входных данных # При создании черновика удаляем пустой slug из входных данных
@ -116,6 +116,10 @@ async def create_draft(_, info, draft_input):
if "id" in draft_input: if "id" in draft_input:
del draft_input["id"] del draft_input["id"]
if "seo" not in draft_input and not draft_input["seo"]:
body_teaser = draft_input.get("body", "")[:300].split("\n")[:-1].join("\n")
draft_input["seo"] = draft_input.get("lead", body_teaser)
# Добавляем текущее время создания # Добавляем текущее время создания
draft_input["created_at"] = int(time.time()) draft_input["created_at"] = int(time.time())
@ -161,12 +165,20 @@ async def update_draft(_, info, draft_id: int, draft_input):
if not draft: if not draft:
return {"error": "Draft not found"} return {"error": "Draft not found"}
if "seo" not in draft_input and not draft.seo:
body_src = draft_input["body"] if "body" in draft_input else draft.body
body_text = trafilatura.extract(body_src)
lead_src = draft_input["lead"] if "lead" in draft_input else draft.lead
lead_text = trafilatura.extract(lead_src)
body_teaser = body_text[:300].split(". ")[:-1].join(".\n")
draft_input["seo"] = lead_text or body_teaser
Draft.update(draft, draft_input) Draft.update(draft, draft_input)
# Set updated_at and updated_by from the authenticated user # Set updated_at and updated_by from the authenticated user
current_time = int(time.time()) current_time = int(time.time())
draft.updated_at = current_time draft.updated_at = current_time
draft.updated_by = author_id draft.updated_by = author_id
session.commit() session.commit()
return {"draft": draft} return {"draft": draft}
@ -267,7 +279,6 @@ async def publish_shout(_, info, shout_id: int):
shout.cover = draft.cover shout.cover = draft.cover
shout.cover_caption = draft.cover_caption shout.cover_caption = draft.cover_caption
shout.lead = draft.lead shout.lead = draft.lead
shout.description = draft.description
shout.layout = draft.layout shout.layout = draft.layout
shout.media = draft.media shout.media = draft.media
shout.lang = draft.lang shout.lang = draft.lang

View File

@ -1,6 +1,7 @@
import time import time
import orjson import orjson
import trafilatura
from sqlalchemy import and_, desc, select from sqlalchemy import and_, desc, select
from sqlalchemy.orm import joinedload from sqlalchemy.orm import joinedload
from sqlalchemy.sql.functions import coalesce from sqlalchemy.sql.functions import coalesce
@ -176,9 +177,16 @@ async def create_shout(_, info, inp):
logger.info(f"Creating shout with input: {inp}") logger.info(f"Creating shout with input: {inp}")
# Создаем публикацию без topics # Создаем публикацию без topics
body = inp.get("body", "")
lead = inp.get("lead", "")
body_text = trafilatura.extract(body)
lead_text = trafilatura.extract(lead)
seo = inp.get("seo", lead_text or body_text[:300].split(". ")[:-1].join(". "))
new_shout = Shout( new_shout = Shout(
slug=slug, slug=slug,
body=inp.get("body", ""), body=body,
seo=seo,
lead=lead,
layout=inp.get("layout", "article"), layout=inp.get("layout", "article"),
title=inp.get("title", ""), title=inp.get("title", ""),
created_by=author_id, created_by=author_id,
@ -380,7 +388,7 @@ def patch_topics(session, shout, topics_input):
# @login_required # @login_required
async def update_shout(_, info, shout_id: int, shout_input=None, publish=False): async def update_shout(_, info, shout_id: int, shout_input=None, publish=False):
logger.info(f"Starting update_shout with id={shout_id}, publish={publish}") logger.info(f"Starting update_shout with id={shout_id}, publish={publish}")
logger.debug(f"Full shout_input: {shout_input}") logger.debug(f"Full shout_input: {shout_input}") # DraftInput
user_id = info.context.get("user_id") user_id = info.context.get("user_id")
roles = info.context.get("roles", []) roles = info.context.get("roles", [])

View File

@ -6,7 +6,7 @@ from cache.cache import (
get_cached_topic_authors, get_cached_topic_authors,
get_cached_topic_by_slug, get_cached_topic_by_slug,
get_cached_topic_followers, get_cached_topic_followers,
invalidate_cache_by_prefix invalidate_cache_by_prefix,
) )
from orm.author import Author from orm.author import Author
from orm.topic import Topic from orm.topic import Topic
@ -126,7 +126,7 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None)
GROUP BY topic GROUP BY topic
""" """
followers_stats = {row[0]: row[1] for row in session.execute(text(followers_stats_query))} followers_stats = {row[0]: row[1] for row in session.execute(text(followers_stats_query))}
# Запрос на получение статистики авторов для выбранных тем # Запрос на получение статистики авторов для выбранных тем
authors_stats_query = f""" authors_stats_query = f"""
SELECT st.topic, COUNT(DISTINCT sa.author) as authors_count SELECT st.topic, COUNT(DISTINCT sa.author) as authors_count
@ -149,7 +149,6 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None)
""" """
comments_stats = {row[0]: row[1] for row in session.execute(text(comments_stats_query))} comments_stats = {row[0]: row[1] for row in session.execute(text(comments_stats_query))}
# Формируем результат с добавлением статистики # Формируем результат с добавлением статистики
result = [] result = []
for topic in topics: for topic in topics:
@ -158,7 +157,7 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None)
"shouts": shouts_stats.get(topic.id, 0), "shouts": shouts_stats.get(topic.id, 0),
"followers": followers_stats.get(topic.id, 0), "followers": followers_stats.get(topic.id, 0),
"authors": authors_stats.get(topic.id, 0), "authors": authors_stats.get(topic.id, 0),
"comments": comments_stats.get(topic.id, 0) "comments": comments_stats.get(topic.id, 0),
} }
result.append(topic_dict) result.append(topic_dict)

View File

@ -33,7 +33,6 @@ input DraftInput {
main_topic_id: Int # Changed from main_topic: Topic main_topic_id: Int # Changed from main_topic: Topic
media: [MediaItemInput] # Changed to use MediaItemInput media: [MediaItemInput] # Changed to use MediaItemInput
lead: String lead: String
description: String
subtitle: String subtitle: String
lang: String lang: String
seo: String seo: String

View File

@ -80,7 +80,6 @@ type Shout {
layout: String! layout: String!
lead: String lead: String
description: String
subtitle: String subtitle: String
lang: String lang: String
cover: String cover: String
@ -100,6 +99,7 @@ type Shout {
featured_at: Int featured_at: Int
deleted_at: Int deleted_at: Int
seo: String # generated if not set
version_of: Shout # TODO: use version_of somewhere version_of: Shout # TODO: use version_of somewhere
draft: Draft draft: Draft
media: [MediaItem] media: [MediaItem]
@ -111,13 +111,12 @@ type Draft {
id: Int! id: Int!
created_at: Int! created_at: Int!
created_by: Author! created_by: Author!
community: Community!
layout: String layout: String
slug: String slug: String
title: String title: String
subtitle: String subtitle: String
lead: String lead: String
description: String
body: String body: String
media: [MediaItem] media: [MediaItem]
cover: String cover: String

View File

@ -2,9 +2,7 @@ import asyncio
import os import os
import time import time
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Dict from typing import Dict, Optional
import orjson
# ga # ga
from google.analytics.data_v1beta import BetaAnalyticsDataClient from google.analytics.data_v1beta import BetaAnalyticsDataClient
@ -20,33 +18,39 @@ from orm.author import Author
from orm.shout import Shout, ShoutAuthor, ShoutTopic from orm.shout import Shout, ShoutAuthor, ShoutTopic
from orm.topic import Topic from orm.topic import Topic
from services.db import local_session from services.db import local_session
from services.redis import redis
from utils.logger import root_logger as logger from utils.logger import root_logger as logger
GOOGLE_KEYFILE_PATH = os.environ.get("GOOGLE_KEYFILE_PATH", "/dump/google-service.json") GOOGLE_KEYFILE_PATH = os.environ.get("GOOGLE_KEYFILE_PATH", "/dump/google-service.json")
GOOGLE_PROPERTY_ID = os.environ.get("GOOGLE_PROPERTY_ID", "") GOOGLE_PROPERTY_ID = os.environ.get("GOOGLE_PROPERTY_ID", "")
VIEWS_FILEPATH = "/dump/views.json"
class ViewedStorage: class ViewedStorage:
"""
Класс для хранения и доступа к данным о просмотрах.
Использует Redis в качестве основного хранилища и Google Analytics для сбора новых данных.
"""
lock = asyncio.Lock() lock = asyncio.Lock()
precounted_by_slug = {}
views_by_shout = {} views_by_shout = {}
shouts_by_topic = {} shouts_by_topic = {}
shouts_by_author = {} shouts_by_author = {}
views = None views = None
period = 60 * 60 # каждый час period = 60 * 60 # каждый час
analytics_client: BetaAnalyticsDataClient | None = None analytics_client: Optional[BetaAnalyticsDataClient] = None
auth_result = None auth_result = None
running = False running = False
redis_views_key = None
last_update_timestamp = 0
start_date = datetime.now().strftime("%Y-%m-%d") start_date = datetime.now().strftime("%Y-%m-%d")
@staticmethod @staticmethod
async def init(): async def init():
"""Подключение к клиенту Google Analytics с использованием аутентификации""" """Подключение к клиенту Google Analytics и загрузка данных о просмотрах из Redis"""
self = ViewedStorage self = ViewedStorage
async with self.lock: async with self.lock:
# Загрузка предварительно подсчитанных просмотров из файла JSON # Загрузка предварительно подсчитанных просмотров из Redis
self.load_precounted_views() await self.load_views_from_redis()
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", GOOGLE_KEYFILE_PATH) os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", GOOGLE_KEYFILE_PATH)
if GOOGLE_KEYFILE_PATH and os.path.isfile(GOOGLE_KEYFILE_PATH): if GOOGLE_KEYFILE_PATH and os.path.isfile(GOOGLE_KEYFILE_PATH):
@ -62,40 +66,54 @@ class ViewedStorage:
self.running = False self.running = False
@staticmethod @staticmethod
def load_precounted_views(): async def load_views_from_redis():
"""Загрузка предварительно подсчитанных просмотров из файла JSON""" """Загрузка предварительно подсчитанных просмотров из Redis"""
self = ViewedStorage self = ViewedStorage
viewfile_path = VIEWS_FILEPATH
if not os.path.exists(viewfile_path):
viewfile_path = os.path.join(os.path.curdir, "views.json")
if not os.path.exists(viewfile_path):
logger.warning(" * views.json not found")
return
logger.info(f" * loading views from {viewfile_path}") # Подключаемся к Redis если соединение не установлено
try: if not redis._client:
start_date_int = os.path.getmtime(viewfile_path) await redis.connect()
start_date_str = datetime.fromtimestamp(start_date_int).strftime("%Y-%m-%d")
self.start_date = start_date_str # Получаем список всех ключей migrated_views_* и находим самый последний
keys = await redis.execute("KEYS", "migrated_views_*")
if not keys:
logger.warning(" * No migrated_views keys found in Redis")
return
# Фильтруем только ключи timestamp формата (исключаем migrated_views_slugs)
timestamp_keys = [k for k in keys if k != "migrated_views_slugs"]
if not timestamp_keys:
logger.warning(" * No migrated_views timestamp keys found in Redis")
return
# Сортируем по времени создания (в названии ключа) и берем последний
timestamp_keys.sort()
latest_key = timestamp_keys[-1]
self.redis_views_key = latest_key
# Получаем метку времени создания для установки start_date
timestamp = await redis.execute("HGET", latest_key, "_timestamp")
if timestamp:
self.last_update_timestamp = int(timestamp)
timestamp_dt = datetime.fromtimestamp(int(timestamp))
self.start_date = timestamp_dt.strftime("%Y-%m-%d")
# Если данные сегодняшние, считаем их актуальными
now_date = datetime.now().strftime("%Y-%m-%d") now_date = datetime.now().strftime("%Y-%m-%d")
if now_date == self.start_date: if now_date == self.start_date:
logger.info(" * views data is up to date!") logger.info(" * Views data is up to date!")
else: else:
logger.warn(f" * {viewfile_path} is too old: {self.start_date}") logger.warning(f" * Views data is from {self.start_date}, may need update")
with open(viewfile_path, "r") as file: # Выводим информацию о количестве загруженных записей
precounted_views = orjson.loads(file.read()) total_entries = await redis.execute("HGET", latest_key, "_total")
self.precounted_by_slug.update(precounted_views) if total_entries:
logger.info(f" * {len(precounted_views)} shouts with views was loaded.") logger.info(f" * {total_entries} shouts with views loaded from Redis key: {latest_key}")
except Exception as e:
logger.error(f"precounted views loading error: {e}")
# noinspection PyTypeChecker # noinspection PyTypeChecker
@staticmethod @staticmethod
async def update_pages(): async def update_pages():
"""Запрос всех страниц от Google Analytics, отсортрованных по количеству просмотров""" """Запрос всех страниц от Google Analytics, отсортированных по количеству просмотров"""
self = ViewedStorage self = ViewedStorage
logger.info(" ⎧ views update from Google Analytics ---") logger.info(" ⎧ views update from Google Analytics ---")
if self.running: if self.running:
@ -140,15 +158,40 @@ class ViewedStorage:
self.running = False self.running = False
@staticmethod @staticmethod
def get_shout(shout_slug="", shout_id=0) -> int: async def get_shout(shout_slug="", shout_id=0) -> int:
"""Получение метрики просмотров shout по slug или id.""" """
Получение метрики просмотров shout по slug или id.
Args:
shout_slug: Slug публикации
shout_id: ID публикации
Returns:
int: Количество просмотров
"""
self = ViewedStorage self = ViewedStorage
# Получаем данные из Redis для новой схемы хранения
if not redis._client:
await redis.connect()
fresh_views = self.views_by_shout.get(shout_slug, 0) fresh_views = self.views_by_shout.get(shout_slug, 0)
precounted_views = self.precounted_by_slug.get(shout_slug, 0)
return fresh_views + precounted_views # Если есть id, пытаемся получить данные из Redis по ключу migrated_views_<timestamp>
if shout_id and self.redis_views_key:
precounted_views = await redis.execute("HGET", self.redis_views_key, str(shout_id))
if precounted_views:
return fresh_views + int(precounted_views)
# Если нет id или данных, пытаемся получить по slug из отдельного хеша
precounted_views = await redis.execute("HGET", "migrated_views_slugs", shout_slug)
if precounted_views:
return fresh_views + int(precounted_views)
return fresh_views
@staticmethod @staticmethod
def get_shout_media(shout_slug) -> Dict[str, int]: async def get_shout_media(shout_slug) -> Dict[str, int]:
"""Получение метрики воспроизведения shout по slug.""" """Получение метрики воспроизведения shout по slug."""
self = ViewedStorage self = ViewedStorage
@ -157,23 +200,29 @@ class ViewedStorage:
return self.views_by_shout.get(shout_slug, 0) return self.views_by_shout.get(shout_slug, 0)
@staticmethod @staticmethod
def get_topic(topic_slug) -> int: async def get_topic(topic_slug) -> int:
"""Получение суммарного значения просмотров темы.""" """Получение суммарного значения просмотров темы."""
self = ViewedStorage self = ViewedStorage
return sum(self.views_by_shout.get(shout_slug, 0) for shout_slug in self.shouts_by_topic.get(topic_slug, [])) views_count = 0
for shout_slug in self.shouts_by_topic.get(topic_slug, []):
views_count += await self.get_shout(shout_slug=shout_slug)
return views_count
@staticmethod @staticmethod
def get_author(author_slug) -> int: async def get_author(author_slug) -> int:
"""Получение суммарного значения просмотров автора.""" """Получение суммарного значения просмотров автора."""
self = ViewedStorage self = ViewedStorage
return sum(self.views_by_shout.get(shout_slug, 0) for shout_slug in self.shouts_by_author.get(author_slug, [])) views_count = 0
for shout_slug in self.shouts_by_author.get(author_slug, []):
views_count += await self.get_shout(shout_slug=shout_slug)
return views_count
@staticmethod @staticmethod
def update_topics(shout_slug): def update_topics(shout_slug):
"""Обновление счетчиков темы по slug shout""" """Обновление счетчиков темы по slug shout"""
self = ViewedStorage self = ViewedStorage
with local_session() as session: with local_session() as session:
# Определение вспомогательной функции для избежания повторения кода # Определение вспомогательной функции для избежания повторения кода
def update_groups(dictionary, key, value): def update_groups(dictionary, key, value):
dictionary[key] = list(set(dictionary.get(key, []) + [value])) dictionary[key] = list(set(dictionary.get(key, []) + [value]))