Merge branch 'dev' into feat/sv-searching-txtai
All checks were successful
Deploy on push / deploy (push) Successful in 6s

:
This commit is contained in:
Stepan Vladovskiy 2025-04-15 19:20:48 -03:00
commit e382cc1ea5
10 changed files with 130 additions and 60 deletions

View File

@ -1,4 +1,9 @@
#### [0.4.19] - 2025-04-14
- dropped `Shout.description` and `Draft.description` to be UX-generated
- use redis to init views counters after migrator
#### [0.4.18] - 2025-04-10
- Fixed `Topic.stat.authors` and `Topic.stat.comments`
- Fixed unique constraint violation for empty slug values:
- Modified `update_draft` resolver to handle empty slug values
- Modified `create_draft` resolver to prevent empty slug values

View File

@ -31,6 +31,7 @@ class Draft(Base):
# required
created_at: int = Column(Integer, nullable=False, default=lambda: int(time.time()))
created_by: int = Column(ForeignKey("author.id"), nullable=False)
community: int = Column(ForeignKey("community.id"), nullable=False, default=1)
# optional
layout: str = Column(String, nullable=True, default="article")
@ -38,7 +39,6 @@ class Draft(Base):
title: str = Column(String, nullable=True)
subtitle: str | None = Column(String, nullable=True)
lead: str | None = Column(String, nullable=True)
description: str | None = Column(String, nullable=True)
body: str = Column(String, nullable=False, comment="Body")
media: dict | None = Column(JSON, nullable=True)
cover: str | None = Column(String, nullable=True, comment="Cover image url")

View File

@ -91,7 +91,6 @@ class Shout(Base):
cover: str | None = Column(String, nullable=True, comment="Cover image url")
cover_caption: str | None = Column(String, nullable=True, comment="Cover image alt caption")
lead: str | None = Column(String, nullable=True)
description: str | None = Column(String, nullable=True)
title: str = Column(String, nullable=False)
subtitle: str | None = Column(String, nullable=True)
layout: str = Column(String, nullable=False, default="article")

View File

@ -18,4 +18,5 @@ granian
httpx
orjson
pydantic
pydantic
trafilatura

View File

@ -1,6 +1,7 @@
import time
from operator import or_
import trafilatura
from sqlalchemy.sql import and_
from cache.cache import (
@ -30,7 +31,6 @@ def create_shout_from_draft(session, draft, author_id):
cover=draft.cover,
cover_caption=draft.cover_caption,
lead=draft.lead,
description=draft.description,
title=draft.title,
subtitle=draft.subtitle,
layout=draft.layout,
@ -104,7 +104,7 @@ async def create_draft(_, info, draft_input):
if "title" not in draft_input or not draft_input["title"]:
draft_input["title"] = "" # Пустая строка вместо NULL
# Проверяем slug - он должен быть или не пустым, или не передаваться вообще
if "slug" in draft_input and (draft_input["slug"] is None or draft_input["slug"] == ""):
# При создании черновика удаляем пустой slug из входных данных
@ -116,6 +116,10 @@ async def create_draft(_, info, draft_input):
if "id" in draft_input:
del draft_input["id"]
if "seo" not in draft_input and not draft_input["seo"]:
body_teaser = draft_input.get("body", "")[:300].split("\n")[:-1].join("\n")
draft_input["seo"] = draft_input.get("lead", body_teaser)
# Добавляем текущее время создания
draft_input["created_at"] = int(time.time())
@ -161,12 +165,20 @@ async def update_draft(_, info, draft_id: int, draft_input):
if not draft:
return {"error": "Draft not found"}
if "seo" not in draft_input and not draft.seo:
body_src = draft_input["body"] if "body" in draft_input else draft.body
body_text = trafilatura.extract(body_src)
lead_src = draft_input["lead"] if "lead" in draft_input else draft.lead
lead_text = trafilatura.extract(lead_src)
body_teaser = body_text[:300].split(". ")[:-1].join(".\n")
draft_input["seo"] = lead_text or body_teaser
Draft.update(draft, draft_input)
# Set updated_at and updated_by from the authenticated user
current_time = int(time.time())
draft.updated_at = current_time
draft.updated_by = author_id
session.commit()
return {"draft": draft}
@ -267,7 +279,6 @@ async def publish_shout(_, info, shout_id: int):
shout.cover = draft.cover
shout.cover_caption = draft.cover_caption
shout.lead = draft.lead
shout.description = draft.description
shout.layout = draft.layout
shout.media = draft.media
shout.lang = draft.lang

View File

@ -1,6 +1,7 @@
import time
import orjson
import trafilatura
from sqlalchemy import and_, desc, select
from sqlalchemy.orm import joinedload
from sqlalchemy.sql.functions import coalesce
@ -176,9 +177,16 @@ async def create_shout(_, info, inp):
logger.info(f"Creating shout with input: {inp}")
# Создаем публикацию без topics
body = inp.get("body", "")
lead = inp.get("lead", "")
body_text = trafilatura.extract(body)
lead_text = trafilatura.extract(lead)
seo = inp.get("seo", lead_text or body_text[:300].split(". ")[:-1].join(". "))
new_shout = Shout(
slug=slug,
body=inp.get("body", ""),
body=body,
seo=seo,
lead=lead,
layout=inp.get("layout", "article"),
title=inp.get("title", ""),
created_by=author_id,
@ -380,7 +388,7 @@ def patch_topics(session, shout, topics_input):
# @login_required
async def update_shout(_, info, shout_id: int, shout_input=None, publish=False):
logger.info(f"Starting update_shout with id={shout_id}, publish={publish}")
logger.debug(f"Full shout_input: {shout_input}")
logger.debug(f"Full shout_input: {shout_input}") # DraftInput
user_id = info.context.get("user_id")
roles = info.context.get("roles", [])

View File

@ -6,7 +6,7 @@ from cache.cache import (
get_cached_topic_authors,
get_cached_topic_by_slug,
get_cached_topic_followers,
invalidate_cache_by_prefix
invalidate_cache_by_prefix,
)
from orm.author import Author
from orm.topic import Topic
@ -126,7 +126,7 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None)
GROUP BY topic
"""
followers_stats = {row[0]: row[1] for row in session.execute(text(followers_stats_query))}
# Запрос на получение статистики авторов для выбранных тем
authors_stats_query = f"""
SELECT st.topic, COUNT(DISTINCT sa.author) as authors_count
@ -149,7 +149,6 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None)
"""
comments_stats = {row[0]: row[1] for row in session.execute(text(comments_stats_query))}
# Формируем результат с добавлением статистики
result = []
for topic in topics:
@ -158,7 +157,7 @@ async def get_topics_with_stats(limit=100, offset=0, community_id=None, by=None)
"shouts": shouts_stats.get(topic.id, 0),
"followers": followers_stats.get(topic.id, 0),
"authors": authors_stats.get(topic.id, 0),
"comments": comments_stats.get(topic.id, 0)
"comments": comments_stats.get(topic.id, 0),
}
result.append(topic_dict)

View File

@ -33,7 +33,6 @@ input DraftInput {
main_topic_id: Int # Changed from main_topic: Topic
media: [MediaItemInput] # Changed to use MediaItemInput
lead: String
description: String
subtitle: String
lang: String
seo: String

View File

@ -80,7 +80,6 @@ type Shout {
layout: String!
lead: String
description: String
subtitle: String
lang: String
cover: String
@ -100,6 +99,7 @@ type Shout {
featured_at: Int
deleted_at: Int
seo: String # generated if not set
version_of: Shout # TODO: use version_of somewhere
draft: Draft
media: [MediaItem]
@ -111,13 +111,12 @@ type Draft {
id: Int!
created_at: Int!
created_by: Author!
community: Community!
layout: String
slug: String
title: String
subtitle: String
lead: String
description: String
body: String
media: [MediaItem]
cover: String

View File

@ -2,9 +2,7 @@ import asyncio
import os
import time
from datetime import datetime, timedelta, timezone
from typing import Dict
import orjson
from typing import Dict, Optional
# ga
from google.analytics.data_v1beta import BetaAnalyticsDataClient
@ -20,33 +18,39 @@ from orm.author import Author
from orm.shout import Shout, ShoutAuthor, ShoutTopic
from orm.topic import Topic
from services.db import local_session
from services.redis import redis
from utils.logger import root_logger as logger
GOOGLE_KEYFILE_PATH = os.environ.get("GOOGLE_KEYFILE_PATH", "/dump/google-service.json")
GOOGLE_PROPERTY_ID = os.environ.get("GOOGLE_PROPERTY_ID", "")
VIEWS_FILEPATH = "/dump/views.json"
class ViewedStorage:
"""
Класс для хранения и доступа к данным о просмотрах.
Использует Redis в качестве основного хранилища и Google Analytics для сбора новых данных.
"""
lock = asyncio.Lock()
precounted_by_slug = {}
views_by_shout = {}
shouts_by_topic = {}
shouts_by_author = {}
views = None
period = 60 * 60 # каждый час
analytics_client: BetaAnalyticsDataClient | None = None
analytics_client: Optional[BetaAnalyticsDataClient] = None
auth_result = None
running = False
redis_views_key = None
last_update_timestamp = 0
start_date = datetime.now().strftime("%Y-%m-%d")
@staticmethod
async def init():
"""Подключение к клиенту Google Analytics с использованием аутентификации"""
"""Подключение к клиенту Google Analytics и загрузка данных о просмотрах из Redis"""
self = ViewedStorage
async with self.lock:
# Загрузка предварительно подсчитанных просмотров из файла JSON
self.load_precounted_views()
# Загрузка предварительно подсчитанных просмотров из Redis
await self.load_views_from_redis()
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", GOOGLE_KEYFILE_PATH)
if GOOGLE_KEYFILE_PATH and os.path.isfile(GOOGLE_KEYFILE_PATH):
@ -62,40 +66,54 @@ class ViewedStorage:
self.running = False
@staticmethod
def load_precounted_views():
"""Загрузка предварительно подсчитанных просмотров из файла JSON"""
async def load_views_from_redis():
"""Загрузка предварительно подсчитанных просмотров из Redis"""
self = ViewedStorage
viewfile_path = VIEWS_FILEPATH
if not os.path.exists(viewfile_path):
viewfile_path = os.path.join(os.path.curdir, "views.json")
if not os.path.exists(viewfile_path):
logger.warning(" * views.json not found")
return
logger.info(f" * loading views from {viewfile_path}")
try:
start_date_int = os.path.getmtime(viewfile_path)
start_date_str = datetime.fromtimestamp(start_date_int).strftime("%Y-%m-%d")
self.start_date = start_date_str
# Подключаемся к Redis если соединение не установлено
if not redis._client:
await redis.connect()
# Получаем список всех ключей migrated_views_* и находим самый последний
keys = await redis.execute("KEYS", "migrated_views_*")
if not keys:
logger.warning(" * No migrated_views keys found in Redis")
return
# Фильтруем только ключи timestamp формата (исключаем migrated_views_slugs)
timestamp_keys = [k for k in keys if k != "migrated_views_slugs"]
if not timestamp_keys:
logger.warning(" * No migrated_views timestamp keys found in Redis")
return
# Сортируем по времени создания (в названии ключа) и берем последний
timestamp_keys.sort()
latest_key = timestamp_keys[-1]
self.redis_views_key = latest_key
# Получаем метку времени создания для установки start_date
timestamp = await redis.execute("HGET", latest_key, "_timestamp")
if timestamp:
self.last_update_timestamp = int(timestamp)
timestamp_dt = datetime.fromtimestamp(int(timestamp))
self.start_date = timestamp_dt.strftime("%Y-%m-%d")
# Если данные сегодняшние, считаем их актуальными
now_date = datetime.now().strftime("%Y-%m-%d")
if now_date == self.start_date:
logger.info(" * views data is up to date!")
logger.info(" * Views data is up to date!")
else:
logger.warn(f" * {viewfile_path} is too old: {self.start_date}")
logger.warning(f" * Views data is from {self.start_date}, may need update")
with open(viewfile_path, "r") as file:
precounted_views = orjson.loads(file.read())
self.precounted_by_slug.update(precounted_views)
logger.info(f" * {len(precounted_views)} shouts with views was loaded.")
except Exception as e:
logger.error(f"precounted views loading error: {e}")
# Выводим информацию о количестве загруженных записей
total_entries = await redis.execute("HGET", latest_key, "_total")
if total_entries:
logger.info(f" * {total_entries} shouts with views loaded from Redis key: {latest_key}")
# noinspection PyTypeChecker
@staticmethod
async def update_pages():
"""Запрос всех страниц от Google Analytics, отсортрованных по количеству просмотров"""
"""Запрос всех страниц от Google Analytics, отсортированных по количеству просмотров"""
self = ViewedStorage
logger.info(" ⎧ views update from Google Analytics ---")
if self.running:
@ -140,15 +158,40 @@ class ViewedStorage:
self.running = False
@staticmethod
def get_shout(shout_slug="", shout_id=0) -> int:
"""Получение метрики просмотров shout по slug или id."""
async def get_shout(shout_slug="", shout_id=0) -> int:
"""
Получение метрики просмотров shout по slug или id.
Args:
shout_slug: Slug публикации
shout_id: ID публикации
Returns:
int: Количество просмотров
"""
self = ViewedStorage
# Получаем данные из Redis для новой схемы хранения
if not redis._client:
await redis.connect()
fresh_views = self.views_by_shout.get(shout_slug, 0)
precounted_views = self.precounted_by_slug.get(shout_slug, 0)
return fresh_views + precounted_views
# Если есть id, пытаемся получить данные из Redis по ключу migrated_views_<timestamp>
if shout_id and self.redis_views_key:
precounted_views = await redis.execute("HGET", self.redis_views_key, str(shout_id))
if precounted_views:
return fresh_views + int(precounted_views)
# Если нет id или данных, пытаемся получить по slug из отдельного хеша
precounted_views = await redis.execute("HGET", "migrated_views_slugs", shout_slug)
if precounted_views:
return fresh_views + int(precounted_views)
return fresh_views
@staticmethod
def get_shout_media(shout_slug) -> Dict[str, int]:
async def get_shout_media(shout_slug) -> Dict[str, int]:
"""Получение метрики воспроизведения shout по slug."""
self = ViewedStorage
@ -157,23 +200,29 @@ class ViewedStorage:
return self.views_by_shout.get(shout_slug, 0)
@staticmethod
def get_topic(topic_slug) -> int:
async def get_topic(topic_slug) -> int:
"""Получение суммарного значения просмотров темы."""
self = ViewedStorage
return sum(self.views_by_shout.get(shout_slug, 0) for shout_slug in self.shouts_by_topic.get(topic_slug, []))
views_count = 0
for shout_slug in self.shouts_by_topic.get(topic_slug, []):
views_count += await self.get_shout(shout_slug=shout_slug)
return views_count
@staticmethod
def get_author(author_slug) -> int:
async def get_author(author_slug) -> int:
"""Получение суммарного значения просмотров автора."""
self = ViewedStorage
return sum(self.views_by_shout.get(shout_slug, 0) for shout_slug in self.shouts_by_author.get(author_slug, []))
views_count = 0
for shout_slug in self.shouts_by_author.get(author_slug, []):
views_count += await self.get_shout(shout_slug=shout_slug)
return views_count
@staticmethod
def update_topics(shout_slug):
"""Обновление счетчиков темы по slug shout"""
self = ViewedStorage
with local_session() as session:
# Определение вспомогательной функции для избежа<EFBFBD><EFBFBD>ия повторения кода
# Определение вспомогательной функции для избежания повторения кода
def update_groups(dictionary, key, value):
dictionary[key] = list(set(dictionary.get(key, []) + [value]))