Improve topic sorting: add popular sorting by publications and authors count
This commit is contained in:
@@ -2,7 +2,7 @@ import re
|
||||
from difflib import ndiff
|
||||
|
||||
|
||||
def get_diff(original, modified):
|
||||
def get_diff(original: str, modified: str) -> list[str]:
|
||||
"""
|
||||
Get the difference between two strings using difflib.
|
||||
|
||||
@@ -13,11 +13,10 @@ def get_diff(original, modified):
|
||||
Returns:
|
||||
A list of differences.
|
||||
"""
|
||||
diff = list(ndiff(original.split(), modified.split()))
|
||||
return diff
|
||||
return list(ndiff(original.split(), modified.split()))
|
||||
|
||||
|
||||
def apply_diff(original, diff):
|
||||
def apply_diff(original: str, diff: list[str]) -> str:
|
||||
"""
|
||||
Apply the difference to the original string.
|
||||
|
||||
|
@@ -1,28 +1,118 @@
|
||||
from decimal import Decimal
|
||||
from json import JSONEncoder
|
||||
"""
|
||||
JSON encoders and utilities
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import decimal
|
||||
from typing import Any, Union
|
||||
|
||||
import orjson
|
||||
|
||||
|
||||
class CustomJSONEncoder(JSONEncoder):
|
||||
def default_json_encoder(obj: Any) -> Any:
    """
    Fallback encoder for objects the JSON serializer cannot handle natively.

    The checks run from the most specific to the most generic, so that a
    well-known type or an explicit ``__json__`` hook is never shadowed by the
    catch-all ``__dict__`` fallback.

    Args:
        obj: Object to serialize.

    Returns:
        A serializable representation of the object: the result of
        ``obj.dict()`` when that method exists, an ISO 8601 string for
        date/time values, a ``float`` for ``Decimal``, the result of
        ``obj.__json__()`` when defined, otherwise the instance ``__dict__``.

    Raises:
        TypeError: If the object cannot be serialized.
    """
    if hasattr(obj, "dict") and callable(obj.dict):
        return obj.dict()
    # Concrete types come before the generic fallbacks: a Python-level
    # subclass of datetime carries an instance ``__dict__`` and would
    # otherwise be emitted as its attribute dict instead of a timestamp.
    if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
        return obj.isoformat()
    if isinstance(obj, decimal.Decimal):
        return float(obj)
    # An explicit hook wins over ``__dict__``: ordinary class instances that
    # define ``__json__`` also have a ``__dict__``, so testing ``__dict__``
    # first would make this branch unreachable for them.
    if hasattr(obj, "__json__"):
        return obj.__json__()
    if hasattr(obj, "__dict__"):
        return obj.__dict__
    msg = f"Object of type {type(obj)} is not JSON serializable"
    raise TypeError(msg)
|
||||
|
||||
def default(self, obj):
|
||||
if isinstance(obj, Decimal):
|
||||
return str(obj)
|
||||
|
||||
# Проверяем, есть ли у объекта метод dict() (как у моделей SQLAlchemy)
|
||||
if hasattr(obj, "dict") and callable(obj.dict):
|
||||
return obj.dict()
|
||||
def orjson_dumps(obj: Any, **kwargs: Any) -> bytes:
    """
    Serialize an object to JSON bytes with orjson.

    Args:
        obj: Object to serialize.
        **kwargs: Extra options. Only ``indent`` is looked at: a truthy value
            switches on two-space indentation; every other key is ignored.

    Returns:
        bytes: The JSON document as bytes.
    """
    # Dataclass serialization is always requested; indentation is opt-in.
    flags = orjson.OPT_SERIALIZE_DATACLASS
    wants_indent = bool(kwargs.get("indent"))
    if wants_indent:
        flags = flags | orjson.OPT_INDENT_2

    return orjson.dumps(obj, default=default_json_encoder, option=flags)
|
||||
|
||||
|
||||
def orjson_loads(data: Union[str, bytes]) -> Any:
    """
    Deserialize JSON with orjson.

    Args:
        data: JSON document as a string or as bytes.

    Returns:
        The deserialized object.
    """
    parsed = orjson.loads(data)
    return parsed
|
||||
|
||||
|
||||
class JSONEncoder:
    """Custom JSON codec built on top of orjson."""

    @staticmethod
    def encode(obj: Any) -> str:
        """Serialize ``obj`` and return the JSON document as a UTF-8 decoded string."""
        raw = orjson_dumps(obj)
        return raw.decode("utf-8")

    @staticmethod
    def decode(data: Union[str, bytes]) -> Any:
        """Parse a JSON document given as a string or bytes and return the object."""
        decoded = orjson_loads(data)
        return decoded
|
||||
|
||||
|
||||
# Create an instance for backward compatibility
|
||||
CustomJSONEncoder = JSONEncoder()
|
||||
|
||||
|
||||
def fast_json_dumps(obj: Any, indent: bool = False) -> str:
    """
    Fast JSON serialization.

    Args:
        obj: Object to serialize.
        indent: Whether to format the output with indentation.

    Returns:
        The JSON document as a string.
    """
    payload = orjson_dumps(obj, indent=indent)
    return payload.decode("utf-8")
|
||||
|
||||
|
||||
def fast_json_loads(data: Union[str, bytes]) -> Any:
    """
    Fast JSON deserialization.

    Args:
        data: JSON document as a string or as bytes.

    Returns:
        The deserialized object.
    """
    parsed = orjson_loads(data)
    return parsed
|
||||
|
||||
|
||||
# Exported for convenience
|
||||
dumps = fast_json_dumps
|
||||
loads = fast_json_loads
|
||||
|
@@ -4,24 +4,31 @@
|
||||
|
||||
import trafilatura
|
||||
|
||||
from utils.logger import root_logger as logger
|
||||
|
||||
|
||||
def extract_text(html: str) -> str:
    """
    Extract plain text from HTML.

    Args:
        html: HTML string.

    Returns:
        str: The extracted text, or an empty string when nothing was
        extracted or the extraction raised an error.
    """
    # Extraction settings handed to trafilatura as keyword arguments.
    extract_options = {
        "include_comments": False,
        "include_tables": True,
        "include_formatting": False,
        "favor_precision": True,
    }
    try:
        extracted = trafilatura.extract(html, **extract_options)
        # A falsy result (e.g. None) is normalized to an empty string.
        return extracted or ""
    except Exception as e:
        # Best effort: the failure is logged and reported as "no text".
        logger.error(f"Error extracting text: {e}")
        return ""
|
||||
|
||||
|
||||
def wrap_html_fragment(fragment: str) -> str:
|
||||
|
@@ -5,48 +5,55 @@ from auth.orm import Author
|
||||
from services.db import local_session
|
||||
|
||||
|
||||
# Translation table built once at import time. A mapping is required (rather
# than two equal-length strings) because several Cyrillic letters expand to
# more than one Latin character and the signs map to an empty string.
_TRANSLIT_TABLE = str.maketrans(
    {
        "а": "a",
        "б": "b",
        "в": "v",
        "г": "g",
        "д": "d",
        "е": "e",
        "ё": "yo",
        "ж": "zh",
        "з": "z",
        "и": "i",
        "й": "y",
        "к": "k",
        "л": "l",
        "м": "m",
        "н": "n",
        "о": "o",
        "п": "p",
        "р": "r",
        "с": "s",
        "т": "t",
        "у": "u",
        "ф": "f",
        "х": "h",
        "ц": "c",
        "ч": "ch",
        "ш": "sh",
        "щ": "sch",
        "ъ": "",
        "ы": "y",
        "ь": "",
        "э": "e",
        "ю": "yu",
        "я": "ya",
        ".": "-",
    }
)


def replace_translit(src: str) -> str:
    """
    Transliterate lowercase Russian letters in ``src`` to Latin characters.

    Each lowercase Cyrillic letter is replaced by its Latin counterpart, the
    hard and soft signs are dropped, and ``.`` becomes ``-``. Every other
    character (uppercase Cyrillic included) is passed through unchanged.

    Args:
        src: Text to transliterate.

    Returns:
        The transliterated text.
    """
    # One pass over the string instead of per-character concatenation.
    return src.translate(_TRANSLIT_TABLE)
|
||||
|
||||
|
||||
def generate_unique_slug(src):
|
||||
def generate_unique_slug(src: str) -> str:
|
||||
print("[resolvers.auth] generating slug from: " + src)
|
||||
slug = replace_translit(src.lower())
|
||||
slug = re.sub("[^0-9a-zA-Z]+", "-", slug)
|
||||
@@ -63,3 +70,6 @@ def generate_unique_slug(src):
|
||||
unique_slug = slug
|
||||
print("[resolvers.auth] " + unique_slug)
|
||||
return quote_plus(unique_slug.replace("'", "")).replace("+", "-")
|
||||
|
||||
# Fallback return если что-то пошло не так
|
||||
return quote_plus(slug.replace("'", "")).replace("+", "-")
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import colorlog
|
||||
|
||||
@@ -7,7 +8,7 @@ _lib_path = Path(__file__).parents[1]
|
||||
_leng_path = len(_lib_path.as_posix())
|
||||
|
||||
|
||||
def filter(record: logging.LogRecord):
|
||||
def filter(record: logging.LogRecord) -> bool:
|
||||
# Define `package` attribute with the relative path.
|
||||
record.package = record.pathname[_leng_path + 1 :].replace(".py", "")
|
||||
record.emoji = (
|
||||
@@ -23,7 +24,7 @@ def filter(record: logging.LogRecord):
|
||||
if record.levelno == logging.CRITICAL
|
||||
else ""
|
||||
)
|
||||
return record
|
||||
return True
|
||||
|
||||
|
||||
# Define the color scheme
|
||||
@@ -57,28 +58,32 @@ fmt_config = {
|
||||
|
||||
|
||||
class MultilineColoredFormatter(colorlog.ColoredFormatter):
    """Colored formatter that supplies default ``emoji``/``package`` record fields."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Initialize the base formatter and remember the color mappings.

        Args:
            *args: Positional arguments forwarded to the base formatter.
            **kwargs: Keyword arguments forwarded to the base formatter.
        """
        super().__init__(*args, **kwargs)
        # Assigned after the base initializer ran; only keyword-passed values
        # are picked up, otherwise an empty mapping is stored.
        self.log_colors = kwargs.pop("log_colors", {})
        self.secondary_log_colors = kwargs.pop("secondary_log_colors", {})

    def format(self, record: logging.LogRecord) -> str:
        """
        Format a record, filling in default extras when they are missing.

        Args:
            record: The log record to format.

        Returns:
            The formatted text. Multi-line output is returned exactly as the
            base formatter produced it, each line appearing once.
        """
        # Add default emoji if not present
        if not hasattr(record, "emoji"):
            record.emoji = "📝"

        # Add default package if not present
        if not hasattr(record, "package"):
            record.package = getattr(record, "name", "unknown")

        # The formatted string already contains every continuation line.
        # Re-appending the lines after the first (as was done before)
        # duplicated them in the output, so the text is returned as is.
        return super().format(record)
|
||||
|
||||
|
||||
# Create a MultilineColoredFormatter object for colorized logging
|
||||
@@ -89,7 +94,7 @@ stream = logging.StreamHandler()
|
||||
stream.setFormatter(formatter)
|
||||
|
||||
|
||||
def get_colorful_logger(name="main"):
|
||||
def get_colorful_logger(name: str = "main") -> logging.Logger:
|
||||
# Create and configure the logger
|
||||
logger = logging.getLogger(name)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
Reference in New Issue
Block a user