draft-validator-fix
Some checks failed
Deploy on push / deploy (push) Failing after 2m30s

This commit is contained in:
2025-08-23 12:36:04 +03:00
parent ee53d5b491
commit 19a964585e
4 changed files with 93 additions and 35 deletions

34
utils/validators.py Normal file
View File

@@ -0,0 +1,34 @@
from utils.extract_text import extract_text
from utils.logger import root_logger as logger
def validate_html_content(html_content: str) -> tuple[bool, str]:
    """Check whether text can be extracted from an HTML string.

    The content is considered valid when the project's ``extract_text``
    helper returns a non-empty result for it.

    Args:
        html_content: HTML string to validate.

    Returns:
        A ``(is_valid, error_message)`` tuple. ``error_message`` is an empty
        string when the content is valid; otherwise it starts with
        ``"Invalid HTML content"`` and describes why validation failed.

    Example:
        >>> validate_html_content("")
        (False, 'Invalid HTML content: empty input')
        >>> is_valid, error = validate_html_content("   ")
        >>> is_valid
        False
        >>> 'Invalid HTML' in error
        True
    """
    # Guard clause: empty / whitespace-only input is never handed to the
    # extractor, and the caller still receives a reason for the failure.
    if not html_content or not html_content.strip():
        return False, "Invalid HTML content: empty input"

    try:
        extracted = extract_text(html_content)
    except Exception as e:
        # Deliberate best-effort boundary: any extractor failure is reported
        # to the caller as a validation error instead of propagating.
        logger.error("HTML validation error: %s", e, exc_info=True)
        return False, f"Invalid HTML content: {e!s}"

    if not extracted:
        return False, "Invalid HTML content: no text could be extracted"

    # Second element is the error-message slot, so it stays empty on success
    # (the extracted text itself is not part of this function's contract).
    return True, ""