from utils.extract_text import extract_text
from utils.logger import root_logger as logger


def validate_html_content(html_content: str) -> tuple[bool, str]:
    """Check whether an HTML string yields extractable text.

    The content is passed to the project helper ``extract_text`` (the
    original documentation states it is backed by trafilatura -- confirm
    against ``utils.extract_text``). The content is considered valid when
    that helper returns a truthy result without raising.

    Args:
        html_content: HTML string to check.

    Returns:
        A ``(is_valid, error_message)`` tuple. ``error_message`` is an empty
        string when ``is_valid`` is True; otherwise it starts with
        ``"Invalid HTML content"`` and describes why validation failed.

    Example:
        >>> validate_html_content("   ")
        (False, 'Invalid HTML content: empty input')

        For content from which text can be extracted, the result is
        ``(True, '')``.
    """
    # Reject None, empty and whitespace-only input before calling the extractor.
    if not html_content or not html_content.strip():
        return False, "Invalid HTML content: empty input"

    # Keep the try body limited to the single call that is expected to raise.
    try:
        extracted = extract_text(html_content)
    except Exception as e:
        # Validation boundary: any extractor failure is logged with its
        # traceback and reported to the caller as an invalid-content result.
        logger.error(f"HTML validation error: {e}", exc_info=True)
        return False, f"Invalid HTML content: {e!s}"

    if not extracted:
        return False, "Invalid HTML content: no text could be extracted"

    # The second element is an error message, so it stays empty on success
    # rather than carrying the extracted text.
    return True, ""