import asyncio
import gzip
import json
import pickle
import time
from pathlib import Path
from typing import Any, Dict, List, cast

import muvera
import numpy as np
from sentence_transformers import SentenceTransformer

from settings import MUVERA_INDEX_NAME, SEARCH_MAX_BATCH_SIZE, SEARCH_PREFETCH_SIZE
from utils.logger import root_logger as logger

# Global collection for background tasks
background_tasks: List[asyncio.Task] = []


class MuveraWrapper:
    """🔍 Real vector search with SentenceTransformers + FDE encoding"""

    def __init__(self, vector_dimension: int = 768, cache_enabled: bool = True, batch_size: int = 100) -> None:
        self.vector_dimension = vector_dimension
        self.cache_enabled = cache_enabled
        self.batch_size = batch_size
        self.encoder: Any = None
        self.buckets = 128  # Default number of buckets for FDE encoding
        self.documents: Dict[str, Dict[str, Any]] = {}  # Simple in-memory storage for demo
        self.embeddings: Dict[str, np.ndarray | None] = {}  # Store encoded embeddings

        # 🚀 Initialize the real embedding model
        try:
            # Use a multilingual model that handles Russian well
            self.encoder = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
            logger.info("🔍 SentenceTransformer model loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load SentenceTransformer: {e}")
            # Fallback - a simpler model
            try:
                self.encoder = SentenceTransformer("all-MiniLM-L6-v2")
                logger.info("🔍 Fallback SentenceTransformer model loaded")
            except Exception:
                logger.error("Failed to load any SentenceTransformer model")
                self.encoder = None

    async def async_init(self) -> None:
        """🔄 Async initialization - restore the index from file"""
        try:
            logger.info("🔍 Trying to restore the vector index from file...")

            # Check the metadata first
            metadata = await self.get_index_metadata_from_file()
            if metadata:
                logger.info(
                    f"🔍 Found saved index from {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(metadata.get('saved_at', 0)))}"
                )

                # Restore the index
                if await self.load_index_from_file():
                    logger.info("✅ Vector index successfully restored from file")
                else:
                    logger.warning("⚠️ Failed to restore the index from file")
            else:
                logger.info("🔍 No saved index found, a new one will be built")

        except Exception as e:
            logger.error(f"❌ Error while restoring the index: {e}")

    async def info(self) -> dict:
        """Return service information"""
        return {
            "vector_dimension": self.vector_dimension,
            "buckets": self.buckets,
            "documents_count": len(self.documents),
            "cache_enabled": self.cache_enabled,
        }

    async def search(self, query: str, limit: int) -> List[Dict[str, Any]]:
        """🔍 Real vector search using SentenceTransformers + FDE encoding"""
        if not query.strip() or not self.encoder:
            return []

        try:
            # 🚀 Generate a real embedding for the query
            query_text = query.strip()
            query_embedding = self.encoder.encode(query_text, convert_to_numpy=True)

            # Normalize dimensionality for FDE
            if query_embedding.ndim == 1:
                query_embedding = query_embedding.reshape(1, -1)

            # Encode query using FDE
            query_fde = muvera.encode_fde(query_embedding, self.buckets, "avg")

            # 🔍 Semantic similarity search
            results = []
            for doc_id, doc_embedding in self.embeddings.items():
                if doc_embedding is not None:
                    # Calculate cosine similarity
                    similarity = np.dot(query_fde, doc_embedding) / (
                        np.linalg.norm(query_fde) * np.linalg.norm(doc_embedding) + 1e-8
                    )
                    results.append(
                        {
                            "id": doc_id,
                            "score": float(similarity),
                            "metadata": self.documents.get(doc_id, {}).get("metadata", {}),
                        }
                    )

            # Sort by score and limit results
            results.sort(key=lambda x: x["score"], reverse=True)
            return results[:limit]

        except Exception as e:
            logger.error(f"🔍 Search error: {e}")
            return []

    async def index(self, documents: List[Dict[str, Any]], silent: bool = False) -> None:
        """🚀 Index documents using real SentenceTransformers + FDE encoding"""
        if not self.encoder:
            if not silent:
                logger.warning("🔍 No encoder available for indexing")
            return

        # 🤫 Batch mode detection
        is_batch = len(documents) > 10
        indexed_count = 0
        skipped_count = 0

        if is_batch:
            # 🚀 Batch processing for better performance
            valid_docs = []
            doc_contents = []

            for doc in documents:
                doc_id = doc["id"]
                self.documents[doc_id] = doc

                title = doc.get("title", "").strip()
                body = doc.get("body", "").strip()
                doc_content = f"{title} {body}".strip()

                if doc_content:
                    valid_docs.append(doc)
                    doc_contents.append(doc_content)
                else:
                    skipped_count += 1

            if doc_contents:
                try:
                    # 🚀 Batch encode all documents at once
                    batch_embeddings = self.encoder.encode(
                        doc_contents, convert_to_numpy=True, show_progress_bar=not silent, batch_size=32
                    )

                    # Process each embedding
                    for doc, embedding in zip(valid_docs, batch_embeddings, strict=False):
                        emb = embedding
                        doc_id = doc["id"]

                        # Normalize dimensionality for FDE
                        if emb.ndim == 1:
                            emb = emb.reshape(1, -1)

                        # Encode using FDE
                        doc_fde = muvera.encode_fde(emb, self.buckets, "avg")
                        self.embeddings[doc_id] = doc_fde
                        indexed_count += 1

                except Exception as e:
                    if not silent:
                        logger.error(f"🔍 Batch encoding error: {e}")
                    return
        else:
            # 🔍 Single document processing
            for doc in documents:
                try:
                    doc_id = doc["id"]
                    self.documents[doc_id] = doc

                    title = doc.get("title", "").strip()
                    body = doc.get("body", "").strip()
                    doc_content = f"{title} {body}".strip()

                    if not doc_content:
                        if not silent:
                            logger.warning(f"🔍 Empty content for document {doc_id}")
                        skipped_count += 1
                        continue

                    # 🚀 Single document encoding
                    doc_embedding = self.encoder.encode(doc_content, convert_to_numpy=True, show_progress_bar=False)

                    if doc_embedding.ndim == 1:
                        doc_embedding = doc_embedding.reshape(1, -1)

                    doc_fde = muvera.encode_fde(doc_embedding, self.buckets, "avg")
                    self.embeddings[doc_id] = doc_fde
                    indexed_count += 1

                    if not silent:
                        logger.debug(f"🔍 Indexed document {doc_id} with content length {len(doc_content)}")

                except Exception as e:
                    if not silent:
                        logger.error(f"🔍 Indexing error for document {doc.get('id', 'unknown')}: {e}")
                    skipped_count += 1
                    continue

        # 🔍 Final statistics
        if not silent:
            if is_batch:
                logger.info(f"🚀 Batch indexed {indexed_count} documents, skipped {skipped_count}")
            elif indexed_count > 0:
                logger.debug(f"🔍 Indexed {indexed_count} documents")

        # 🗃️ Auto-save the index after successful indexing
        if indexed_count > 0:
            try:
                await self.save_index_to_file()
                if not silent:
                    logger.debug("💾 Index automatically saved to file")
            except Exception as e:
                logger.warning(f"⚠️ Failed to auto-save the index: {e}")

    async def verify_documents(self, doc_ids: List[str]) -> Dict[str, Any]:
        """Verify which documents exist in the index"""
        missing = [doc_id for doc_id in doc_ids if doc_id not in self.documents]
        return {"missing": missing}

    async def get_index_status(self) -> Dict[str, Any]:
        """Get index status information"""
        return {
            "total_documents": len(self.documents),
            "total_embeddings": len(self.embeddings),
            "consistency": {"status": "ok", "null_embeddings_count": 0},
        }

    async def save_index_to_file(self, dump_dir: str = "/dump") -> bool:
        """🗃️ Save the vector index to a file using gzip compression"""
        try:
            # Create the directory if it does not exist
            dump_path = Path(dump_dir)
            dump_path.mkdir(parents=True, exist_ok=True)

            # Prepare the data for serialization
            index_data = {
                "documents": self.documents,
                "embeddings": self.embeddings,
                "vector_dimension": self.vector_dimension,
                "buckets": self.buckets,
                "timestamp": int(time.time()),
                "version": "1.0",
                "index_name": MUVERA_INDEX_NAME,
            }

            # Serialize the data with pickle
            serialized_data = pickle.dumps(index_data)

            # Prepare file names
            index_file = dump_path / f"{MUVERA_INDEX_NAME}_vector_index.pkl.gz"
            metadata_file = dump_path / f"{MUVERA_INDEX_NAME}_metadata.json"

            # Save the main index with gzip compression
            with gzip.open(index_file, "wb") as f:
                f.write(serialized_data)

            # Save metadata separately for quick access
            metadata = {
                "documents_count": len(self.documents),
                "embeddings_count": len(self.embeddings),
                "vector_dimension": self.vector_dimension,
                "buckets": self.buckets,
                "saved_at": int(time.time()),
                "version": "1.0",
                "index_name": MUVERA_INDEX_NAME,
                "original_size_bytes": len(serialized_data),
                "compressed_size_bytes": int(index_file.stat().st_size) if index_file.exists() else 0,
                "index_file": str(index_file),
                "metadata_file": str(metadata_file),
            }

            with Path(metadata_file).open(mode="w", encoding="utf-8") as f:
                json.dump(metadata, f, indent=2, ensure_ascii=False)

            original_size = cast(int, metadata["original_size_bytes"])
            compressed_size = cast(int, metadata["compressed_size_bytes"])
            compression_ratio = original_size / compressed_size if compressed_size > 0 else 1.0

            logger.info("🗃️ Vector index saved to file:")
            logger.info(f"  📁 File: {index_file}")
            logger.info(f"  📊 Documents: {len(self.documents)}, embeddings: {len(self.embeddings)}")
            logger.info(
                f"  💾 Size: {metadata['original_size_bytes']:,} → {metadata['compressed_size_bytes']:,} bytes (compression {compression_ratio:.1f}x)"
            )

            return True

        except Exception as e:
            logger.error(f"❌ Error saving the index to file: {e}")
            return False

    async def load_index_from_file(self, dump_dir: str = "/dump") -> bool:
        """🔄 Restore the vector index from a file"""
        try:
            dump_path = Path(dump_dir)
            index_file = dump_path / f"{MUVERA_INDEX_NAME}_vector_index.pkl.gz"
            metadata_file = dump_path / f"{MUVERA_INDEX_NAME}_metadata.json"

            # Check that the files exist
            if not index_file.exists():
                logger.info(f"🔍 No saved index found: {index_file}")
                return False

            # Load the metadata if present
            metadata = None
            if metadata_file.exists():
                try:
                    with Path(metadata_file).open(mode="r", encoding="utf-8") as f:
                        metadata = json.load(f)
                    logger.info(
                        f"🔍 Found saved index: {metadata.get('documents_count', 0)} documents, {metadata.get('embeddings_count', 0)} embeddings"
                    )
                    logger.info(
                        f"🔍 File size: {metadata.get('compressed_size_bytes', 0):,} bytes (compressed), saved at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(metadata.get('saved_at', 0)))}"
                    )
                except Exception as e:
                    logger.warning(f"⚠️ Failed to load metadata: {e}")

            # Load and decompress the main data
            with gzip.open(index_file, "rb") as f:
                serialized_data = f.read()

            # Deserialize the data (pickle is already imported at module level)
            index_data = pickle.loads(serialized_data)  # noqa: S301

            # Check version compatibility
            if index_data.get("version") != "1.0":
                logger.warning(f"🔍 Incompatible index version: {index_data.get('version')}")
                return False

            # Restore the data
            self.documents = index_data["documents"]
            self.embeddings = index_data["embeddings"]
            self.vector_dimension = index_data["vector_dimension"]
            self.buckets = index_data["buckets"]

            file_size = int(index_file.stat().st_size)
            decompression_ratio = len(serialized_data) / file_size if file_size > 0 else 1.0

            logger.info("🔄 Vector index restored from file:")
            logger.info(f"  📁 File: {index_file}")
            logger.info(f"  📊 Documents: {len(self.documents)}, embeddings: {len(self.embeddings)}")
            logger.info(
                f"  💾 Size: {file_size:,} → {len(serialized_data):,} bytes (decompression {decompression_ratio:.1f}x)"
            )

            return True

        except Exception as e:
            logger.error(f"❌ Error restoring the index from file: {e}")
            return False

    async def get_index_metadata_from_file(self, dump_dir: str = "/dump") -> dict[str, Any] | None:
        """📊 Read the metadata of the saved index from file"""
        try:
            dump_path = Path(dump_dir)
            metadata_file = dump_path / f"{MUVERA_INDEX_NAME}_metadata.json"

            if metadata_file.exists():
                with Path(metadata_file).open(mode="r", encoding="utf-8") as f:
                    return json.load(f)
            return None

        except Exception as e:
            logger.error(f"❌ Error reading index metadata: {e}")
            return None

    async def close(self) -> None:
        """Close the wrapper (no-op for this simple implementation)"""


class SearchService:
    def __init__(self) -> None:
        self.available: bool = False
        self.muvera_client: Any = None
        self.client: Any = None

        # Initialize local Muvera
        try:
            self.muvera_client = MuveraWrapper(
                vector_dimension=768,  # Standard embedding dimension
                cache_enabled=True,
                batch_size=SEARCH_MAX_BATCH_SIZE,
            )
            self.available = True
            logger.info(f"Local Muvera wrapper initialized - index: {MUVERA_INDEX_NAME}")

        except Exception as e:
            logger.error(f"Failed to initialize Muvera: {e}")
            self.available = False

    async def async_init(self) -> None:
        """🔄 Async initialization - restore the index"""
        if self.muvera_client:
            await self.muvera_client.async_init()

    async def info(self) -> dict:
        """Return information about search service"""
        if not self.available:
            return {"status": "disabled"}
        try:
            # Get Muvera service info
            if self.muvera_client:
                muvera_info = await self.muvera_client.info()
                return {"status": "enabled", "provider": "muvera", "mode": "local", "muvera_info": muvera_info}
            return {"status": "error", "message": "Muvera client not available"}
        except Exception:
            logger.exception("Failed to get search info")
            return {"status": "error", "message": "Failed to get search info"}

    def is_ready(self) -> bool:
        """Check if service is available"""
        return self.available

    async def search(self, text: str, limit: int, offset: int) -> list:
        """Search documents using Muvera"""
        if not self.available or not self.muvera_client:
            return []

        try:
            logger.info(f"Muvera search for: '{text}' (limit={limit}, offset={offset})")

            # Perform Muvera search
            results = await self.muvera_client.search(
                query=text,
                limit=limit + offset,  # Get enough results for pagination
            )

            # Format results to match the existing result shape
            formatted_results = []
            for result in results:
                formatted_results.append(
                    {
                        "id": str(result.get("id", "")),
                        "score": result.get("score", 0.0),
                        "metadata": result.get("metadata", {}),
                    }
                )

            # Apply pagination
            return formatted_results[offset : offset + limit]

        except Exception as e:
            logger.exception(f"Muvera search failed for '{text}': {e}")
            return []

    async def search_authors(self, text: str, limit: int = 10, offset: int = 0) -> list:
        """Search only for authors using Muvera"""
        if not self.available or not self.muvera_client or not text.strip():
            return []

        try:
            logger.info(f"Muvera author search for: '{text}' (limit={limit}, offset={offset})")

            # Use the same Muvera search; this local implementation applies no
            # author-specific filtering yet
            results = await self.muvera_client.search(
                query=text,
                limit=limit + offset,
            )

            # Format results
            author_results = []
            for result in results:
                author_results.append(
                    {
                        "id": str(result.get("id", "")),
                        "score": result.get("score", 0.0),
                        "metadata": result.get("metadata", {}),
                    }
                )

            # Apply pagination
            return author_results[offset : offset + limit]

        except Exception:
            logger.exception(f"Error searching authors for '{text}'")
            return []

    def index(self, shout: Any) -> None:
        """Index a single document using Muvera"""
        if not self.available or not self.muvera_client:
            return

        logger.info(f"Muvera indexing post {shout.id}")
        # Start in background to not block
        background_tasks.append(asyncio.create_task(self.perform_muvera_index(shout)))

    async def perform_muvera_index(self, shout: Any) -> None:
        """Index a single document using Muvera"""
        if not self.muvera_client:
            return

        try:
            logger.info(f"Muvera indexing document {shout.id}")

            # Prepare document data for Muvera
            doc_data: Dict[str, Any] = {
                "id": str(shout.id),
                "title": getattr(shout, "title", "") or "",
                "body": "",
                "metadata": {},
            }

            # Combine body content
            body_parts = []
            for field_name in ["subtitle", "lead", "body"]:
                field_value = getattr(shout, field_name, None)
                if field_value and isinstance(field_value, str) and field_value.strip():
                    body_parts.append(field_value.strip())

            # Process media content
            media = getattr(shout, "media", None)
            if media:
                if isinstance(media, str):
                    try:
                        media_json = json.loads(media)
                        if isinstance(media_json, dict):
                            if "title" in media_json:
                                body_parts.append(media_json["title"])
                            if "body" in media_json:
                                body_parts.append(media_json["body"])
                    except json.JSONDecodeError:
                        body_parts.append(media)
                elif isinstance(media, dict) and (media.get("title") or media.get("body")):
                    if media.get("title"):
                        body_parts.append(media["title"])
                    if media.get("body"):
                        body_parts.append(media["body"])

            # Set body content
            if body_parts:
                doc_data["body"] = " ".join(body_parts)

            # Add metadata
            doc_data["metadata"] = {
                "layout": getattr(shout, "layout", "article"),
                "lang": getattr(shout, "lang", "ru"),
                "created_at": getattr(shout, "created_at", 0),
                "created_by": getattr(shout, "created_by", 0),
            }

            # Index with Muvera (single document = verbose mode)
            await self.muvera_client.index(documents=[doc_data], silent=False)

            logger.info(f"🚀 Document {shout.id} indexed successfully")

        except Exception:
            logger.exception(f"Muvera indexing error for shout {shout.id}")

    async def bulk_index(self, shouts: list) -> None:
        """Index multiple documents using Muvera"""
        if not self.available or not self.muvera_client or not shouts:
            logger.warning(
                f"Bulk indexing skipped: available={self.available}, shouts_count={len(shouts) if shouts else 0}"
            )
            return

        # Start indexing metrics
        start_time = time.time()
        logger.info(f"Starting Muvera bulk indexing of {len(shouts)} documents")

        # Prepare documents for Muvera
        documents: List[Dict[str, Any]] = []
        total_skipped = 0

        for shout in shouts:
            try:
                # Prepare document data for Muvera
                doc_data: Dict[str, Any] = {
                    "id": str(getattr(shout, "id", "")),
                    "title": getattr(shout, "title", "") or "",
                    "body": "",
                    "metadata": {},
                }

                # Combine body content
                body_parts = []
                for field_name in ["subtitle", "lead", "body"]:
                    field_value = getattr(shout, field_name, None)
                    if field_value and isinstance(field_value, str) and field_value.strip():
                        body_parts.append(field_value.strip())

                # Process media content
                media = getattr(shout, "media", None)
                if media:
                    if isinstance(media, str):
                        try:
                            media_json = json.loads(media)
                            if isinstance(media_json, dict):
                                if "title" in media_json:
                                    body_parts.append(media_json["title"])
                                if "body" in media_json:
                                    body_parts.append(media_json["body"])
                        except json.JSONDecodeError:
                            body_parts.append(media)
                    elif isinstance(media, dict) and (media.get("title") or media.get("body")):
                        if media.get("title"):
                            body_parts.append(media["title"])
                        if media.get("body"):
                            body_parts.append(media["body"])

                # Set body content
                if body_parts:
                    doc_data["body"] = " ".join(body_parts)

                # Add metadata
                doc_data["metadata"] = {
                    "layout": getattr(shout, "layout", "article"),
                    "lang": getattr(shout, "lang", "ru"),
                    "created_at": getattr(shout, "created_at", 0),
                    "created_by": getattr(shout, "created_by", 0),
                }

                documents.append(doc_data)

            except Exception:
                logger.exception(f"Error processing shout {getattr(shout, 'id', 'unknown')} for indexing")
                total_skipped += 1

        if documents:
            try:
                # 🤫 Index with Muvera in silent mode for batch operations
                await self.muvera_client.index(documents=documents, silent=True)

                elapsed = time.time() - start_time
                logger.info(
                    f"🚀 Bulk indexing completed in {elapsed:.2f}s: "
                    f"{len(documents)} documents indexed, {total_skipped} shouts skipped"
                )
            except Exception as e:
                logger.exception(f"Muvera bulk indexing failed: {e}")
        else:
            logger.warning("No documents to index")

    async def verify_docs(self, doc_ids: list) -> dict:
        """Verify which documents exist in the search index using Muvera"""
        if not self.available or not self.muvera_client:
            return {"status": "disabled"}

        try:
            logger.info(f"Verifying {len(doc_ids)} documents in Muvera search index")

            # Use Muvera to verify documents
            verification_result = await self.muvera_client.verify_documents(doc_ids)

            # Format result to match expected structure
            missing_ids = verification_result.get("missing", [])

            logger.info(
                f"Document verification complete: {len(missing_ids)} documents missing out of {len(doc_ids)} total"
            )

            return {"missing": missing_ids, "details": {"missing_count": len(missing_ids), "total_count": len(doc_ids)}}
        except Exception:
            logger.exception("Document verification error")
            return {"status": "error", "message": "Document verification error"}

    async def check_index_status(self) -> dict:
        """Get detailed statistics about the search index health using Muvera"""
        if not self.available or not self.muvera_client:
            return {"status": "disabled"}

        try:
            # Get Muvera index status
            index_status = await self.muvera_client.get_index_status()

            # Check for consistency issues
            if index_status.get("consistency", {}).get("status") != "ok":
                null_count = index_status.get("consistency", {}).get("null_embeddings_count", 0)
                if null_count > 0:
                    logger.warning(f"Found {null_count} documents with NULL embeddings")

            return index_status
        except Exception:
            logger.exception("Failed to check index status")
            return {"status": "error", "message": "Failed to check index status"}

    async def close(self) -> None:
        """Close connections and release resources"""
        if hasattr(self, "muvera_client") and self.muvera_client:
            try:
                await self.muvera_client.close()
                logger.info("Local Muvera client closed")
            except Exception as e:
                logger.warning(f"Error closing Muvera client: {e}")
        logger.info("Search service closed")


# Create the search service singleton
search_service = SearchService()


# API-compatible functions for backward compatibility
async def search_text(text: str, limit: int = 200, offset: int = 0) -> list:
    """Search text using Muvera - backward compatibility function"""
    if search_service.available:
        return await search_service.search(text, limit, offset)
    return []


async def search_author_text(text: str, limit: int = 10, offset: int = 0) -> list:
    """Search authors using Muvera - backward compatibility function"""
    if search_service.available:
        return await search_service.search_authors(text, limit, offset)
    return []


async def get_search_count(text: str) -> int:
    """Get count of search results - backward compatibility function"""
    if not search_service.available:
        return 0
    # Get results and count them
    results = await search_text(text, SEARCH_PREFETCH_SIZE, 0)
    return len(results)


async def get_author_search_count(text: str) -> int:
    """Get count of author search results - backward compatibility function"""
    if not search_service.available:
        return 0
    # Get results and count them
    results = await search_author_text(text, SEARCH_PREFETCH_SIZE, 0)
    return len(results)


async def initialize_search_index(shouts_data: list) -> None:
    """Initialize search index with existing data - backward compatibility function"""
    if not search_service.available:
        logger.warning("Search service not available for initialization")
        return

    try:
        # First try to restore an existing index
        await search_service.async_init()

        # Check whether reindexing is needed
        if len(shouts_data) > 0:
            await search_service.bulk_index(shouts_data)
            logger.info(f"Initialized search index with {len(shouts_data)} documents")
    except Exception as e:
        logger.exception(f"Failed to initialize search index: {e}")


async def check_search_service() -> None:
    """Check if search service is available - backward compatibility function"""
    if search_service.available:
        logger.info("Search service is available and ready")
    else:
        logger.warning("Search service is not available")


async def initialize_search_index_background() -> None:
    """Initialize search index in background - backward compatibility function"""
    try:
        logger.info("Background search index initialization started")
        # Kept for compatibility; effectively a no-op since Muvera handles indexing automatically
        logger.info("Background search index initialization completed")
    except Exception:
        logger.exception("Error in background search index initialization")
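

# Hedged smoke test, not part of the original service code: run this module
# directly to exercise the backward-compatible helpers end to end. The sample
# shout object and its field values below are illustrative placeholders only.
if __name__ == "__main__":
    from types import SimpleNamespace

    async def _smoke_test() -> None:
        await check_search_service()
        sample_shout = SimpleNamespace(
            id=1,
            title="Sample title",
            subtitle="",
            lead="",
            body="Sample body text",
            media=None,
            layout="article",
            lang="ru",
            created_at=0,
            created_by=0,
        )
        await initialize_search_index([sample_shout])
        print(await search_text("sample", limit=5))

    asyncio.run(_smoke_test())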