# core/services/pretopic.py

import asyncio
2025-02-09 19:26:50 +00:00
import concurrent.futures
from concurrent.futures import Future
from typing import Any, Optional
# Prefer the project's shared logger; fall back to a standard-library logger
# named after this module when the project logging helper cannot be imported.
try:
    from utils.logger import root_logger as logger
except ImportError:
    import logging

    logger = logging.getLogger(__name__)
class PreTopicService:
    """Topic suggestion and content search backed by lazily loaded embeddings.

    The embeddings are not created in ``__init__``.  The first query submits
    ``_prepare_embeddings`` to a small thread pool and awaits its completion,
    so constructing the service stays cheap.  If loading fails (or ``txtai``
    is not installed) the embeddings remain ``None`` and every query returns
    an empty list; loading is attempted only once per instance.
    """

    # Configuration shared by the topic and the search embeddings.
    _EMBEDDINGS_CONFIG: dict[str, str] = {
        "method": "transformers",
        "path": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    }

    def __init__(self) -> None:
        """Create the service without loading any embeddings yet."""
        self.topic_embeddings: Optional[Any] = None
        self.search_embeddings: Optional[Any] = None
        self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
        # Set on first use; its presence means loading was already requested.
        self._initialization_future: Optional[Future[None]] = None

    def _ensure_initialization(self) -> None:
        """Submit the embeddings loading job to the executor if not yet requested."""
        if self._initialization_future is None:
            self._initialization_future = self._executor.submit(self._prepare_embeddings)

    def _prepare_embeddings(self) -> None:
        """Build the topic and search embeddings.

        Runs on the executor thread.  Failures are logged rather than raised,
        leaving the affected attributes as ``None``.
        """
        try:
            # Imported here so that a missing txtai only disables this service.
            from txtai.embeddings import Embeddings  # type: ignore[import-untyped]

            # Each instance receives its own copy of the shared configuration.
            self.topic_embeddings = Embeddings(dict(self._EMBEDDINGS_CONFIG))
            self.search_embeddings = Embeddings(dict(self._EMBEDDINGS_CONFIG))
            logger.info("PreTopic embeddings initialized successfully")
        except ImportError:
            logger.warning("txtai.embeddings not available, PreTopicService disabled")
        except Exception as e:
            logger.error(f"Failed to initialize embeddings: {e}")

    async def _run_search(
        self,
        attr_name: str,
        query: str,
        limit: int,
        text_key: str,
        error_prefix: str,
    ) -> list[dict[str, Any]]:
        """Wait for initialization, then query one of the embeddings.

        Args:
            attr_name: Name of the attribute holding the embeddings to query.
            query: Text passed to the embeddings' ``search`` method.
            limit: Second argument passed to ``search``.
            text_key: Key under which each result's ``"text"`` is returned.
            error_prefix: Prefix of the message logged when the query fails.

        Returns:
            One dict per result with ``text_key`` and ``"score"``; an empty
            list when embeddings are unavailable, nothing matches, or an
            error occurs.
        """
        try:
            # Initialization must be requested *before* checking for None:
            # the embeddings are only ever created by this lazy step.
            self._ensure_initialization()
            if self._initialization_future is not None:
                await asyncio.wrap_future(self._initialization_future)

            embeddings = getattr(self, attr_name)
            if embeddings is None:
                return []

            # NOTE(review): search() runs inline on the event loop thread.
            # Assumes each result is a mapping with "text" and "score" keys
            # -- TODO confirm against the txtai index configuration.
            results = embeddings.search(query, limit)
            return [
                {text_key: result["text"], "score": result["score"]}
                for result in results or []
            ]
        except Exception as e:
            logger.error(f"{error_prefix}: {e}")
        return []

    async def suggest_topics(self, text: str) -> list[dict[str, Any]]:
        """Suggest topics for ``text``.

        Triggers embeddings loading on first use and waits for it to finish.

        Returns:
            A list of ``{"topic": ..., "score": ...}`` dicts (the search is
            issued with a limit argument of 1), or an empty list when the
            embeddings are unavailable or the query fails.
        """
        return await self._run_search(
            "topic_embeddings", text, 1, "topic", "Error suggesting topics"
        )

    async def search_content(self, query: str, limit: int = 10) -> list[dict[str, Any]]:
        """Search content for ``query``.

        Triggers embeddings loading on first use and waits for it to finish.

        Args:
            query: Text to search for.
            limit: Value forwarded to the embeddings' ``search`` call.

        Returns:
            A list of ``{"content": ..., "score": ...}`` dicts, or an empty
            list when the embeddings are unavailable or the query fails.
        """
        return await self._run_search(
            "search_embeddings", query, limit, "content", "Error searching content"
        )
2025-02-09 19:26:50 +00:00
# Module-level instance created at import time. Constructing it only sets up
# the thread pool; no embeddings are loaded here.
pretopic_service = PreTopicService()