import asyncio
import json
import logging
import os

import httpx

from services.redis import redis
from utils.encoders import CustomJSONEncoder

# Keep this logger at INFO so DEBUG chatter is suppressed while the
# service's own informational messages still appear
logger = logging.getLogger("search")
logger.setLevel(logging.INFO)

REDIS_TTL = 86400  # 1 day in seconds

# Configuration for the search service
SEARCH_ENABLED = os.environ.get("SEARCH_ENABLED", "true").lower() in ["true", "1", "yes"]
# Default to "" so importing the module does not crash when the variable is
# unset; requests made through the client will then fail with a clear error
TXTAI_SERVICE_URL = os.environ.get("TXTAI_SERVICE_URL", "")
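
# Example environment configuration (illustrative values, not shipped defaults):
#   SEARCH_ENABLED=true
#   TXTAI_SERVICE_URL=http://localhost:8000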


class SearchService:
    def __init__(self):
        logger.info("Initializing search service...")
        self.available = SEARCH_ENABLED
        self.client = httpx.AsyncClient(timeout=30.0, base_url=TXTAI_SERVICE_URL)

        if not self.available:
            logger.info("Search disabled (SEARCH_ENABLED = False)")

    async def info(self):
        """Return information about the search service"""
        if not self.available:
            return {"status": "disabled"}
        try:
            response = await self.client.get("/info")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.error(f"Failed to get search info: {e}")
            return {"status": "error", "message": str(e)}

    def is_ready(self):
        """Check if the service is available"""
        return self.available

    def index(self, shout):
        """Index a single document"""
        if not self.available:
            return
        logger.info(f"Indexing post {shout.id}")
        # Schedule indexing in the background so the caller is not blocked;
        # this requires a running event loop, and because the task is
        # fire-and-forget, failures surface only in perform_index's logging
        asyncio.create_task(self.perform_index(shout))

    async def perform_index(self, shout):
        """Actually perform the indexing operation"""
        if not self.available:
            return
        try:
            # Combine all text fields; filter(None, ...) drops missing or empty ones
            text = " ".join(filter(None, [
                shout.title,
                shout.subtitle,
                shout.lead,
                shout.body,
                shout.media,
            ]))

            # Send to the txtai service
            response = await self.client.post(
                "/index",
                json={"id": str(shout.id), "text": text},
            )
            response.raise_for_status()
            logger.info(f"Post {shout.id} successfully indexed")
        except Exception as e:
            logger.error(f"Indexing error for shout {shout.id}: {e}")

    async def bulk_index(self, shouts):
        """Index multiple documents at once"""
        if not self.available or not shouts:
            return

        documents = []
        for shout in shouts:
            # Same field combination as perform_index
            text = " ".join(filter(None, [
                shout.title,
                shout.subtitle,
                shout.lead,
                shout.body,
                shout.media,
            ]))
            documents.append({"id": str(shout.id), "text": text})

        try:
            response = await self.client.post(
                "/bulk-index",
                json={"documents": documents},
            )
            response.raise_for_status()
            logger.info(f"Bulk indexed {len(documents)} documents")
        except Exception as e:
            logger.error(f"Bulk indexing error: {e}")

    async def search(self, text, limit, offset):
        """Search documents"""
        if not self.available:
            return []

        # Check the Redis cache first
        redis_key = f"search:{text}:{offset}+{limit}"
        cached = await redis.get(redis_key)
        if cached:
            return json.loads(cached)

        logger.info(f"Searching: {text} {offset}+{limit}")

        try:
            response = await self.client.post(
                "/search",
                json={"text": text, "limit": limit, "offset": offset},
            )
            response.raise_for_status()
            result = response.json()
            formatted_results = result.get("results", [])

            # Cache non-empty results for REDIS_TTL seconds
            if formatted_results:
                await redis.execute(
                    "SETEX",
                    redis_key,
                    REDIS_TTL,
                    json.dumps(formatted_results, cls=CustomJSONEncoder),
                )
            return formatted_results
        except Exception as e:
            logger.error(f"Search error: {e}")
            return []
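
# The txtai sidecar contract assumed above (inferred from the calls in this
# class, not a documented schema):
#   GET  /info                                        -> status JSON
#   POST /index      {"id": str, "text": str}         -> 2xx on success
#   POST /bulk-index {"documents": [...]}             -> 2xx on success
#   POST /search     {"text", "limit", "offset"}      -> {"results": [...]}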


# Create the search service singleton
search_service = SearchService()


# Keep the API exactly the same to maintain compatibility
async def search_text(text: str, limit: int = 50, offset: int = 0):
    payload = []
    if search_service.available:
        payload = await search_service.search(text, limit, offset)
    return payload
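
# Example call site (illustrative; the query and caller are assumptions):
#
#     results = await search_text("renaissance art", limit=10)
#     # -> list of result dicts from the txtai service, [] if search is disabled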


# Function to initialize search with existing data
async def initialize_search_index(shouts_data):
    """Initialize search index with existing data during application startup"""
    if SEARCH_ENABLED:
        logger.info("Initializing search index with existing data...")
        await search_service.bulk_index(shouts_data)
        logger.info(f"Search index initialized with {len(shouts_data)} documents")
|