refact(search,reader): without any kind of sorting
All checks were successful
Deploy on push / deploy (push) Successful in 42s
All checks were successful
Deploy on push / deploy (push) Successful in 42s
This commit is contained in:
parent
e7facf8d87
commit
fac43e5997
|
@ -396,38 +396,25 @@ async def load_shouts_search(_, info, text, options):
|
||||||
# Get search results with pagination
|
# Get search results with pagination
|
||||||
results = await search_text(text, limit, offset)
|
results = await search_text(text, limit, offset)
|
||||||
|
|
||||||
# If no results, return empty list
|
|
||||||
if not results:
|
if not results:
|
||||||
logger.info(f"No search results found for '{text}'")
|
logger.info(f"No search results found for '{text}'")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Extract IDs and scores
|
# Extract IDs in the order from the search engine
|
||||||
scores = {}
|
hits_ids = [str(sr.get("id")) for sr in results if sr.get("id")]
|
||||||
hits_ids = []
|
|
||||||
for sr in results:
|
|
||||||
shout_id = sr.get("id")
|
|
||||||
if shout_id:
|
|
||||||
shout_id = str(shout_id)
|
|
||||||
scores[shout_id] = sr.get("score")
|
|
||||||
hits_ids.append(shout_id)
|
|
||||||
|
|
||||||
# Query DB for only the IDs in the current page
|
# Query DB for only the IDs in the current page
|
||||||
q = query_with_stat(info)
|
q = query_with_stat(info)
|
||||||
q = q.filter(Shout.id.in_(hits_ids))
|
q = q.filter(Shout.id.in_(hits_ids))
|
||||||
q = apply_filters(q, options.get("filters", {}))
|
q = apply_filters(q, options.get("filters", {}))
|
||||||
|
|
||||||
#
|
|
||||||
shouts = get_shouts_with_links(info, q, len(hits_ids), 0)
|
shouts = get_shouts_with_links(info, q, len(hits_ids), 0)
|
||||||
|
|
||||||
# Add scores from search results
|
# Reorder shouts to match the order from hits_ids
|
||||||
for shout in shouts:
|
shouts_dict = {str(shout['id']): shout for shout in shouts}
|
||||||
shout_id = str(shout['id'])
|
ordered_shouts = [shouts_dict[shout_id] for shout_id in hits_ids if shout_id in shouts_dict]
|
||||||
shout["score"] = scores.get(shout_id, 0)
|
|
||||||
|
|
||||||
# Re-sort by search score to maintain ranking
|
return ordered_shouts
|
||||||
shouts.sort(key=lambda x: scores.get(str(x['id']), 0), reverse=True)
|
|
||||||
|
|
||||||
return shouts
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -26,9 +26,8 @@ SEARCH_CACHE_ENABLED = bool(
|
||||||
os.environ.get("SEARCH_CACHE_ENABLED", "true").lower() in ["true", "1", "yes"]
|
os.environ.get("SEARCH_CACHE_ENABLED", "true").lower() in ["true", "1", "yes"]
|
||||||
)
|
)
|
||||||
SEARCH_CACHE_TTL_SECONDS = int(
|
SEARCH_CACHE_TTL_SECONDS = int(
|
||||||
os.environ.get("SEARCH_CACHE_TTL_SECONDS", "900")
|
os.environ.get("SEARCH_CACHE_TTL_SECONDS", "300")
|
||||||
) # Default: 15 minutes
|
) # Default: 15 minutes
|
||||||
SEARCH_MIN_SCORE = float(os.environ.get("SEARCH_MIN_SCORE", "0.1"))
|
|
||||||
SEARCH_PREFETCH_SIZE = int(os.environ.get("SEARCH_PREFETCH_SIZE", "200"))
|
SEARCH_PREFETCH_SIZE = int(os.environ.get("SEARCH_PREFETCH_SIZE", "200"))
|
||||||
SEARCH_USE_REDIS = bool(
|
SEARCH_USE_REDIS = bool(
|
||||||
os.environ.get("SEARCH_USE_REDIS", "true").lower() in ["true", "1", "yes"]
|
os.environ.get("SEARCH_USE_REDIS", "true").lower() in ["true", "1", "yes"]
|
||||||
|
@ -221,9 +220,6 @@ class SearchService:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Search caching enabled using {cache_location} cache with TTL={SEARCH_CACHE_TTL_SECONDS}s"
|
f"Search caching enabled using {cache_location} cache with TTL={SEARCH_CACHE_TTL_SECONDS}s"
|
||||||
)
|
)
|
||||||
logger.info(
|
|
||||||
f"Minimum score filter: {SEARCH_MIN_SCORE}, prefetch size: {SEARCH_PREFETCH_SIZE}"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def info(self):
|
async def info(self):
|
||||||
"""Return information about search service"""
|
"""Return information about search service"""
|
||||||
|
@ -712,47 +708,32 @@ class SearchService:
|
||||||
# Not in cache or cache disabled, perform new search
|
# Not in cache or cache disabled, perform new search
|
||||||
try:
|
try:
|
||||||
search_limit = limit
|
search_limit = limit
|
||||||
search_offset = offset
|
|
||||||
|
|
||||||
if SEARCH_CACHE_ENABLED:
|
if SEARCH_CACHE_ENABLED:
|
||||||
search_limit = SEARCH_PREFETCH_SIZE
|
search_limit = SEARCH_PREFETCH_SIZE
|
||||||
search_offset = 0
|
|
||||||
else:
|
else:
|
||||||
search_limit = limit
|
search_limit = limit
|
||||||
search_offset = offset
|
|
||||||
|
|
||||||
response = await self.client.post(
|
response = await self.client.post(
|
||||||
"/search-combined",
|
"/search-combined",
|
||||||
json={"text": text, "limit": search_limit, "offset": search_offset},
|
json={"text": text, "limit": search_limit},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
result = response.json()
|
result = response.json()
|
||||||
|
|
||||||
formatted_results = result.get("results", [])
|
formatted_results = result.get("results", [])
|
||||||
|
|
||||||
valid_results = []
|
# filter out non‑numeric IDs
|
||||||
for item in formatted_results:
|
valid_results = [r for r in formatted_results if r.get("id", "").isdigit()]
|
||||||
doc_id = item.get("id")
|
if len(valid_results) != len(formatted_results):
|
||||||
if doc_id and doc_id.isdigit():
|
formatted_results = valid_results
|
||||||
valid_results.append(item)
|
|
||||||
|
|
||||||
if len(valid_results) != len(formatted_results):
|
if len(valid_results) != len(formatted_results):
|
||||||
formatted_results = valid_results
|
formatted_results = valid_results
|
||||||
|
|
||||||
if SEARCH_MIN_SCORE > 0:
|
|
||||||
initial_count = len(formatted_results)
|
|
||||||
formatted_results = [
|
|
||||||
r
|
|
||||||
for r in formatted_results
|
|
||||||
if r.get("score", 0) >= SEARCH_MIN_SCORE
|
|
||||||
]
|
|
||||||
|
|
||||||
if SEARCH_CACHE_ENABLED:
|
if SEARCH_CACHE_ENABLED:
|
||||||
|
# Store the full prefetch batch, then page it
|
||||||
await self.cache.store(text, formatted_results)
|
await self.cache.store(text, formatted_results)
|
||||||
end_idx = offset + limit
|
return await self.cache.get(text, limit, offset)
|
||||||
page_results = formatted_results[offset:end_idx]
|
|
||||||
return page_results
|
|
||||||
|
|
||||||
return formatted_results
|
return formatted_results
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -783,12 +764,6 @@ class SearchService:
|
||||||
result = response.json()
|
result = response.json()
|
||||||
author_results = result.get("results", [])
|
author_results = result.get("results", [])
|
||||||
|
|
||||||
# Apply score filtering if needed
|
|
||||||
if SEARCH_MIN_SCORE > 0:
|
|
||||||
author_results = [
|
|
||||||
r for r in author_results if r.get("score", 0) >= SEARCH_MIN_SCORE
|
|
||||||
]
|
|
||||||
|
|
||||||
# Store in cache if enabled
|
# Store in cache if enabled
|
||||||
if SEARCH_CACHE_ENABLED:
|
if SEARCH_CACHE_ENABLED:
|
||||||
await self.cache.store(cache_key, author_results)
|
await self.cache.store(cache_key, author_results)
|
||||||
|
@ -829,7 +804,7 @@ search_service = SearchService()
|
||||||
# API-compatible function to perform a search
|
# API-compatible function to perform a search
|
||||||
|
|
||||||
|
|
||||||
async def search_text(text: str, limit: int = 50, offset: int = 0):
|
async def search_text(text: str, limit: int = 200, offset: int = 0):
|
||||||
payload = []
|
payload = []
|
||||||
if search_service.available:
|
if search_service.available:
|
||||||
payload = await search_service.search(text, limit, offset)
|
payload = await search_service.search(text, limit, offset)
|
||||||
|
@ -848,10 +823,8 @@ async def get_search_count(text: str):
|
||||||
if not search_service.available:
|
if not search_service.available:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if SEARCH_CACHE_ENABLED:
|
if SEARCH_CACHE_ENABLED and await search_service.cache.has_query(text):
|
||||||
cache_key = f"title:{text}"
|
return await search_service.cache.get_total_count(text)
|
||||||
if await search_service.cache.has_query(cache_key):
|
|
||||||
return await search_service.cache.get_total_count(cache_key)
|
|
||||||
|
|
||||||
# If not found in cache, fetch from endpoint
|
# If not found in cache, fetch from endpoint
|
||||||
return len(await search_text(text, SEARCH_PREFETCH_SIZE, 0))
|
return len(await search_text(text, SEARCH_PREFETCH_SIZE, 0))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user