refact(search,reader): without any kind of sorting
All checks were successful
Deploy on push / deploy (push) Successful in 42s
All checks were successful
Deploy on push / deploy (push) Successful in 42s
This commit is contained in:
parent
e7facf8d87
commit
fac43e5997
|
@ -396,38 +396,25 @@ async def load_shouts_search(_, info, text, options):
|
||||||
# Get search results with pagination
|
# Get search results with pagination
|
||||||
results = await search_text(text, limit, offset)
|
results = await search_text(text, limit, offset)
|
||||||
|
|
||||||
# If no results, return empty list
|
|
||||||
if not results:
|
if not results:
|
||||||
logger.info(f"No search results found for '{text}'")
|
logger.info(f"No search results found for '{text}'")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Extract IDs and scores
|
# Extract IDs in the order from the search engine
|
||||||
scores = {}
|
hits_ids = [str(sr.get("id")) for sr in results if sr.get("id")]
|
||||||
hits_ids = []
|
|
||||||
for sr in results:
|
|
||||||
shout_id = sr.get("id")
|
|
||||||
if shout_id:
|
|
||||||
shout_id = str(shout_id)
|
|
||||||
scores[shout_id] = sr.get("score")
|
|
||||||
hits_ids.append(shout_id)
|
|
||||||
|
|
||||||
# Query DB for only the IDs in the current page
|
# Query DB for only the IDs in the current page
|
||||||
q = query_with_stat(info)
|
q = query_with_stat(info)
|
||||||
q = q.filter(Shout.id.in_(hits_ids))
|
q = q.filter(Shout.id.in_(hits_ids))
|
||||||
q = apply_filters(q, options.get("filters", {}))
|
q = apply_filters(q, options.get("filters", {}))
|
||||||
|
|
||||||
#
|
|
||||||
shouts = get_shouts_with_links(info, q, len(hits_ids), 0)
|
shouts = get_shouts_with_links(info, q, len(hits_ids), 0)
|
||||||
|
|
||||||
# Add scores from search results
|
# Reorder shouts to match the order from hits_ids
|
||||||
for shout in shouts:
|
shouts_dict = {str(shout['id']): shout for shout in shouts}
|
||||||
shout_id = str(shout['id'])
|
ordered_shouts = [shouts_dict[shout_id] for shout_id in hits_ids if shout_id in shouts_dict]
|
||||||
shout["score"] = scores.get(shout_id, 0)
|
|
||||||
|
|
||||||
# Re-sort by search score to maintain ranking
|
return ordered_shouts
|
||||||
shouts.sort(key=lambda x: scores.get(str(x['id']), 0), reverse=True)
|
|
||||||
|
|
||||||
return shouts
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -26,9 +26,8 @@ SEARCH_CACHE_ENABLED = bool(
|
||||||
os.environ.get("SEARCH_CACHE_ENABLED", "true").lower() in ["true", "1", "yes"]
|
os.environ.get("SEARCH_CACHE_ENABLED", "true").lower() in ["true", "1", "yes"]
|
||||||
)
|
)
|
||||||
SEARCH_CACHE_TTL_SECONDS = int(
|
SEARCH_CACHE_TTL_SECONDS = int(
|
||||||
os.environ.get("SEARCH_CACHE_TTL_SECONDS", "900")
|
os.environ.get("SEARCH_CACHE_TTL_SECONDS", "300")
|
||||||
) # Default: 15 minutes
|
) # Default: 15 minutes
|
||||||
SEARCH_MIN_SCORE = float(os.environ.get("SEARCH_MIN_SCORE", "0.1"))
|
|
||||||
SEARCH_PREFETCH_SIZE = int(os.environ.get("SEARCH_PREFETCH_SIZE", "200"))
|
SEARCH_PREFETCH_SIZE = int(os.environ.get("SEARCH_PREFETCH_SIZE", "200"))
|
||||||
SEARCH_USE_REDIS = bool(
|
SEARCH_USE_REDIS = bool(
|
||||||
os.environ.get("SEARCH_USE_REDIS", "true").lower() in ["true", "1", "yes"]
|
os.environ.get("SEARCH_USE_REDIS", "true").lower() in ["true", "1", "yes"]
|
||||||
|
@ -221,9 +220,6 @@ class SearchService:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Search caching enabled using {cache_location} cache with TTL={SEARCH_CACHE_TTL_SECONDS}s"
|
f"Search caching enabled using {cache_location} cache with TTL={SEARCH_CACHE_TTL_SECONDS}s"
|
||||||
)
|
)
|
||||||
logger.info(
|
|
||||||
f"Minimum score filter: {SEARCH_MIN_SCORE}, prefetch size: {SEARCH_PREFETCH_SIZE}"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def info(self):
|
async def info(self):
|
||||||
"""Return information about search service"""
|
"""Return information about search service"""
|
||||||
|
@ -712,47 +708,32 @@ class SearchService:
|
||||||
# Not in cache or cache disabled, perform new search
|
# Not in cache or cache disabled, perform new search
|
||||||
try:
|
try:
|
||||||
search_limit = limit
|
search_limit = limit
|
||||||
search_offset = offset
|
|
||||||
|
|
||||||
if SEARCH_CACHE_ENABLED:
|
if SEARCH_CACHE_ENABLED:
|
||||||
search_limit = SEARCH_PREFETCH_SIZE
|
search_limit = SEARCH_PREFETCH_SIZE
|
||||||
search_offset = 0
|
|
||||||
else:
|
else:
|
||||||
search_limit = limit
|
search_limit = limit
|
||||||
search_offset = offset
|
|
||||||
|
|
||||||
response = await self.client.post(
|
response = await self.client.post(
|
||||||
"/search-combined",
|
"/search-combined",
|
||||||
json={"text": text, "limit": search_limit, "offset": search_offset},
|
json={"text": text, "limit": search_limit},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
result = response.json()
|
result = response.json()
|
||||||
|
|
||||||
formatted_results = result.get("results", [])
|
formatted_results = result.get("results", [])
|
||||||
|
|
||||||
valid_results = []
|
# filter out non‑numeric IDs
|
||||||
for item in formatted_results:
|
valid_results = [r for r in formatted_results if r.get("id", "").isdigit()]
|
||||||
doc_id = item.get("id")
|
if len(valid_results) != len(formatted_results):
|
||||||
if doc_id and doc_id.isdigit():
|
formatted_results = valid_results
|
||||||
valid_results.append(item)
|
|
||||||
|
|
||||||
if len(valid_results) != len(formatted_results):
|
if len(valid_results) != len(formatted_results):
|
||||||
formatted_results = valid_results
|
formatted_results = valid_results
|
||||||
|
|
||||||
if SEARCH_MIN_SCORE > 0:
|
|
||||||
initial_count = len(formatted_results)
|
|
||||||
formatted_results = [
|
|
||||||
r
|
|
||||||
for r in formatted_results
|
|
||||||
if r.get("score", 0) >= SEARCH_MIN_SCORE
|
|
||||||
]
|
|
||||||
|
|
||||||
if SEARCH_CACHE_ENABLED:
|
if SEARCH_CACHE_ENABLED:
|
||||||
|
# Store the full prefetch batch, then page it
|
||||||
await self.cache.store(text, formatted_results)
|
await self.cache.store(text, formatted_results)
|
||||||
end_idx = offset + limit
|
return await self.cache.get(text, limit, offset)
|
||||||
page_results = formatted_results[offset:end_idx]
|
|
||||||
return page_results
|
|
||||||
|
|
||||||
return formatted_results
|
return formatted_results
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -783,12 +764,6 @@ class SearchService:
|
||||||
result = response.json()
|
result = response.json()
|
||||||
author_results = result.get("results", [])
|
author_results = result.get("results", [])
|
||||||
|
|
||||||
# Apply score filtering if needed
|
|
||||||
if SEARCH_MIN_SCORE > 0:
|
|
||||||
author_results = [
|
|
||||||
r for r in author_results if r.get("score", 0) >= SEARCH_MIN_SCORE
|
|
||||||
]
|
|
||||||
|
|
||||||
# Store in cache if enabled
|
# Store in cache if enabled
|
||||||
if SEARCH_CACHE_ENABLED:
|
if SEARCH_CACHE_ENABLED:
|
||||||
await self.cache.store(cache_key, author_results)
|
await self.cache.store(cache_key, author_results)
|
||||||
|
@ -829,7 +804,7 @@ search_service = SearchService()
|
||||||
# API-compatible function to perform a search
|
# API-compatible function to perform a search
|
||||||
|
|
||||||
|
|
||||||
async def search_text(text: str, limit: int = 50, offset: int = 0):
|
async def search_text(text: str, limit: int = 200, offset: int = 0):
|
||||||
payload = []
|
payload = []
|
||||||
if search_service.available:
|
if search_service.available:
|
||||||
payload = await search_service.search(text, limit, offset)
|
payload = await search_service.search(text, limit, offset)
|
||||||
|
@ -848,10 +823,8 @@ async def get_search_count(text: str):
|
||||||
if not search_service.available:
|
if not search_service.available:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if SEARCH_CACHE_ENABLED:
|
if SEARCH_CACHE_ENABLED and await search_service.cache.has_query(text):
|
||||||
cache_key = f"title:{text}"
|
return await search_service.cache.get_total_count(text)
|
||||||
if await search_service.cache.has_query(cache_key):
|
|
||||||
return await search_service.cache.get_total_count(cache_key)
|
|
||||||
|
|
||||||
# If not found in cache, fetch from endpoint
|
# If not found in cache, fetch from endpoint
|
||||||
return len(await search_text(text, SEARCH_PREFETCH_SIZE, 0))
|
return len(await search_text(text, SEARCH_PREFETCH_SIZE, 0))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user