commit c25d7e3ab6
parent 937b154c6b
2024-04-17 18:32:23 +03:00
39 changed files with 986 additions and 926 deletions


@@ -8,51 +8,51 @@ from services.encoders import CustomJSONEncoder
 from services.logger import root_logger as logger
 from services.rediscache import redis

-ELASTIC_HOST = os.environ.get('ELASTIC_HOST', '').replace('https://', '')
-ELASTIC_USER = os.environ.get('ELASTIC_USER', '')
-ELASTIC_PASSWORD = os.environ.get('ELASTIC_PASSWORD', '')
-ELASTIC_PORT = os.environ.get('ELASTIC_PORT', 9200)
-ELASTIC_AUTH = f'{ELASTIC_USER}:{ELASTIC_PASSWORD}' if ELASTIC_USER else ''
+ELASTIC_HOST = os.environ.get("ELASTIC_HOST", "").replace("https://", "")
+ELASTIC_USER = os.environ.get("ELASTIC_USER", "")
+ELASTIC_PASSWORD = os.environ.get("ELASTIC_PASSWORD", "")
+ELASTIC_PORT = os.environ.get("ELASTIC_PORT", 9200)
+ELASTIC_AUTH = f"{ELASTIC_USER}:{ELASTIC_PASSWORD}" if ELASTIC_USER else ""
 ELASTIC_URL = os.environ.get(
-    'ELASTIC_URL', f'https://{ELASTIC_AUTH}@{ELASTIC_HOST}:{ELASTIC_PORT}'
+    "ELASTIC_URL", f"https://{ELASTIC_AUTH}@{ELASTIC_HOST}:{ELASTIC_PORT}"
 )
 REDIS_TTL = 86400  # 1 day in seconds

 index_settings = {
-    'settings': {
-        'index': {'number_of_shards': 1, 'auto_expand_replicas': '0-all'},
-        'analysis': {
-            'analyzer': {
-                'ru': {
-                    'tokenizer': 'standard',
-                    'filter': ['lowercase', 'ru_stop', 'ru_stemmer'],
+    "settings": {
+        "index": {"number_of_shards": 1, "auto_expand_replicas": "0-all"},
+        "analysis": {
+            "analyzer": {
+                "ru": {
+                    "tokenizer": "standard",
+                    "filter": ["lowercase", "ru_stop", "ru_stemmer"],
                 }
             },
-            'filter': {
-                'ru_stemmer': {'type': 'stemmer', 'language': 'russian'},
-                'ru_stop': {'type': 'stop', 'stopwords': '_russian_'},
+            "filter": {
+                "ru_stemmer": {"type": "stemmer", "language": "russian"},
+                "ru_stop": {"type": "stop", "stopwords": "_russian_"},
             },
         },
     },
-    'mappings': {
-        'properties': {
-            'body': {'type': 'text', 'analyzer': 'ru'},
-            'title': {'type': 'text', 'analyzer': 'ru'},
-            'subtitle': {'type': 'text', 'analyzer': 'ru'},
-            'lead': {'type': 'text', 'analyzer': 'ru'},
+    "mappings": {
+        "properties": {
+            "body": {"type": "text", "analyzer": "ru"},
+            "title": {"type": "text", "analyzer": "ru"},
+            "subtitle": {"type": "text", "analyzer": "ru"},
+            "lead": {"type": "text", "analyzer": "ru"},
             # 'author': {'type': 'text'},
         }
     },
 }
-expected_mapping = index_settings['mappings']
+expected_mapping = index_settings["mappings"]

 # Create an event loop
 search_loop = asyncio.get_event_loop()


 class SearchService:
-    def __init__(self, index_name='search_index'):
+    def __init__(self, index_name="search_index"):
         self.index_name = index_name
         self.client = None
         self.lock = asyncio.Lock()  # Create an asyncio lock
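Both sides of the hunk above carry the same index configuration: a custom "ru" analyzer (standard tokenizer, then lowercase, Russian-stopword, and Russian-stemmer filters) applied to the body, title, subtitle, and lead fields. A minimal sketch of how that analyzer chain could be spot-checked against a live index, assuming a reachable OpenSearch node under the module's default "search_index" name; the localhost address is an illustrative assumption, since the module itself reads the ELASTIC_* environment variables:

from opensearchpy import OpenSearch  # same client library the module uses

# Assumption: a local dev node with an index created from `index_settings`.
client = OpenSearch(hosts=[{"host": "localhost", "port": 9200}])
tokens = client.indices.analyze(
    index="search_index",
    body={"analyzer": "ru", "text": "Поисковые системы индексируют статьи"},
)
# The "ru" chain lowercases, drops Russian stopwords, and stems each token.
print([t["token"] for t in tokens["tokens"]])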
@@ -61,7 +61,7 @@ class SearchService:
         if ELASTIC_HOST:
             try:
                 self.client = OpenSearch(
-                    hosts=[{'host': ELASTIC_HOST, 'port': ELASTIC_PORT}],
+                    hosts=[{"host": ELASTIC_HOST, "port": ELASTIC_PORT}],
                     http_compress=True,
                     http_auth=(ELASTIC_USER, ELASTIC_PASSWORD),
                     use_ssl=True,
@@ -70,52 +70,52 @@ class SearchService:
                     ssl_show_warn=False,
                     # ca_certs = ca_certs_path
                 )
-                logger.info(' Клиент OpenSearch.org подключен')
+                logger.info(" Клиент OpenSearch.org подключен")

                 # Create a task and run it in the event loop
                 search_loop.create_task(self.check_index())
             except Exception as exc:
-                logger.error(f' {exc}')
+                logger.error(f" {exc}")
                 self.client = None

     def info(self):
         if isinstance(self.client, OpenSearch):
-            logger.info(' Поиск подключен')  # : {self.client.info()}')
+            logger.info(" Поиск подключен")  # : {self.client.info()}')
         else:
-            logger.info(' * Задайте переменные среды для подключения к серверу поиска')
+            logger.info(" * Задайте переменные среды для подключения к серверу поиска")

     def delete_index(self):
         if self.client:
-            logger.debug(f' Удаляем индекс {self.index_name}')
+            logger.debug(f" Удаляем индекс {self.index_name}")
             self.client.indices.delete(index=self.index_name, ignore_unavailable=True)

     def create_index(self):
         if self.client:
-            logger.debug(f'Создается индекс: {self.index_name}')
+            logger.debug(f"Создается индекс: {self.index_name}")
             self.delete_index()
             self.client.indices.create(index=self.index_name, body=index_settings)
-            logger.debug(f'Индекс {self.index_name} создан')
+            logger.debug(f"Индекс {self.index_name} создан")

     async def check_index(self):
         if self.client:
-            logger.debug(f' Проверяем индекс {self.index_name}...')
+            logger.debug(f" Проверяем индекс {self.index_name}...")
             if not self.client.indices.exists(index=self.index_name):
                 self.create_index()
                 self.client.indices.put_mapping(
                     index=self.index_name, body=expected_mapping
                 )
             else:
-                logger.info(f'найден существующий индекс {self.index_name}')
+                logger.info(f"найден существующий индекс {self.index_name}")

                 # Check if the mapping is correct, and recreate the index if needed
                 result = self.client.indices.get_mapping(index=self.index_name)
                 if isinstance(result, str):
                     result = json.loads(result)
                 if isinstance(result, dict):
-                    mapping = result.get('mapping')
+                    mapping = result.get("mapping")
                     if mapping and mapping != expected_mapping:
-                        logger.debug(f' найдена структура индексации: {mapping}')
+                        logger.debug(f" найдена структура индексации: {mapping}")
                         logger.warn(
-                            ' требуется другая структура индексации, переиндексация'
+                            " требуется другая структура индексации, переиндексация"
                         )
                         await self.recreate_index()
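A behavioral note on the mapping check above, untouched by this quote-style commit: opensearch-py's indices.get_mapping returns a dict keyed by index name with a plural "mappings" entry, so result.get("mapping") presumably always yields None and the reindex branch never fires (the Russian log lines here say "found existing index" and "a different index structure is required, reindexing"; logger.warn is also the deprecated alias for logger.warning). A hedged sketch of the lookup as likely intended, under that response-shape assumption:

# Assumption: response shape {"<index>": {"mappings": {...}}}, as
# opensearch-py returns it; the singular "mapping" key would miss it.
result = self.client.indices.get_mapping(index=self.index_name)
mapping = result.get(self.index_name, {}).get("mappings")
if mapping and mapping != expected_mapping:
    await self.recreate_index()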
@@ -130,28 +130,30 @@ class SearchService:
     def index(self, shout):
         if self.client:
             id_ = str(shout.id)
-            logger.debug(f' Индексируем пост {id_}')
+            logger.debug(f" Индексируем пост {id_}")
             asyncio.create_task(self.perform_index(shout))

     async def perform_index(self, shout):
         if self.client:
-            self.client.index(index=self.index_name, id=str(shout.id), body=shout.dict())
+            self.client.index(
+                index=self.index_name, id=str(shout.id), body=shout.dict()
+            )

     async def search(self, text, limit, offset):
-        logger.debug(f' Ищем: {text}')
-        search_body = {'query': {'match': {'_all': text}}}
+        logger.debug(f" Ищем: {text}")
+        search_body = {"query": {"match": {"_all": text}}}
         if self.client:
             search_response = self.client.search(
                 index=self.index_name, body=search_body, size=limit, from_=offset
             )
-            hits = search_response['hits']['hits']
-            results = [{**hit['_source'], 'score': hit['_score']} for hit in hits]
+            hits = search_response["hits"]["hits"]
+            results = [{**hit["_source"], "score": hit["_score"]} for hit in hits]

             # Use Redis as cache with TTL
-            redis_key = f'search:{text}'
+            redis_key = f"search:{text}"
             await redis.execute(
-                'SETEX',
+                "SETEX",
                 redis_key,
                 REDIS_TTL,
                 json.dumps(results, cls=CustomJSONEncoder),
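Two notes on the search path, also unchanged by this commit: the _all meta-field was removed in Elasticsearch 6/7 and is absent from OpenSearch, so the match on _all likely returns nothing and a multi_match over the mapped fields may be what was intended; and the hunk shows the SETEX cache write but not the read side. A sketch of both under those assumptions; cached_search, GET support on the redis wrapper, and the search_service instance are illustrative names, not shown in the diff:

# Assumption: query the fields actually mapped in index_settings,
# since OpenSearch has no _all meta-field.
search_body = {
    "query": {
        "multi_match": {
            "query": text,
            "fields": ["body", "title", "subtitle", "lead"],
        }
    }
}

# Assumption: the same redis.execute wrapper accepts GET, search_service
# is a module-level SearchService instance, and search() returns the
# results list it caches.
async def cached_search(text, limit, offset):
    cached = await redis.execute("GET", f"search:{text}")
    if cached:
        return json.loads(cached)
    return await search_service.search(text, limit, offset)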