Source code for reranking.rerank

"""
import datetime, uuid, logging, json, random
import asyncio

from django_core.config import Config
from rag_service.openai_service import make_openai_request, query_qdrant_collection
"""


#logger = logging.getLogger(__name__)


def parse_single_rerank_json(json_string: str) -> dict:
    """Parse a single entry of a reranked result.

    The JSON object is taken to span from the first ``{`` to the last ``}``
    in ``json_string``; any text around that span is ignored.

    Args:
        json_string: Text containing one JSON object.

    Returns:
        The decoded JSON object.

    Raises:
        json.JSONDecodeError: If no ``{...}`` span exists or the span is not
            valid JSON.
    """
    start_index = json_string.find("{")
    end_index = json_string.rfind("}") + 1

    # Without this guard a missing or reversed brace pair yields an empty
    # slice and json.loads fails with an unhelpful "Expecting value" message.
    if start_index == -1 or end_index <= start_index:
        raise json.JSONDecodeError(
            "No JSON object found in rerank response", json_string, 0
        )

    return json.loads(json_string[start_index:end_index])
async def rerank_query(
    original_query,
    rephrased_query,
    email_id,
    retrieval_results=None,
    *,
    max_context_chunks=6,
):
    """Rerank the retrieved content chunks with the rephrased query from OpenAI.

    One prompt is built per retrieved chunk and all prompts are sent
    concurrently. Replies classified ``"YES"`` are kept, sorted by
    ``relevance_score`` and used to build the context chunk list.

    Args:
        original_query: The query as originally asked; echoed in the result.
        rephrased_query: The query text inserted into each reranking prompt.
        email_id: Accepted for interface compatibility; not used here.
        retrieval_results: Iterable of mappings read through the keys
            ``"document"``, ``"cmetadata"`` and ``"similarity"``. ``None`` or
            an empty value short-circuits to the default result.
        max_context_chunks: Maximum number of chunk texts placed in
            ``"context_chunks"``.

    Returns:
        dict with the keys ``original_query``, ``retrieved_chunks``,
        ``reranked_chunks``, ``rerank_start_time``, ``rerank_end_time``,
        ``rerank_request_start_time``, ``rerank_request_end_time``,
        ``completion_tokens``, ``prompt_tokens``, ``total_tokens``,
        ``is_rerank_response_parsed``, ``rerank_exception``,
        ``rerank_retries`` and ``context_chunks``.
    """
    # The module-level logger is not defined in this file, so resolve it here.
    logger = logging.getLogger(__name__)

    response_map = {
        "original_query": original_query,
        "retrieved_chunks": None,
        "reranked_chunks": None,
        "rerank_start_time": None,
        "rerank_end_time": None,
        "rerank_request_start_time": None,
        "rerank_request_end_time": None,
        "completion_tokens": 0,
        "prompt_tokens": 0,
        "total_tokens": 0,
        "is_rerank_response_parsed": False,
        "rerank_exception": "",
        "rerank_retries": 0,
        "context_chunks": [],
    }
    if not retrieval_results:
        return response_map

    rerank_start_time = datetime.datetime.now()

    # Sequential ids are unique by construction; random ids could collide and
    # silently overwrite an earlier chunk in doc_map.
    doc_map = {}
    docs_for_reranking = []
    for chunk_id, data in enumerate(retrieval_results, start=1):
        chunk_text = data.get("document", "")
        doc_map[chunk_id] = {
            "text": chunk_text,
            "metadata": data.get("cmetadata", {}),
            "score": data.get("similarity"),
        }
        docs_for_reranking.append({"id": chunk_id, "text_chunk": chunk_text})

    rerank_prompt_list = [
        Config.RERANKING_PROMPT_SINGLE_TEMPLATE.format(
            json_example=Config.RERANK_SINGLE_JSON_EXAMPLE,
            text=rerank_doc,
            question=rephrased_query,
        )
        for rerank_doc in docs_for_reranking
    ]

    rerank_request_start_time = datetime.datetime.now()
    reranking_results = await asyncio.gather(
        *(
            make_openai_request(prompt, model=Config.GPT_4_MODEL)
            for prompt in rerank_prompt_list
        )
    )
    rerank_request_end_time = datetime.datetime.now()

    rerank_completion_tokens = 0
    rerank_prompt_tokens = 0
    rerank_total_tokens = 0
    rerank_retries = 0
    rerank_exception = ""
    # Starts True and is cleared by any failed, unparsable or unusable reply.
    is_rerank_response_parsed = True
    reranked_list = []

    for response, exception, retries in reranking_results:
        if not response:
            is_rerank_response_parsed = False
            rerank_retries += retries or 0
            if exception is not None:
                # Formatting keeps this safe when the value is not a str.
                rerank_exception += f"{exception}\n"
            continue

        rerank_completion_tokens += response.usage.completion_tokens
        rerank_prompt_tokens += response.usage.prompt_tokens
        rerank_total_tokens += response.usage.total_tokens

        try:
            response_obj = parse_single_rerank_json(
                response.choices[0].message.content
            )
        except Exception as error:
            # Deliberately broad: one malformed model reply must not abort
            # the whole rerank.
            logger.error(error, exc_info=True)
            is_rerank_response_parsed = False
            continue

        if response_obj.get("classification") != "YES":
            continue

        # The id and score come from model output; drop replies that cannot
        # be mapped back to a chunk or ranked instead of crashing later.
        try:
            chunk_key = int(response_obj["id"])
        except (KeyError, TypeError, ValueError):
            chunk_key = None
        if chunk_key not in doc_map or "relevance_score" not in response_obj:
            logger.error(
                "Rerank response has an unknown id or no relevance_score: %s",
                response_obj,
            )
            is_rerank_response_parsed = False
            continue

        reranked_list.append(response_obj)

    # NOTE(review): ascending order, so the lowest relevance_score comes
    # first — confirm that lower scores mean "more relevant" for this prompt.
    sorted_reranked_list = sorted(
        reranked_list, key=lambda item: item["relevance_score"]
    )
    rerank_end_time = datetime.datetime.now()

    reranked_chunk_map = {}
    context_chunks = []
    for item in sorted_reranked_list:
        chunk = doc_map[int(item["id"])]
        if len(context_chunks) < max_context_chunks:
            context_chunks.append(chunk.get("text"))
        reranked_chunk_map[item["id"]] = {
            "chunk": chunk,
            "rank": item["relevance_score"],
        }

    response_map.update(
        {
            "retrieved_chunks": doc_map,
            "reranked_chunks": reranked_chunk_map,
            "rerank_start_time": rerank_start_time,
            "rerank_end_time": rerank_end_time,
            "rerank_request_start_time": rerank_request_start_time,
            "rerank_request_end_time": rerank_request_end_time,
            "completion_tokens": rerank_completion_tokens,
            "prompt_tokens": rerank_prompt_tokens,
            "total_tokens": rerank_total_tokens,
            "is_rerank_response_parsed": is_rerank_response_parsed,
            "rerank_exception": rerank_exception,
            "rerank_retries": rerank_retries,
            "context_chunks": context_chunks,
        }
    )
    return response_map