GPTCache: [Bug]: Using ConversationalRetrievalChain with question_generator and LLMChain in Langchain does not produce cache
Current Behavior
This code follows the documentation steps for adding a cache to ConversationalRetrievalChain with LangChain, but it is not working properly:
import os
import time

import openai
from dotenv import load_dotenv
from langchain import ElasticVectorSearch
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain

from gptcache import cache
from gptcache.adapter.langchain_models import LangChainChat
from gptcache.embedding import OpenAI
from gptcache.manager import CacheBase, VectorBase, get_data_manager
from gptcache.processor.pre import get_messages_last_content
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

# Load Azure OpenAI credentials from the environment
load_dotenv()
openai.api_key = os.getenv("AZURE_OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_type = os.getenv("OPENAI_API_TYPE")
openai.api_version = os.getenv("AZURE_OPENAI_API_VERSION")

# Embedding client used by GPTCache
openai_client = OpenAI(model=os.getenv("EMBEDDING_MODEL_DEPLOYMENT"))
# Get only the question content from the prompt to cache
def get_content_func(data, **_):
    return data.get("prompt").split("Question")[-1]
cache_base = CacheBase('sqlite')
vector_base = VectorBase('faiss', dimension=openai_client.dimension, collection_name='chatbot')
data_manager = get_data_manager(cache_base, vector_base)
cache.init(
    pre_embedding_func=get_messages_last_content,
    embedding_func=openai_client.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)
cache.set_openai_key()
gpt_client = LangChainChat(chat=AzureChatOpenAI(
    openai_api_base=os.getenv("OPENAI_API_BASE"),
    openai_api_version="2023-03-15-preview",
    deployment_name=os.getenv("CHAT_COMPLETION_DEPLOYMENT"),
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    openai_api_type="azure",
))
QUESTION_ANSWER_PROMPT = """
[INSTRUCTION]: You are a helpful chatbot that has to satisfy user requests in its
original language in the [USER REQUEST] section to the best of your capabilities.
[SOURCES OF INFORMATION]:{context}
[USER REQUEST]: {question}"""
question_prompt_template = PromptTemplate(template=QUESTION_ANSWER_PROMPT, input_variables=["context", "question"])
CONDENSE_PROMPT = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
condense_prompt_template = PromptTemplate.from_template(CONDENSE_PROMPT)
doc_chain = load_qa_chain(gpt_client, chain_type="stuff", prompt=question_prompt_template)
question_generator = LLMChain(llm=gpt_client, prompt=condense_prompt_template)
# elastic_client is an ElasticVectorSearch instance created elsewhere
question_answer_chain = ConversationalRetrievalChain(
    retriever=elastic_client._es_client.as_retriever(search_type="similarity", search_kwargs={"k": 12}),
    combine_docs_chain=doc_chain,
    return_source_documents=True,
    question_generator=question_generator,
    return_generated_question=True,
    verbose=True,
)
vectordbkwargs = {"search_distance": 0.7}
chat_history = ""
user_query = "Who won the competition?"
start_time = time.time()
result = question_answer_chain({"question": user_query, "chat_history": chat_history, "vectordbkwargs": vectordbkwargs})
print("Time consuming: {:.2f}s".format(time.time() - start_time))
This returns in 2.77s; repeating the same query returns in 4.55s, so the cache is not working.
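One way to narrow this down is to bypass the retrieval chain and call the GPTCache-wrapped chat model directly twice with the same message; if the second call is still slow, the problem is in the cache setup itself rather than in ConversationalRetrievalChain. A minimal sketch, reusing the gpt_client and cache.init setup from the snippet above:

import time
from langchain.schema import HumanMessage

def timed_call(msg):
    # Call the cached chat model directly and report the elapsed time
    start = time.time()
    answer = gpt_client([HumanMessage(content=msg)])
    print("elapsed: {:.2f}s".format(time.time() - start))
    return answer

timed_call("Who won the competition?")   # first call: goes to Azure OpenAI
timed_call("Who won the competition?")   # second call: should be answered from the cache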
Expected Behavior
Repeating the same query should take close to zero time, since the answer will be returned from the cache.
Steps To Reproduce
Python = 3.9.7
GPTcache = latest (v0.1.35)
langchain = latest (v0.0.229)
Environment
Windows, Jupyter Notebook
Anything else?
No response
About this issue
- Original URL
- State: closed
- Created a year ago
- Comments: 23
@SimFG I found that the solution is to set the max_distance value.
I appreciate the help
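For reference, that change would look roughly like the sketch below. In SearchDistanceEvaluation, max_distance caps the raw vector-search distance and sets the score range compared against the cache's similarity threshold; the 1.0 value here is only an illustrative example, not necessarily the value that was actually used.

from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

cache.init(
    pre_embedding_func=get_messages_last_content,
    embedding_func=openai_client.to_embeddings,
    data_manager=data_manager,
    # Example value; tune it to the FAISS distances actually observed
    similarity_evaluation=SearchDistanceEvaluation(max_distance=1.0),
)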
@SimFG So I created this function to extract only the query from the prompt and passed it as pre_embedding_func, but it still gives the same first cached answer for every query.
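The exact function is not shown in the thread; a sketch of what such a pre-embedding extractor could look like, assuming the [USER REQUEST] prompt format from the code above (the name extract_user_request is made up for illustration):

from gptcache.processor.pre import get_messages_last_content

def extract_user_request(data, **_):
    # Keep only the user question so the retrieved context does not
    # influence the cache key
    last_content = get_messages_last_content(data)
    if "[USER REQUEST]:" in last_content:
        return last_content.split("[USER REQUEST]:")[-1].strip()
    return last_content

cache.init(
    pre_embedding_func=extract_user_request,
    embedding_func=openai_client.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)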
hi, @Yafaa5 I ran the demo code and the cache works well. My full code:
I have commented the differences in the code, like diff 1, diff 2, diff 3.
The test result:
So I guess it may be caused by an unstable network.