GPTCache: [Bug]: Using ConversationalRetrievalChain with question_generator and LLMChain in Langchain does not produce cache hits

Current Behavior

This code follows the documentation steps to add a cache to ConversationalRetrievalChain with Langchain, but it is not working properly:

import os
import time

import openai
from dotenv import load_dotenv

from langchain import ElasticVectorSearch
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain

from gptcache import cache
from gptcache.adapter.langchain_models import LangChainChat
from gptcache.embedding import OpenAI
from gptcache.manager import CacheBase, VectorBase, get_data_manager
from gptcache.processor.pre import get_messages_last_content
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

load_dotenv()

openai.api_key = os.getenv("AZURE_OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_type = os.getenv("OPENAI_API_TYPE")
openai.api_version = os.getenv("AZURE_OPENAI_API_VERSION")

# GPTCache embedding client pointing at the Azure embedding deployment
openai_client = OpenAI(model=os.getenv("EMBEDDING_MODEL_DEPLOYMENT"))

# get the content (only the question) from the prompt to cache
# (note: not used below; cache.init uses get_messages_last_content instead)
def get_content_func(data, **_):
    return data.get("prompt").split("Question")[-1]

cache_base = CacheBase('sqlite')
vector_base = VectorBase('faiss', dimension=openai_client.dimension, collection_name='chatbot')
data_manager = get_data_manager(cache_base, vector_base)
cache.init(
    pre_embedding_func=get_messages_last_content,
    embedding_func=openai_client.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
    )
cache.set_openai_key()

gpt_client = LangChainChat(chat=AzureChatOpenAI(
        openai_api_base=os.getenv("OPENAI_API_BASE"),
        openai_api_version="2023-03-15-preview",
        deployment_name=os.getenv("CHAT_COMPLETION_DEPLOYMENT"),
        openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        openai_api_type="azure"
    ))

QUESTION_ANSWER_PROMPT = """
        [INSTRUCTION]: You are a helpful chatbot that has to satisfy user requests in its
        original language in the [USER REQUEST] section to the best of your capabilities.
      
        [SOURCES OF INFORMATION]:{context}
        [USER REQUEST]: {question}"""


question_prompt_template = PromptTemplate(template=QUESTION_ANSWER_PROMPT, input_variables=["context", "question"])

CONDENSE_PROMPT = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
        Chat History:
        {chat_history}
        Follow Up Input: {question}
        Standalone question:"""

condense_prompt_template = PromptTemplate.from_template(CONDENSE_PROMPT)

doc_chain = load_qa_chain(gpt_client, chain_type="stuff", prompt=question_prompt_template)

question_generator = LLMChain(llm=gpt_client, prompt=condense_prompt_template)

# elastic_client (an ElasticVectorSearch client) is defined elsewhere in the notebook
question_answer_chain = ConversationalRetrievalChain(
    retriever=elastic_client._es_client.as_retriever(search_type="similarity", search_kwargs={"k": 12}),
    combine_docs_chain=doc_chain,
    return_source_documents=True,
    question_generator=question_generator,
    return_generated_question=True,
    verbose=True)

vectordbkwargs = {"search_distance": 0.7}

chat_history=""
user_query = "Who won the competition?"
chat_history=""
start_time = time.time()
result = question_answer_chain({"question": user_query, "chat_history": chat_history ,"vectordbkwargs": vectordbkwargs})
print("Time consuming: {:.2f}s".format(time.time() - start_time))

This returns in 2.77 s; repeating the same query returns in 4.55 s, so the cache is not working.
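To isolate whether the cache itself works, independent of the retrieval chain, one quick check is to call the cached LangChainChat client directly twice and time it (a sketch using the gpt_client defined above; the question text is just an illustration):

from langchain.schema import HumanMessage

# Two identical calls against the cached chat client; the second one
# should be answered from GPTCache and return almost instantly.
for attempt in range(2):
    start = time.time()
    answer = gpt_client(messages=[HumanMessage(content="Who won the competition?")])
    print("attempt {}: {:.2f}s".format(attempt + 1, time.time() - start))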

Expected Behavior

Repeating the same query should take close to zero time, since the answer will be returned from the cache.

Steps To Reproduce

Python = 3.9.7
GPTCache = latest (v0.1.35)
langchain = latest (v0.0.229)

Environment

Windows, Jupyter Notebook

Anything else?

No response

About this issue

  • Original URL
  • State: closed
  • Created a year ago
  • Comments: 23

Most upvoted comments

@SimFG I found that the solution is to set the max_distance value:

cache.init(
    ...
    similarity_evaluation=SearchDistanceEvaluation(max_distance=1.0)
)
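For context, this is the same cache.init call from the issue with only the evaluator changed; the comment reflects my understanding of SearchDistanceEvaluation (the score is roughly max_distance minus the vector distance), not something I verified in the source:

cache.init(
    pre_embedding_func=get_messages_last_content,
    embedding_func=openai_client.to_embeddings,
    data_manager=data_manager,
    # A smaller max_distance tightens the cutoff for what counts as a hit,
    # so unrelated questions stop reusing the first cached answer.
    similarity_evaluation=SearchDistanceEvaluation(max_distance=1.0),
)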

I appreciate the help

@SimFG so I created this function to extract only the query from the prompt and passed it as pre_embedding_func, but it still gives the same first cached answer for every query:

def custom_get_messages_last_content(data, **params):
    separator = '[USER REQUEST]:'
    string = data.get("messages")[-1].content
    if separator in string:
        return string.split(separator, 1)[1]
    # fall back to the full message content when the separator is missing
    return string
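For reference, wiring it in only means swapping the function into the cache.init call shown earlier (same setup as above, only pre_embedding_func changes):

cache.init(
    pre_embedding_func=custom_get_messages_last_content,
    embedding_func=openai_client.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)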

Hi @Yafaa5, I ran the demo code and the cache works well. My full code:

import getpass
import time

from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Milvus

from gptcache import cache
from gptcache.adapter.langchain_models import LangChainChat
from gptcache.embedding import Onnx
from gptcache.manager import CacheBase, VectorBase, get_data_manager
from gptcache.processor.pre import get_messages_last_content
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

openai_key = getpass.getpass("Enter your OpenAI key: ")

# diff 1
# openai_client = OpenAI(model=os.getenv("EMBEDDING_MODEL_DEPLOYMENT"))
openai_client = Onnx()

# get the content (only the question) from the prompt to cache
def get_content_func(data, **_):
    return data.get("prompt").split("Question")[-1]


cache_base = CacheBase('sqlite')
vector_base = VectorBase('faiss', dimension=openai_client.dimension, collection_name='chatbot')
data_manager = get_data_manager(cache_base, vector_base)
cache.init(
    pre_embedding_func=get_messages_last_content,
    embedding_func=openai_client.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)

# diff 2
gpt_client = LangChainChat(chat=ChatOpenAI(openai_api_key=openai_key))

QUESTION_ANSWER_PROMPT = """
        [INSTRUCTION]: You are a helpful chatbot that has to satisfy user requests in its
        original language in the [USER REQUEST] section to the best of your capabilities.

        [SOURCES OF INFORMATION]:{context}
        [USER REQUEST]: {question}"""

question_prompt_template = PromptTemplate(template=QUESTION_ANSWER_PROMPT, input_variables=["context", "question"])

CONDENSE_PROMPT = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
        Chat History:
        {chat_history}
        Follow Up Input: {question}
        Standalone question:"""

condense_prompt_template = PromptTemplate.from_template(CONDENSE_PROMPT)

doc_chain = load_qa_chain(gpt_client, chain_type="stuff", prompt=question_prompt_template)

question_generator = LLMChain(llm=gpt_client, prompt=condense_prompt_template)

# diff 3
vector_store = Milvus.from_texts(texts=[], embedding=OpenAIEmbeddings(openai_api_key=openai_key))
question_answer_chain = ConversationalRetrievalChain(
    retriever=vector_store.as_retriever(),
    combine_docs_chain=doc_chain, return_source_documents=True, question_generator=question_generator,
    return_generated_question=True, verbose=True)

vectordbkwargs = {"search_distance": 0.7}

chat_history = ""
user_query = "Who won the competition?"
chat_history = ""
start_time = time.time()
result = question_answer_chain({"question": user_query, "chat_history": chat_history, "vectordbkwargs": vectordbkwargs})
print("Time consuming: {:.2f}s".format(time.time() - start_time))

I have marked the differences from your code with comments: diff 1, diff 2, and diff 3.

The test result is shown in the attached screenshot.

So I guess it may be caused by an unstable network.