chainlit: Error in AsyncLangchainCallbackHandler.on_llm_start callback: 'NoneType' object is not a mapping
I am trying to follow the document QA example on a local dataset, the only difference being that I use Llama 2 instead of OpenAI. I am stumped by the error in AsyncLangchainCallbackHandler, and whatever I have tried has been unsuccessful. How can I overcome this error?
My code is:
```python
import chainlit as cl
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

text_splitter = RecursiveCharacterTextSplitter()

system_template = """some template"""

messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)
chain_type_kwargs = {"prompt": prompt}

name = "meta-llama/Llama-2-7b-chat-hf"
auth_token = "****"


@cl.on_chat_start
async def init():
    print('start')
    with open('file.txt', 'r') as f:
        text = f.read()

    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name)

    # Split the text into chunks
    texts = text_splitter.split_text(text)

    # Create a metadata for each chunk
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    # Create a Chroma vector store
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    print('embeddings created')
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas, persist_directory="db"
    )
    docsearch.persist()
    print('docsearch done')

    generate_text = transformers.pipeline(
        model=model,
        tokenizer=tokenizer,
        return_full_text=True,
        task='text-generation',
        # we pass model parameters here too
        temperature=0.1,
        max_new_tokens=512,
        repetition_penalty=1.1,
    )
    llm = HuggingFacePipeline(pipeline=generate_text)

    # Create a chain that uses the Chroma vector store
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm,
        chain_type="stuff",
        chain_type_kwargs=chain_type_kwargs,
        retriever=docsearch.as_retriever(),
    )

    # Save the metadata and texts in the user session
    cl.user_session.set("metadatas", metadatas)
    cl.user_session.set("texts", texts)
    cl.user_session.set("chain", chain)


@cl.on_message
async def main(message):
    chain = cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    cb.answer_reached = True

    res = await chain.acall(message, callbacks=[cb])
    print('res updated')
    answer = res["answer"].strip()
    sources = res["sources"].strip()
    source_elements = []

    # Get the metadata and texts from the user session
    metadatas = cl.user_session.get("metadatas")
    all_sources = [m["source"] for m in metadatas]
    texts = cl.user_session.get("texts")

    if sources:
        found_sources = []
        # Add the sources to the message
        for source in sources.split(","):
            source_name = source.strip().replace(".", "")
            # Get the index of the source
            try:
                index = all_sources.index(source_name)
            except ValueError:
                continue
            text = texts[index]
            found_sources.append(source_name)
            # Create the text element referenced in the message
            source_elements.append(cl.Text(content=text, name=source_name))

        if found_sources:
            answer += f"\nSources: {', '.join(found_sources)}"
        else:
            answer += "\nNo sources found"

    if cb.has_streamed_final_answer:
        cb.final_stream.elements = source_elements
        await cb.final_stream.update()
    else:
        await cl.Message(content=answer, elements=source_elements).send()
```
About this issue
- Original URL
- State: closed
- Created 10 months ago
- Comments: 15
This one I understand; the easy fix is to add a default argument of None to the `pop` call. Will release a fix: https://github.com/Chainlit/chainlit/pull/368/files
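For anyone hitting this before the release, the pattern behind the fix looks roughly like the sketch below. The names here (the handler signature, `invocation_params`) are illustrative rather than Chainlit's actual internals, so refer to the linked PR for the real change: `dict.pop(key)` without a default raises `KeyError` when the key is missing, and unpacking `None` with `**` produces exactly the `'NoneType' object is not a mapping` error from the title, so the callback needs a safe fallback.

```python
# Illustrative sketch only; see the linked PR for the actual Chainlit change.
def on_llm_start(serialized, prompts, **kwargs):
    # Passing a default to pop() avoids a KeyError when the key is absent,
    # and the `or {}` guard avoids "'NoneType' object is not a mapping"
    # when the value is present but None and later unpacked with **.
    invocation_params = kwargs.pop("invocation_params", None) or {}
    settings = {**invocation_params}
    return settings
```

The idea is simply that the handler tolerates LLM integrations that do not supply every field the callback expects.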