How does one correctly parse data from load_qa_chain?
It is easy to retrieve an answer using the QA chain, but we want the LLM to return two answers, which are then parsed by an output parser, PydanticOutputParser.
The chain returns:
{'output_text': '\n1. Contract item of interest: Termination. \n2. Termination: Yes.'}
But trying to parse the result with the parser raises JSONDecodeError: Expecting value: line 1 column 1 (char 0):
parser.parse(result['output_text'])
How should I change the chain to be able to correctly parse the output?
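For reference, as I understand it PydanticOutputParser only succeeds when the text is a JSON object matching the Pydantic model (parser and PersonIntel are defined in the code below), so something like the first call parses while the numbered-list text the chain returns does not (the values here are just illustrative):

# Parses fine: valid JSON matching the PersonIntel schema (illustrative values)
parser.parse('{"contract_item_of_interest": "Termination", "item": "Yes"}')

# Raises JSONDecodeError: the numbered-list text returned by the chain is not JSON
parser.parse('\n1. Contract item of interest: Termination. \n2. Termination: Yes.')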
Here is the chain:
from langchain.chains.question_answering import load_qa_chain
from langchain import PromptTemplate
from dotenv import load_dotenv
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import AzureOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.document_loaders.csv_loader import CSVLoader
load_dotenv()
def load_embeddings(row_num):
    """
    This function loads embeddings from source documents. We want to answer
    questions from these documents.
    """
    embedding_function = OpenAIEmbeddings(
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        deployment=os.getenv('EMBEDDING_DEPLOYMENT_NAME'),
        model=os.getenv('EMBEDDING_MODEL'),
        chunk_size=1
    )
    loader = CSVLoader(file_path='data/contacts.csv', source_column="Context", csv_args={"delimiter": ','})
    doc = loader.load()
    contract = [doc[row_num].page_content]
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        length_function=len,
    )
    context_split = text_splitter.create_documents(contract)
    db = Chroma.from_documents(context_split, embedding_function)
    return db
row_num = 0
db = load_embeddings(row_num)
item = "Termination Clause" # The item we want to search for
class PersonIntel(BaseModel):
    contract_item_of_interest: str = Field(description="Item that is being searched for in the contract")
    item: str = Field(description="Is there any evidence of the item in the contract? Yes or No")

    def to_dict(self):
        return {
            "contract_item_of_interest": self.contract_item_of_interest,
            "item": self.item,
        }
llm = AzureOpenAI(
    deployment_name=os.getenv('CHAT_DEPLOYMENT_NAME'),
    openai_api_version="2023-05-15",
    model_name=os.getenv('CHAT_MODEL'),
    temperature=0
)
query = f"Is there any evidence of {item} in the contract?"
docs = db.similarity_search(query)
template = """
You are a legal assistant. Given the contract below, answer the question.
Questions:
1. Can you tell me whether {question} is mentioned in this contract?
{summaries}
# Your answer must be returned in the following structure:
1. Contract item of interest: {question}.
2. {question}: Yes or No.
\n{format_instructions}
"""
parser = PydanticOutputParser(pydantic_object=PersonIntel)
PROMPT = PromptTemplate(
    template=template,
    input_variables=["summaries", "question"],
    partial_variables={
        "format_instructions": parser.get_format_instructions()
    }
)
chain = load_qa_chain(llm, chain_type="stuff", prompt=PROMPT, document_variable_name="summaries")
result = chain({"input_documents": docs, "question": item}, return_only_outputs=True)
result