I am building a microservice with a document loader, and the app fails to launch: it crashes at import time, when importing langchain's UnstructuredMarkdownLoader.
$ flask --app main run --debug
Traceback (most recent call last):
File "venv/bin/flask", line 8, in <module>
sys.exit(main())
File "venv/lib/python3.9/site-packages/flask/cli.py", line 1063, in main
cli.main()
File "venv/lib/python3.9/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "venv/lib/python3.9/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "venv/lib/python3.9/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "venv/lib/python3.9/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "venv/lib/python3.9/site-packages/click/decorators.py", line 84, in new_func
return ctx.invoke(f, obj, *args, **kwargs)
File "venv/lib/python3.9/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "venv/lib/python3.9/site-packages/flask/cli.py", line 911, in run_command
raise e from None
File "venv/lib/python3.9/site-packages/flask/cli.py", line 897, in run_command
app = info.load_app()
File "venv/lib/python3.9/site-packages/flask/cli.py", line 308, in load_app
app = locate_app(import_name, name)
File "venv/lib/python3.9/site-packages/flask/cli.py", line 218, in locate_app
__import__(module_name)
File "main.py", line 5, in <module>
from lc_indexer import index_documents
File "lc_indexer.py", line 5, in <module>
from langchain.document_loaders import UnstructuredMarkdownLoader
File "venv/lib/python3.9/site-packages/langchain/__init__.py", line 6, in <module>
from langchain.agents import MRKLChain, ReActChain, SelfAskWithSearchChain
File "venv/lib/python3.9/site-packages/langchain/agents/__init__.py", line 2, in <module>
from langchain.agents.agent import (
File "venv/lib/python3.9/site-packages/langchain/agents/agent.py", line 16, in <module>
from langchain.agents.tools import InvalidTool
File "venv/lib/python3.9/site-packages/langchain/agents/tools.py", line 8, in <module>
from langchain.tools.base import BaseTool, Tool, tool
File "venv/lib/python3.9/site-packages/langchain/tools/__init__.py", line 42, in <module>
from langchain.tools.vectorstore.tool import (
File "venv/lib/python3.9/site-packages/langchain/tools/vectorstore/tool.py", line 13, in <module>
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
File "venv/lib/python3.9/site-packages/langchain/chains/__init__.py", line 2, in <module>
from langchain.chains.api.base import APIChain
File "venv/lib/python3.9/site-packages/langchain/chains/api/base.py", line 13, in <module>
from langchain.chains.api.prompt import API_RESPONSE_PROMPT, API_URL_PROMPT
File "venv/lib/python3.9/site-packages/langchain/chains/api/prompt.py", line 2, in <module>
from langchain.prompts.prompt import PromptTemplate
File "venv/lib/python3.9/site-packages/langchain/prompts/__init__.py", line 3, in <module>
from langchain.prompts.chat import (
File "venv/lib/python3.9/site-packages/langchain/prompts/chat.py", line 10, in <module>
from langchain.memory.buffer import get_buffer_string
File "venv/lib/python3.9/site-packages/langchain/memory/__init__.py", line 28, in <module>
from langchain.memory.vectorstore import VectorStoreRetrieverMemory
File "venv/lib/python3.9/site-packages/langchain/memory/vectorstore.py", line 10, in <module>
from langchain.vectorstores.base import VectorStoreRetriever
File "venv/lib/python3.9/site-packages/langchain/vectorstores/__init__.py", line 2, in <module>
from langchain.vectorstores.analyticdb import AnalyticDB
File "venv/lib/python3.9/site-packages/langchain/vectorstores/analyticdb.py", line 16, in <module>
from langchain.embeddings.base import Embeddings
File "venv/lib/python3.9/site-packages/langchain/embeddings/__init__.py", line 19, in <module>
from langchain.embeddings.openai import OpenAIEmbeddings
File "venv/lib/python3.9/site-packages/langchain/embeddings/openai.py", line 67, in <module>
class OpenAIEmbeddings(BaseModel, Embeddings):
File "pydantic/main.py", line 197, in pydantic.main.ModelMetaclass.__new__
File "pydantic/fields.py", line 506, in pydantic.fields.ModelField.infer
File "pydantic/fields.py", line 436, in pydantic.fields.ModelField.__init__
File "pydantic/fields.py", line 552, in pydantic.fields.ModelField.prepare
File "pydantic/fields.py", line 663, in pydantic.fields.ModelField._type_analysis
File "pydantic/fields.py", line 808, in pydantic.fields.ModelField._create_sub_type
File "pydantic/fields.py", line 436, in pydantic.fields.ModelField.__init__
File "pydantic/fields.py", line 552, in pydantic.fields.ModelField.prepare
File "pydantic/fields.py", line 668, in pydantic.fields.ModelField._type_analysis
File "/home/my_username/.pyenv/versions/3.9.16/lib/python3.9/typing.py", line 852, in __subclasscheck__
return issubclass(cls, self.__origin__)
TypeError: issubclass() arg 1 must be a class
Here is the content of lc_indexer.py, where the langchain imports occur:
# INDEX DOCUMENTS
import os
from os.path import join, isfile
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import TokenTextSplitter, CharacterTextSplitter
from langchain.vectorstores import Chroma
def index_documents(source_directories: list[str], persist_directory: str, chunk_size: int = 1000,
                    chunk_overlap: int = 15):
    """
    Index the documents found in the given directories.

    Only regular files directly inside each directory are picked up;
    sub-directories are not traversed. Each file is handed to
    ``index_file_to_chroma`` together with a single shared embeddings object.

    :param source_directories: directories whose files should be indexed
    :param persist_directory: directory passed through to ``index_file_to_chroma``
    :param chunk_size: chunk size passed through to ``index_file_to_chroma``
    :param chunk_overlap: chunk overlap passed through to ``index_file_to_chroma``
    :return: None
    """
    # Collect every regular file up front, before any embedding work starts.
    file_paths = [
        f'{root}/{entry}'
        for root in map(str, source_directories)
        for entry in os.listdir(root)
        if isfile(join(root, entry))
    ]

    # One embeddings instance is created and reused for all files.
    embedder = OpenAIEmbeddings()
    for path in file_paths:
        index_file_to_chroma(path, persist_directory, embedder, chunk_size, chunk_overlap)
def index_file_to_chroma(file: str, persist_directory: str, embeddings: OpenAIEmbeddings, chunk_size: int, chunk_overlap: int):
    """
    Index a single document into Chroma.

    The file is loaded with ``UnstructuredMarkdownLoader``, split first with a
    ``CharacterTextSplitter`` (no overlap) and then with a
    ``TokenTextSplitter`` (using ``chunk_overlap``), and the resulting pieces
    are stored through ``Chroma.from_documents`` and persisted.

    :param file: path of the file to index
    :param persist_directory: directory given to Chroma as ``persist_directory``
    :param embeddings: embeddings object given to ``Chroma.from_documents``
    :param chunk_size: chunk size used by both splitters
    :param chunk_overlap: chunk overlap used by the token splitter only
    :return: None
    """
    markdown_docs = UnstructuredMarkdownLoader(file_path=file, encoding='utf8').load()

    # First pass: character-based split, deliberately without overlap.
    coarse_chunks = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=0,
    ).split_documents(markdown_docs)

    # Second pass: token-based split of the first-pass output.
    fine_chunks = TokenTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_documents(coarse_chunks)

    store = Chroma.from_documents(fine_chunks, embeddings, persist_directory=persist_directory)
    store.persist()
    print(f'Indexed file {file} for module {persist_directory}')

    # Drop the local reference to the store before returning.
    del store
# /INDEX DOCUMENTS
This file was copied from a test project where this error never occurs. However, that project was only run from the command line, not through Flask, so that may make a difference here.
I have already tried copying those functions and the imports into main.py, but I get the same error.
I have also tried commenting out the import of lc_indexer.py and the call to the index_documents function in main.py, and then the app launches without any problem.
What is the root cause of the problem here? The langchain requirements have been installed.