I have built a splitter function with the LangChain library that splits a series of Python files into documents. At another point in the code I need to convert these documents back into Python code, but I do not know how to do this.
def index_repo(repo_url):
    """Clone a repository and split its Python files into LangChain documents.

    Every ``.py`` file found under the cloned repository is read as UTF-8 and
    chunked with a Python-aware ``RecursiveCharacterTextSplitter``. Each
    resulting document carries ``metadata["source"]`` set to the path of the
    file it was cut from, and the chunks of one file appear in the returned
    list in their original order, so the code of a file can be recovered by
    grouping documents by ``source`` and joining their ``page_content``.
    NOTE(review): the splitter may trim whitespace at chunk boundaries, so the
    rejoined text is presumably not guaranteed to be byte-identical — confirm
    against the splitter documentation if exact round-tripping matters.

    Args:
        repo_url: Location of the repository, passed unchanged to ``get_repo``.

    Returns:
        A ``(texts, file_names)`` tuple: ``texts`` is the list of chunked
        documents and ``file_names`` the paths of the files that were read
        successfully. Files that cannot be read are skipped and are listed in
        neither, so ``file_names`` always lines up with the contents that
        were split.
    """
    # NOTE(review): this overwrites any OPENAI_API_KEY already present in the
    # environment with an empty string — presumably a redacted placeholder;
    # confirm before running against a real key.
    os.environ['OPENAI_API_KEY'] = ""

    # str.endswith accepts a tuple, so build it once instead of per file.
    file_extensions = (".py",)

    print('cloning repo')
    repo_dir = get_repo(repo_url)

    contents = []
    file_names = []
    for dirpath, _dirnames, filenames in os.walk(repo_dir):
        for file in filenames:
            if not file.endswith(file_extensions):
                continue
            path = os.path.join(dirpath, file)
            try:
                with open(path, "r", encoding="utf-8") as f:
                    text = f.read()
            except (OSError, UnicodeError) as e:
                # Best-effort indexing: report the unreadable file and go on.
                print(f'skipping {path}: {e}')
                continue
            # Record the path only after a successful read so that
            # file_names[i] always corresponds to contents[i].
            contents.append(text)
            file_names.append(path)

    # chunk the files
    text_splitter = RecursiveCharacterTextSplitter.from_language(
        language=Language.PYTHON, chunk_size=5000, chunk_overlap=0
    )
    # One metadata dict per input text; the splitter attaches it to every
    # chunk produced from that text, which lets callers map chunks to files.
    metadatas = [{"source": path} for path in file_names]
    texts = text_splitter.create_documents(contents, metadatas=metadatas)
    return texts, file_names