I have a lot of docx files to be converted to pdf. Converting them one by one takes long time. So I write a python scripts to convert them in parallel:
from subprocess import Popen
import time
import os
os.chdir(os.path.dirname(__file__))
output_dir = './outputs'
source_file_format = './docs/example_{}.docx'
po_list = [Popen(
f"/Applications/LibreOffice.app/Contents/MacOS/soffice --invisible --convert-to pdf --outdir {output_dir} {source_file_format.format(i)}",
shell=True)
for i in range(0, 7, 1)]
while po_list:
time.sleep(0.01)
for i, p in enumerate(po_list):
status = p.poll()
if status is None:
continue
elif status == 0:
print('Succeed: [{}] {} -> {}'.format(p.returncode, p.stderr, p.args))
po_list.remove(p)
else:
print('Failed: {} : {}'.format(p.args, p.poll()))
po_list.remove(p)
But each time I run this script, only a part of docx files are converted successfully. The rest conversion processes even not throw any error info.
from joblib import Parallel, delayed import os def convert_docdocx_to_pdf(file_to_convert : str, output_folder : str ): """Convert a doc or docx document to pdf using Libre Office""" result = subprocess.call(['lowriter', '--convert-to', 'pdf', '--outdir', output_folder, file_to_convert]) return result Parallel(n_jobs = 2, prefer = "threads", timeout = 60)(delayed(convert_docdocx_to_pdf)(file, os.path.dirname(file)) for file in files)
– Loreanloredana