I tried to optimize a file-browsing function written in Python, on Windows, by using os.scandir() instead of os.listdir(). However, the run time remains unchanged at about two and a half minutes, and I can't tell why. Below are the two versions of the function, original and altered:
os.listdir() version:
def browse(self, path, tree):
    """Recursively mirror the directory at ``path`` into ``tree``.

    Every name returned by ``os.listdir(path)`` that ``self.git_ignore``
    does not report as ignored is stored in ``tree`` under that name:
    a directory becomes a ``Folder`` (which is then browsed recursively),
    a file becomes a ``File``. Entries that are neither are left out.

    Args:
        path: Directory to list.
        tree: Mapping that receives one item per kept entry.
    """
    for entry in os.listdir(path):
        entity_path = os.path.join(path, entry)
        # Skip anything git_ignore() reports as ignored.
        if self.git_ignore(entity_path):
            continue
        if os.path.isdir(entity_path):
            # A directory opens a new level in the tree.
            tree[entry] = Folder(entry)
            self.browse(entity_path, tree[entry])
        elif os.path.isfile(entity_path):
            # A path is never both a directory and a file, so `elif`
            # spares directories a second filesystem query.
            tree[entry] = File(entity_path)
os.scandir() version:
def browse(self, path, tree):
    """Recursively mirror the directory at ``path`` into ``tree``.

    Every entry yielded by ``os.scandir(path)`` that ``self.git_ignore``
    does not report as ignored is stored in ``tree`` under its name:
    a directory becomes a ``Folder`` (which is then browsed recursively),
    a file becomes a ``File``. Entries that are neither are left out.

    Args:
        path: Directory to scan.
        tree: Mapping that receives one item per kept entry.
    """
    # The context manager closes the scandir iterator (and the directory
    # handle behind it) even if git_ignore() or the recursion raises.
    with os.scandir(path) as entries:
        for dir_entry in entries:
            entry_name = dir_entry.name
            entity_path = dir_entry.path
            # Skip anything git_ignore() reports as ignored.
            if self.git_ignore(entity_path):
                continue
            # is_dir()/is_file() follow symlinks by default.
            if dir_entry.is_dir():
                # NOTE(review): the os.listdir() variant of this method
                # passes the bare entry name to Folder, whereas the full
                # path is passed here - confirm which one is intended.
                tree[entry_name] = Folder(entity_path)
                self.browse(entity_path, tree[entry_name])
            elif dir_entry.is_file():
                tree[entry_name] = File(entity_path)
In addition, here are the auxiliary functions used within this one:
def git_ignore(self, filepath):
    """Return True when ``filepath`` should be left out of the tree.

    A path is rejected immediately if it contains ``.git``, ``.ci`` or
    ``.delivery`` anywhere in it. Otherwise ``git check-ignore`` is run on
    the path and the result is True only when git exits with status 0.

    Note that every call that gets past the substring checks starts a new
    ``git`` process; calling this once per directory entry is therefore
    expensive on large trees.

    Args:
        filepath: Path to test; converted with ``str()`` for the git call.

    Returns:
        bool: True if the path is to be ignored, False otherwise.
    """
    # Plain substring tests on the whole path, so names that merely
    # contain a marker (e.g. ".gitignore") are rejected as well.
    if any(marker in filepath for marker in ('.git', '.ci', '.delivery')):
        return True
    # Only the exit status matters, so git's output is discarded instead
    # of being captured through pipes.
    completed = subprocess.run(['git', 'check-ignore', str(filepath)],
                               stdout=subprocess.DEVNULL,
                               stderr=subprocess.DEVNULL)
    return completed.returncode == 0
============================================================
class Folder(dict):
    """A ``dict`` subclass that also carries a path and a categories map."""

    def __init__(self, path):
        # The dict part starts out empty; only the extra attributes are
        # initialised here.
        self.categories = dict()
        self.path = path
============================================================
class File(object):
    """A file record holding its path split into root and extension."""

    def __init__(self, path):
        root, ext = os.path.splitext(path)
        self.path = path
        # Despite its name, `filename` is the path with the extension
        # removed (directory components are kept), not a bare file name.
        self.filename = root
        self.extension = ext
Does anyone have a solution for how I can make the function run faster? My assumption is that extracting the name and path at the beginning of each iteration makes it run slower than it should; is that correct?