Trying to create a function that returns the number of files found in a directory and its subdirectories. Just need help getting started.
One-liner
import os
# Total number of files under the project folder. The path is a raw string so
# the Windows backslashes can never be read as escape sequences, and a
# generator expression avoids building a throwaway list just to sum it.
cpt = sum(len(files) for _root, _dirs, files in os.walk(r"G:\CS\PYTHONPROJECTS"))
Use os.walk. It will do the recursion for you. See http://www.pythonforbeginners.com/code-snippets-source-code/python-os-walk/ for an example.
# Add up the number of files reported for every directory os.walk visits.
total = 0
for root, dirs, files in os.walk(folder):
    # The accumulation must sit inside the loop body to run once per directory.
    total += len(files)
scandir()
. –
Honeydew
Just add an elif statement that takes care of the directories:
def fileCount(folder):
    """Count the regular files in ``folder`` and all of its subdirectories.

    Args:
        folder: Path of the directory to scan.

    Returns:
        The number of entries for which ``os.path.isfile`` is true, summed
        over ``folder`` and every directory nested beneath it.

    Raises:
        OSError: If ``folder`` or a nested directory cannot be listed.
    """
    count = 0
    for filename in os.listdir(folder):
        path = os.path.join(folder, filename)
        if os.path.isfile(path):
            count += 1
        # os.path has no ``isfolder``; ``isdir`` is the directory predicate.
        elif os.path.isdir(path):
            count += fileCount(path)
    return count
os.path.isdir works for me on Ubuntu instead of os.path.isfolder
. –
Eleven - Here are some one-liners using pathlib, which is part of the standard library.
- Use Path.cwd().rglob('*') or Path('some path').rglob('*'), which creates a generator of all the paths (files and directories) beneath that directory.
  - Unpack the generator with list or *, and use len to get the number of entries.
- See How to count total number of files in each subfolder to get the total number of files for each directory.
from pathlib import Path
# Starting point for all three variants: the current working directory.
here = Path.cwd()

# Materialise the rglob generator with list() and count every entry found
# (directories are included as well as files).
total_dir_files = len(list(here.rglob('*')))
# or unpack the generator into a list display with *
total_dir_files = len([*here.rglob('*')])
# or keep only regular files by filtering on is_file()
file_count = len([entry for entry in here.rglob('*') if entry.is_file()])
Here is a time-test for the 3 most popular methods:
import os
from datetime import datetime
# Directory scanned by each of the timed methods below (Windows path; the
# backslash is escaped because this is a normal, non-raw string literal).
dir_path = "D:\\Photos"
# os.listdir
def recursive_call(dir_path):
    """Count files and sub-folders beneath ``dir_path`` using ``os.listdir``.

    Args:
        dir_path: Directory to scan.

    Returns:
        A ``(files, folders)`` tuple: the number of regular files and the
        number of directories found anywhere below ``dir_path``. ``dir_path``
        itself is not counted as a folder.

    Raises:
        OSError: If ``dir_path`` or a nested directory cannot be listed.
    """
    files = 0
    folders = 0
    for name in os.listdir(dir_path):
        # Build the full path once; both type checks and the recursion use it.
        full_path = os.path.join(dir_path, name)
        if os.path.isfile(full_path):
            files += 1
        elif os.path.isdir(full_path):
            folders += 1
            file_count, folder_count = recursive_call(full_path)
            files += file_count
            folders += folder_count
    return files, folders
# Time the os.listdir-based traversal and report what it found.
start_time = datetime.now()
files, folders = recursive_call(dir_path)
print("\nFolders: %d, Files: %d" % (folders, files))
# Elapsed wall-clock time since the traversal started.
elapsed = datetime.now() - start_time
print("Time Taken (os.listdir): %s seconds" % elapsed.total_seconds())
# os.walk
start_time = datetime.now()
# os.walk yields one (dirpath, dirnames, filenames) triple per directory it
# visits, starting with dir_path itself.
file_array = [len(filenames) for _dirpath, _dirnames, filenames in os.walk(dir_path)]
files = sum(file_array)
# Exclude the triple for dir_path itself so "Folders" counts only sub-folders,
# matching the os.listdir and os.scandir measurements. max() covers the case
# where os.walk yields nothing (for example, dir_path does not exist).
folders = max(len(file_array) - 1, 0)
print("\nFolders: %d, Files: %d" % (folders, files))
print("Time Taken (os.walk): %s seconds" % (datetime.now() - start_time).total_seconds())
# os.scandir
def recursive_call(dir_path):
    """Count files and sub-folders beneath ``dir_path`` using ``os.scandir``.

    Args:
        dir_path: Directory to scan.

    Returns:
        A ``(files, folders)`` tuple: the number of file entries and the
        number of directory entries found anywhere below ``dir_path``.
        ``dir_path`` itself is not counted as a folder.

    Raises:
        OSError: If ``dir_path`` or a nested directory cannot be scanned.
    """
    files = 0
    folders = 0
    # The context manager closes the scandir iterator even if an error
    # interrupts the loop, so its directory handle is released promptly.
    with os.scandir(dir_path) as entries:
        for entry in entries:
            if entry.is_file():
                files += 1
            elif entry.is_dir():
                folders += 1
                file_count, folder_count = recursive_call(entry.path)
                files += file_count
                folders += folder_count
    return files, folders
# Time the os.scandir-based traversal and report what it found.
start_time = datetime.now()
files, folders = recursive_call(dir_path)
print("\nFolders: %d, Files: %d" % (folders, files))
# Elapsed wall-clock time since the traversal started.
elapsed = datetime.now() - start_time
print("Time Taken (os.scandir): %s seconds" % elapsed.total_seconds())
Results:
Folders: 53, Files: 29048
Time Taken (os.listdir): 3.074945 seconds
Folders: 53, Files: 29048
Time Taken (os.walk): 0.062022 seconds
Folders: 53, Files: 29048
Time Taken (os.scandir): 0.048984 seconds
Conclusion:
While os.walk is the most elegant, os.scandir implemented recursively seems to be the fastest.
Here is my shot; it worked fine on Windows, where I was testing it.
from pprint import pprint # Optional
import pathlib
def count_folder_itens(folder, count=0):
    """Recursively count files under ``folder`` and record its sub-folder tree.

    Args:
        folder: Directory to scan (a string or path-like object).
        count: Not used by the computation; kept (now with a default) so
            existing two-argument calls keep working.

    Returns:
        A ``(structure, file_count)`` tuple. ``structure`` is a list holding
        one ``(path_string, sub_structure)`` pair per direct sub-folder, and
        ``file_count`` is the number of non-directory entries in the whole
        tree. ``([], 0)`` is returned, after printing a message, when the
        folder cannot be listed.
    """
    path = pathlib.Path(folder)
    # Separate name so the ``folder`` argument is not shadowed by the result.
    structure = []
    file_count = 0
    try:
        for item in path.iterdir():
            if item.is_dir():
                # Item is a folder: recurse, then keep both its structure
                # and its file count.
                subfolder, subfile_count = count_folder_itens(item, file_count)
                structure.append((str(item), subfolder))
                file_count += subfile_count
            else:
                # Anything that is not a folder is counted as a file.
                file_count += 1
    except OSError:
        # Only filesystem errors (missing path, not a directory, permission
        # denied) mean the path is unusable; other exceptions should surface.
        print(f'Invalid folder path: {path}')
        return ([], 0)
    return (structure, file_count)
# Escaped folder path. This is a normal (non-raw) literal, so each "\\" is a
# single backslash; with an r'' prefix the doubled backslashes would be kept
# verbatim in the path.
folder_path = 'C:\\Users\\wwwxkz\\Downloads\\'
# Call count_folder_itens starting with 0 itens
result = count_folder_itens(folder_path, 0)
# Number of itens in the folder and its subfolders
print(f'{folder_path}: ' + str(result[1]))
# Print all folder structure recursively
pprint(result[0]) # Optional
One use case for scanning a directory this way is to get an initial count of the number of entries that a subsequent shutil.copytree or archive creation command will need to handle. The following stat_tree function accepts a copytree-style ignore filter function (compatible with shutil.ignore_patterns) that allows specification of files and folders to be skipped, and returns counts of the number of files, the number of directories, and optionally the total file size (note: due to the way os.lstat works, symlinks are counted as their symlink size, while hardlinks are counted as their target size).
import os, os.path
def stat_tree(src, ignore=None, *, report_size=True):
    """Count directories and files in a tree, respecting a `copytree` `ignore` filter.

    Args:
        src: Root directory of the tree to scan.
        ignore: Optional callable taking ``(directory, names)`` and returning
            the names to skip, as accepted by ``shutil.copytree`` (for
            example the result of ``shutil.ignore_patterns``).
        report_size: When true, also add up the size of every counted file.

    Returns:
        A 3-tuple of ``(num_directories, num_files, total_file_size)``.
        ``num_directories`` includes ``src`` itself. ``total_file_size`` is
        always zero if ``report_size`` is set to ``False``.
    """
    total_directories = 0
    total_files = 0
    total_file_size = 0
    for this_dir, dirnames, filenames in os.walk(src):
        if ignore is not None:
            # Materialise as a set so the membership tests below stay cheap
            # even when the filter returns a list.
            ignored_names = set(ignore(this_dir, dirnames + filenames))
            # Don't count ignored files
            filenames[:] = [name for name in filenames if name not in ignored_names]
            # Don't iterate over ignored directories: os.walk reads dirnames
            # back, so the list has to be pruned in place.
            dirnames[:] = [name for name in dirnames if name not in ignored_names]
        total_directories += 1
        total_files += len(filenames)
        if report_size:
            for name in filenames:
                # lstat does not follow symlinks, so a link contributes the
                # size of the link itself.
                total_file_size += os.lstat(os.path.join(this_dir, name)).st_size
    return total_directories, total_files, total_file_size
Examples:
~/devel$ python3 -c "from stat_tree import stat_tree; print(stat_tree('cpython/Lib'))"
(342, 3990, 88805245)
~/devel$ python3 -c "from stat_tree import stat_tree; from shutil import ignore_patterns; print(stat_tree('cpython/Lib', ignore_patterns('__pycache__')))"
(199, 2272, 42612082)
~/devel$ python3 -c "from stat_tree import stat_tree; print(stat_tree('cpython/Lib', report_size=False))"
(342, 3990, 0)
Here is My Version
def fileCount(folder, allowed_extensions=None):
    """Count the files in ``folder`` and its sub-directories.

    Args:
        folder: Directory to scan.
        allowed_extensions: Optional suffix or collection of suffixes (for
            example ``(".jpg", ".mp4")``). When given and non-empty, only
            file names ending with one of them are counted; otherwise every
            file is counted.

    Returns:
        The number of matching files found by ``os.walk``.
    """
    if allowed_extensions and not isinstance(allowed_extensions, str):
        # str.endswith accepts only a str or a tuple of str, so lists and
        # other iterables are normalised to a tuple once, up front.
        allowed_extensions = tuple(allowed_extensions)
    count = 0
    for _base, _dirs, files in os.walk(folder):
        if allowed_extensions:
            count += sum(1 for name in files if name.endswith(allowed_extensions))
        else:
            # No filter requested: every file in this directory counts.
            count += len(files)
    return count
# Count only the .jpg and .mp4 files beneath the chosen folder and print
# the total.
scan_dir = r"C:\Users\sannjayy\Desktop"
allowed_extensions = (".jpg", ".mp4")
matching_files = fileCount(scan_dir, allowed_extensions)
print(matching_files)
Simple solution using glob.
import os.path
from glob import glob
# Expand the recursive pattern, keep only the paths that are regular files,
# and count what is left.
result = len(
    list(
        filter(
            os.path.isfile,
            glob("/Users/home/sample_dir/**/*", recursive=True),
        )
    )
)
print(result)
© 2022 - 2024 — McMap. All rights reserved.