I need to walk through folders with long file names in Windows.
I tried using `os.listdir()`, but it crashes with long pathnames, which is bad.
I tried using `os.walk()`, but it ignores pathnames longer than ~256 characters, which is worse.
I tried the magic word workaround described here, but it only works with mapped drives, not with UNC pathnames.
Here is an example with short pathnames, that shows that UNC pathnames don't work with the magic word trick.
>>> os.listdir('c:\\drivers')
['nusb3hub.cat', 'nusb3hub.inf', 'nusb3hub.sys', 'nusb3xhc.cat', 'nusb3xhc.inf', 'nusb3xhc.sys']
>>> os.listdir('\\\\Uni-hq-srv6\\router')
['2009-04-0210', '2010-11-0909', ... ]
>>> mw=u'\\\\?\\'
>>> os.listdir(mw+'c:\\drivers')
[u'nusb3hub.cat', u'nusb3hub.inf', u'nusb3hub.sys', u'nusb3xhc.cat', u'nusb3xhc.inf', u'nusb3xhc.sys']
>>> os.listdir(mw+'\\\\Uni-hq-srv6\\router')
Traceback (most recent call last):
File "<pyshell#160>", line 1, in <module>
os.listdir(mw+'\\\\Uni-hq-srv6\\router')
WindowsError: [Error 123] The filename, directory name, or volume label syntax is incorrect: u'\\\\?\\\\\\Uni-hq-srv6\\router\\*.*'
Any idea on how to deal with long pathnames or with unicode UNC pathnames?
Edit:
Following the suggestions in the comments below, I created some test functions to compare Python 2.7 and 3.3, and I added tests of `glob.glob` and of `os.listdir` after `os.chdir`.
The `os.chdir` didn't help as expected (see this comment).
`glob.glob` is the only one that works better in Python 3.3, but only under one condition: using the magic word together with a drive name.
Here is the code I used (it works on both 2.7 and 3.3). I am learning Python now, and I hope these tests make sense:
from __future__ import print_function
import os, glob
# Win32 extended-length path prefix, i.e. the four characters \\?\ (the "magic word").
# The *_mw helpers in this script prepend it to the root they are given.
mw = u'\\\\?\\'
def walk(root):
    """Count the files that ``os.walk`` reports under *root*.

    Args:
        root: Directory at which the walk starts.

    Returns:
        The total number of file names yielded across every directory
        visited by ``os.walk`` (0 when nothing is yielded).
    """
    return sum(len(names) for _top, _subdirs, names in os.walk(root))
def walk_mw(root):
    """Count the files that ``os.walk`` reports under the prefixed *root*.

    The module-level prefix ``mw`` is prepended to *root* unconditionally
    before walking.

    Args:
        root: Directory at which the walk starts (without the prefix).

    Returns:
        The total number of file names yielded by ``os.walk``.
    """
    # NOTE(review): Windows documents \\?\UNC\server\share as the
    # extended-length form for UNC roots; plain prefixing does not produce
    # that form -- confirm before trusting results for UNC paths.
    prefixed_root = mw + root
    return sum(len(names) for _top, _subdirs, names in os.walk(prefixed_root))
def listdir(root):
    """Recursively count the files under *root* using ``os.listdir``.

    Args:
        root: Directory to scan.

    Returns:
        The number of regular files found in *root* and all of its
        sub-directories, or the string ``'Crash'`` if any call failed
        anywhere in the tree.
    """
    try:
        # Scan the directory once and classify every entry from that
        # single snapshot instead of listing the directory twice.
        paths = [os.path.join(root, name) for name in os.listdir(root)]
        folders = [path for path in paths if os.path.isdir(path)]
        n = sum(1 for path in paths if os.path.isfile(path))
        for folder in folders:
            sub_count = listdir(folder)
            if sub_count == 'Crash':
                # Propagate a failure from deeper in the tree explicitly
                # rather than relying on ``int += str`` raising TypeError.
                return 'Crash'
            n += sub_count
        return n
    except Exception:
        # ``Exception`` (not a bare ``except``) so that Ctrl-C and
        # SystemExit still interrupt a long scan.
        return 'Crash'
def listdir_mw(root):
    """Recursively count files with ``os.listdir`` on a prefixed *root*.

    The module-level prefix ``mw`` is prepended to *root* unless *root*
    already starts with it, so the prefix is only added once across the
    recursion.

    Args:
        root: Directory to scan, with or without the prefix.

    Returns:
        The number of regular files found in *root* and all of its
        sub-directories, or the string ``'Crash'`` if any call failed
        anywhere in the tree.
    """
    # NOTE(review): Windows documents \\?\UNC\server\share as the
    # extended-length form for UNC roots; plain prefixing does not produce
    # that form -- confirm before trusting results for UNC paths.
    if not root.startswith(mw):
        root = mw + root
    try:
        # Scan the directory once and classify every entry from that
        # single snapshot instead of listing the directory twice.
        paths = [os.path.join(root, name) for name in os.listdir(root)]
        folders = [path for path in paths if os.path.isdir(path)]
        n = sum(1 for path in paths if os.path.isfile(path))
        for folder in folders:
            sub_count = listdir_mw(folder)
            if sub_count == 'Crash':
                # Propagate a failure from deeper in the tree explicitly
                # rather than relying on ``int += str`` raising TypeError.
                return 'Crash'
            n += sub_count
        return n
    except Exception:
        # ``Exception`` (not a bare ``except``) so that Ctrl-C and
        # SystemExit still interrupt a long scan.
        return 'Crash'
def listdir_cd(root):
    """Recursively count files by changing into each directory in turn.

    Every level does ``os.chdir(root)`` and then lists ``'.'``, so only
    short relative names are passed to the OS below the starting point.
    The working directory that was current on entry is restored before
    returning, so sibling folders can still be resolved and the caller's
    working directory is left unchanged.

    Args:
        root: Directory to enter; relative names are resolved against the
            current working directory.

    Returns:
        The number of regular files found in *root* and all of its
        sub-directories, or the string ``'Crash'`` if any call failed
        anywhere in the tree.
    """
    try:
        previous = os.getcwd()
        os.chdir(root)
        try:
            names = os.listdir('.')
            folders = [name for name in names if os.path.isdir(name)]
            n = sum(1 for name in names if os.path.isfile(name))
            for folder in folders:
                sub_count = listdir_cd(folder)
                if sub_count == 'Crash':
                    # Propagate a failure from deeper in the tree explicitly.
                    return 'Crash'
                n += sub_count
            return n
        finally:
            # Without this, the process stays inside the first sub-folder
            # and the relative name of the next sibling no longer resolves.
            os.chdir(previous)
    except Exception:
        # ``Exception`` (not a bare ``except``) so that Ctrl-C and
        # SystemExit still interrupt a long scan.
        return 'Crash'
def listdir_mw_cd(root):
    """Count files by changing into a prefixed *root*, then recursing.

    The module-level prefix ``mw`` is prepended to *root* unless it is
    already present. Only this top level uses the prefix; sub-folders are
    handled by ``listdir_cd`` with their plain relative names. The working
    directory that was current on entry is restored before returning.

    Args:
        root: Directory to enter, with or without the prefix.

    Returns:
        The number of regular files found in *root* and all of its
        sub-directories, or the string ``'Crash'`` if any call failed
        anywhere in the tree.
    """
    if not root.startswith(mw):
        root = mw + root
    try:
        previous = os.getcwd()
        os.chdir(root)
        try:
            names = os.listdir('.')
            folders = [name for name in names if os.path.isdir(name)]
            n = sum(1 for name in names if os.path.isfile(name))
            for folder in folders:
                # the magic word can only be added the first time
                sub_count = listdir_cd(folder)
                if sub_count == 'Crash':
                    # Propagate a failure from deeper in the tree explicitly.
                    return 'Crash'
                n += sub_count
                # Re-enter root so the next sibling's relative name resolves
                # whether or not the child call restored the directory.
                os.chdir(root)
            return n
        finally:
            os.chdir(previous)
    except Exception:
        # ``Exception`` (not a bare ``except``) so that Ctrl-C and
        # SystemExit still interrupt a long scan.
        return 'Crash'
def glb(root):
    """Recursively count the files under *root* using ``glob.glob``.

    Args:
        root: Directory to scan; may be absolute or relative.

    Returns:
        The number of regular files matched by ``*`` in *root* and in all
        of its sub-directories.
    """
    # ``os.sep`` is the backslash on Windows, so the pattern there is the
    # same string as before; elsewhere it is the native separator.
    matches = glob.glob(root + os.sep + '*')
    # glob results already start with ``root``. Joining them onto ``root``
    # again would duplicate the prefix whenever ``root`` is relative.
    folders = [match for match in matches if os.path.isdir(match)]
    n = sum(1 for match in matches if os.path.isfile(match))
    for folder in folders:
        n += glb(folder)
    return n
def glb_mw(root):
    """Recursively count files with ``glob.glob`` on a prefixed *root*.

    The module-level prefix ``mw`` is prepended to *root* unless *root*
    already starts with it, so the prefix is only added once across the
    recursion.

    Args:
        root: Directory to scan, with or without the prefix.

    Returns:
        The number of regular files matched by ``*`` in *root* and in all
        of its sub-directories.
    """
    # NOTE(review): Windows documents \\?\UNC\server\share as the
    # extended-length form for UNC roots; plain prefixing does not produce
    # that form -- confirm before trusting results for UNC paths.
    if not root.startswith(mw):
        root = mw + root
    # ``os.sep`` is the backslash on Windows, so the pattern there is the
    # same string as before; elsewhere it is the native separator.
    matches = glob.glob(root + os.sep + '*')
    # glob results already start with ``root``; use them as they are
    # instead of joining them onto ``root`` a second time.
    folders = [match for match in matches if os.path.isdir(match)]
    n = sum(1 for match in matches if os.path.isfile(match))
    for folder in folders:
        n += glb_mw(folder)
    return n
def test():
    """Run every counting helper against each root and print the outcome.

    One line is printed per (root, helper) pair: the root label, the
    helper label, and whatever the helper returned for that root.
    """
    roots = [
        ('drive ', r'C:\test'),
        ('UNC ', r'\\Uni-hq-srv6\router\test'),
    ]
    counters = [
        ('walk ', walk),
        ('walk magic word ', walk_mw),
        ('listdir ', listdir),
        ('listdir magic word ', listdir_mw),
        ('listdir cd ', listdir_cd),
        ('listdir magic word cd ', listdir_mw_cd),
        ('glob ', glb),
        ('glob magic word ', glb_mw),
    ]
    for root_label, root in roots:
        for counter_label, counter in counters:
            print(root_label, counter_label, counter(root))
# Run the whole comparison as soon as this script is executed.
test()
And here is the result:
- The number 8 means all the files were found
- The number 0 means it did not find a single file, yet did not crash
- Any number between 1 and 7 means it stopped part-way through, without crashing
- The word `Crash` means it crashed
-
Python 2.7
drive walk 5
drive walk magic word 8 * GOOD *
drive listdir Crash
drive listdir magic word 8 * GOOD *
drive listdir cd Crash
drive listdir magic word cd 5
drive glob 5
drive glob magic word 0
UNC walk 6
UNC walk magic word 0
UNC listdir 5
UNC listdir magic word Crash
UNC listdir cd 5
UNC listdir magic word cd Crash
UNC glob 5
UNC glob magic word 0
Python 3.3
drive walk 5
drive walk magic word 8 * GOOD *
drive listdir Crash
drive listdir magic word 8 * GOOD *
drive listdir cd Crash
drive listdir magic word cd 5
drive glob 5
drive glob magic word 8 * GOOD *
UNC walk 6
UNC walk magic word 0
UNC listdir 5
UNC listdir magic word Crash
UNC listdir cd 5
UNC listdir magic word cd Crash
UNC glob 5
UNC glob magic word 0
[…] `net use` and assign a drive letter for the UNC? – Germanophobe
[…] `net use` them and then `net use /delete` them. – Benempt
[…] `glob.glob("*")` work for you? – Ephraimite