Flatten complex directory structure in Python
Asked Answered
S

6

10

I want to move files from a complex directory structure to just one place. For example i have this deep hierarchy:

foo/
    foo2/
        1.jpg
    2.jpg
    ...

I want it to be:

1.jpg
2.jpg
...

My current solution:

def move(destination):
    for_removal = os.path.join(destination, '\\')
    is_in_parent = lambda x: x.find(for_removal) > -1
    with directory(destination):
        files_to_move = filter(is_in_parent,
                               glob_recursive(path='.'))
    for file in files_to_move:
        shutil.move(file, destination)

Definitions: directory and glob_recursive. Note, that my code only moves files to their common parent directory, not an arbitrary destination.

How can i move all files from a complex hierarchy to a single place succinctly and elegantly?

Saretta answered 9/7, 2013 at 11:37 Comment(0)
I
3

Run recursively through directory, move the files and launch move for directories:

import shutil
import os

def move(destination, depth=None):
    if not depth:
        depth = []
    for file_or_dir in os.listdir(os.path.join([destination] + depth, os.sep)):
        if os.path.isfile(file_or_dir):
            shutil.move(file_or_dir, destination)
        else:
            move(destination, os.path.join(depth + [file_or_dir], os.sep))
Illiteracy answered 9/7, 2013 at 11:39 Comment(2)
absoulte path version ``` import shutil import os def move(destination, depth=''): current_depth = os.path.join(destination, depth) for file_or_dir in os.listdir(current_depth): file_or_dir = os.path.join(current_depth, file_or_dir) if os.path.isfile(file_or_dir): if depth: shutil.move(file_or_dir, destination) else: move(destination, os.path.join(depth, file_or_dir)) move(r'F:\sw\PycharmProjects\yappy-dev\lectures') ```Inga
This will not work as it is. os.path.join does NOT take a list, you will need to use a splat to put the list into separate args: os.path.join(*([destination] + depth))Dorton
A
10

I don't like testing the name of the file about to be moved to see if we're already in the destination directory. Instead, this solution only scans the subdirectories of the destination

import os
import itertools
import shutil


def move(destination):
    all_files = []
    for root, _dirs, files in itertools.islice(os.walk(destination), 1, None):
        for filename in files:
            all_files.append(os.path.join(root, filename))
    for filename in all_files:
        shutil.move(filename, destination)

Explanation: os.walk walks recursively the destination in a "top down" manner. whole filenames are constructed with the os.path.join(root, filename) call. Now, to prevent scanning files at the top of the destination, we just need to ignore the first element of the iteration of os.walk. To do that I use islice(iterator, 1, None). One other more explicit way would be to do this:

def move(destination):
    all_files = []
    first_loop_pass = True
    for root, _dirs, files in os.walk(destination):
        if first_loop_pass:
            first_loop_pass = False
            continue
        for filename in files:
            all_files.append(os.path.join(root, filename))
    for filename in all_files:
        shutil.move(filename, destination)
Anglaangle answered 9/7, 2013 at 13:16 Comment(2)
You don't handle the case where the filname you are moving is the same as a filename in the destination dir. For instance, what if the source sub dirs contain only files named hello.txt? move() will throw an error.Galton
This solution will also leave empty sub directories around. To solve: store _dirs next to where you set first_loop_pass to False, then at the end after you do the file moves just loop over the dirs and call shutil.rmtree()Gyrostatic
S
5

this would do, it also renames files if they collide (I commented out the actual move and replaced with a copy):

import os
import sys
import string
import shutil

#Generate the file paths to traverse, or a single path if a file name was given
def getfiles(path):
    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
            for name in files:
                yield os.path.join(root, name)
    else:
        yield path

destination = "./newdir/"
fromdir = "./test/"
for f in getfiles(fromdir):
    filename = string.split(f, '/')[-1]
    if os.path.isfile(destination+filename):
        filename = f.replace(fromdir,"",1).replace("/","_")
    #os.rename(f, destination+filename)
    shutil.copy(f, destination+filename)
Sociolinguistics answered 9/7, 2013 at 12:4 Comment(2)
destination+filename can be an unwanted filepath location. You'd better use os.path.join(destination, 'filename')Rugging
Consider replacing all 【"/"】 with 【os.sep】. Otherwise, it will cause many problems when crossing operating systems.Antigen
I
3

Run recursively through directory, move the files and launch move for directories:

import shutil
import os

def move(destination, depth=None):
    if not depth:
        depth = []
    for file_or_dir in os.listdir(os.path.join([destination] + depth, os.sep)):
        if os.path.isfile(file_or_dir):
            shutil.move(file_or_dir, destination)
        else:
            move(destination, os.path.join(depth + [file_or_dir], os.sep))
Illiteracy answered 9/7, 2013 at 11:39 Comment(2)
absoulte path version ``` import shutil import os def move(destination, depth=''): current_depth = os.path.join(destination, depth) for file_or_dir in os.listdir(current_depth): file_or_dir = os.path.join(current_depth, file_or_dir) if os.path.isfile(file_or_dir): if depth: shutil.move(file_or_dir, destination) else: move(destination, os.path.join(depth, file_or_dir)) move(r'F:\sw\PycharmProjects\yappy-dev\lectures') ```Inga
This will not work as it is. os.path.join does NOT take a list, you will need to use a splat to put the list into separate args: os.path.join(*([destination] + depth))Dorton
S
2
import os.path, shutil

def move(src, dest):
    not_in_dest = lambda x: os.path.samefile(x, dest)
    files_to_move = filter(not_in_dest,
                           glob_recursive(path=src))

    for f in files_to_move:
        shutil.move(f, dest)

Source for glob_recursive. Does not change name of file, if they collide.

samefile is a safe way to compare paths. But it doesn't work on Windows, so check How to emulate os.path.samefile behaviour on Windows and Python 2.7?.

Saretta answered 9/7, 2013 at 12:16 Comment(0)
P
0
def splitPath(p):
    a,b = os.path.split(p)
    return (splitPath(a) if len(a) and len(b) else []) + [b]

def safeprint(s):
    try:
        print(s)
    except UnicodeEncodeError:
        if sys.version_info >= (3,):
            print(s.encode('utf8').decode(sys.stdout.encoding))
        else:
            print(s.encode('utf8'))

def flatten(root, doit):
    
    SEP  = "¦"
    REPL = "?"

    folderCount = 0
    fileCount = 0

    if not doit:
        print("Simulating:")

    for path, dirs, files in os.walk(root, topdown=False):

        if path != root:

            for f in files:

                sp = splitPath(path)

                np = ""

                for element in sp[1:]:
                    e2 = element.replace(SEP, REPL)
                    np += e2 + SEP

                f2 = f.replace(SEP, REPL)
                newName = np + f2

                safeprint("Moved:   "+ newName )
                if doit:
                    shutil.move(os.path.join(path, f), os.path.join(root, f))
                    # Uncomment, if you want filenames to be based on folder hierarchy.
                    #shutil.move(os.path.join(path, f), os.path.join(root, newName))
                fileCount += 1

            safeprint("Removed: "+ path)
            if doit:
                os.rmdir(path)
            folderCount += 1

    if doit:
        print("Done.")        
    else:
        print("Simulation complete.")


    print("Moved files:", fileCount)
    print("Removed folders:", folderCount)


directory_path = r"C:\Users\jd\Documents\myFtpData"
flatten(directory_path, True)
Protoplast answered 10/2, 2021 at 18:29 Comment(0)
H
0

Adding on to the answers, I believe my answer will satisfy all your needs, the other answers fail when there is a subdirectory and file with the same filename as the upper directory.

This was SOLVED here, Also look at my Github Repo for Structured File Copy and Flattened File Copy:

import os, fnmatch, shutil

PATTERN = '*.txt' # Regex Pattern to Match files
INPUT_FOLDER = "A" # os.getcwd()
INPUT_FOLDER = os.path.abspath(INPUT_FOLDER)
include_input_foldername = False
prepend = "_included" if include_input_foldername else ""
OUTPUT_FOLDER = f"Structured_Copy_{os.path.basename(INPUT_FOLDER)}{prepend}"

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

def find(pattern, path):
    """Utility to find files wrt a regex search"""
    result = []
    for root, dirs, files in os.walk(path):
        for name in files:
            if fnmatch.fnmatch(name, pattern):
                result.append(os.path.join(root, name))
    return result

all_files = find(PATTERN, INPUT_FOLDER)

for each_path in all_files:
    relative_path = os.path.relpath(each_path, os.path.dirname(INPUT_FOLDER)) if include_input_foldername else os.path.relpath(each_path, INPUT_FOLDER) 
    flattened_relative_fullpath = os.path.join(OUTPUT_FOLDER, relative_path)
    os.makedirs(os.path.dirname(flattened_relative_fullpath), exist_ok=True)
    shutil.copy(each_path, flattened_relative_fullpath)
    print(f"Copied {each_path} to {flattened_relative_fullpath}")
    
print(f"Finished Copying {len(all_files)} Files from : {INPUT_FOLDER} to : {OUTPUT_FOLDER}")
Homozygous answered 19/4, 2022 at 11:53 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.