Downloading a directory tree with ftplib
Asked Answered
A

6

18

This will not download the contents of sub-directories; how can I do so?

import ftplib
import configparser
import os

# Names of the remote sub-directories collected from the LIST output.
directories = []


def add_directory(line):
    """Record the directory named by one line of FTP ``LIST`` output.

    Meant to be used as the callback of ``ftp.retrlines('LIST', ...)``.
    Only lines whose first character is ``d`` are considered; the entry
    name found after the eighth whitespace-separated column is appended to
    the module-level ``directories`` list.

    :param line: one line of listing text.
    """
    if not line.startswith('d'):
        return
    # Split off at most eight leading columns so that a name containing
    # spaces stays in one piece instead of being cut at its first word.
    bits = line.split(None, 8)
    if len(bits) < 9:
        # Not the nine-column layout this parser understands; skip the line
        # rather than fail with an IndexError.
        return
    directories.append(bits[8])

def makeDir(archiveTo):
    """Create a local folder under ``archiveTo`` for each collected directory.

    Iterates over the module-level ``directories`` list.  A folder that is
    already present is reported on stdout and left untouched.

    :param archiveTo: local folder that receives the new sub-folders.
    """
    for name in directories:
        target = os.path.join(archiveTo, name)
        if not os.path.isdir(target):
            os.mkdir(target)
            continue
        print("Directory \"" + name + "\" already exists!")

def getFiles(archiveTo, ftp):
    """Download every non-directory entry of the current remote directory.

    Names returned by ``ftp.nlst()`` that appear in the module-level
    ``directories`` list are skipped; every other name is retrieved in
    binary mode into ``archiveTo``.

    :param archiveTo: local folder that receives the files.
    :param ftp: connected ``ftplib.FTP`` object positioned in the remote
        directory to copy.
    """
    for filename in ftp.nlst():
        if filename in directories:
            continue
        local_path = os.path.join(archiveTo, filename)
        # The context manager closes the local file even when the transfer
        # fails part-way, instead of leaking the handle.
        with open(local_path, 'wb') as local_file:
            ftp.retrbinary('RETR %s' % filename, local_file.write)

def runBackups():
    """Back up the configured remote folder of every connection.

    Reads ``connections.ini`` (relative to the current working directory).
    Each section must provide the options ``uri``, ``username``,
    ``password``, ``backuppath`` and ``archiveto``.  For every section the
    function logs in, changes to ``backuppath``, recreates the listed
    sub-directories under ``archiveto`` and downloads the files of that
    remote directory.
    """
    # Load INI
    # ConfigParser replaces SafeConfigParser, which is no longer available
    # in current Python releases.
    config = configparser.ConfigParser()
    config.read('connections.ini')

    for section in config.sections():
        # Load settings
        uri = config.get(section, "uri")
        username = config.get(section, "username")
        password = config.get(section, "password")
        backupPath = config.get(section, "backuppath")
        archiveTo = config.get(section, "archiveto")

        # Forget the directories collected for the previous connection so
        # they are not created or skipped for this one.
        directories.clear()

        # Start back-ups; leaving the ``with`` block closes the connection
        # even when one of the steps raises.
        with ftplib.FTP(uri) as ftp:
            ftp.login(username, password)
            ftp.cwd(backupPath)

            # Map directory tree
            ftp.retrlines('LIST', add_directory)

            # Make directories locally
            makeDir(archiveTo)

            # Gather files
            getFiles(archiveTo, ftp)

    print()
    print("Back-ups complete.")
    print()
Apperception answered 9/4, 2010 at 4:40 Comment(0)
A
17

this should do the trick :)

import sys
import ftplib
import os
from ftplib import FTP
# Module-level connection used by downloadFiles(); replace the placeholder
# host name and credentials with real values.
ftp = FTP(host="ftp address")
ftp.login(user="user", passwd="password")

def downloadFiles(path, destination):
    """Recursively copy the remote folder ``path`` below ``destination``.

    Uses the module-level ``ftp`` connection.  The remote folder structure
    is reproduced under ``destination``: the local folder for ``path`` is
    ``destination`` without its trailing slash, followed by ``path``.

    :param path: absolute remote folder, of the form "/dir/folder/something/"
        (leading and trailing slash).
    :param destination: local root folder, with a trailing slash.
    :raises SystemExit: when the server refuses to change to ``path``.
    """
    local_dir = destination[:-1] + path

    try:
        ftp.cwd(path)
    except ftplib.error_perm:
        # invalid entry (ensure input form: "/dir/folder/something/")
        print("error: could not change to " + path)
        sys.exit("ending session")

    # Clone the remote folder locally, including any missing parents.
    if not os.path.isdir(local_dir):
        os.makedirs(local_dir)
        print(local_dir + " built")

    # List children while the connection is still positioned in ``path``.
    for name in ftp.nlst():
        if name in (".", ".."):
            # Following these would recurse forever.
            continue
        remote = path + name
        try:
            # Only a folder can be entered.
            ftp.cwd(remote + "/")
        except ftplib.error_perm:
            # Not a folder with accessible content: download it.  The full
            # remote path is used because an earlier recursion may have left
            # the connection inside a sub-folder, and the file is written to
            # the mirrored folder rather than to the destination root.
            with open(os.path.join(local_dir, name), "wb") as f:
                ftp.retrbinary("RETR " + remote, f.write)
            print(name + " downloaded")
        else:
            downloadFiles(remote + "/", destination)

# Remote folder to copy and the local folder that receives it; both are
# written with a trailing slash, as downloadFiles() expects.
source = "/ftproot/folder_i_want/"
dest = "/systemroot/where_i_want_it/"
downloadFiles(path=source, destination=dest)
Ailin answered 18/5, 2012 at 19:45 Comment(1)
Also, you may not want to exit in the first `except ftplib.error_perm` block in case your user account has permission issues; `pass` could work there instead. — Ailin
L
17

This is a very old question, but I had a similar need that I wanted to satisfy in a very general manner. I ended up writing my own solution, which works very well for me. I've placed it on Gist here: https://gist.github.com/Jwely/ad8eb800bacef9e34dd775f9b3aad987

I have also pasted it below in case I ever take the gist offline.

Example usage:

import ftplib
# mysite, username, password, remote_dir and local_dir are placeholders for
# the caller's own values.
ftp = ftplib.FTP(host=mysite, user=username, passwd=password)
download_ftp_tree(ftp_handle=ftp, path=remote_dir, destination=local_dir)

The code above will look for a directory called "remote_dir" on the ftp host, and then duplicate the directory and its entire contents into the "local_dir". It invokes the script below.

import ftplib
import os

def _is_ftp_dir(ftp_handle, name, guess_by_extension=True):
    """ determines whether an item listed on the ftp server is a directory

    :param ftp_handle: FTP connection; its ``pwd()`` and ``cwd()`` are used
    :param name: name (or path) of the remote item to test
    :param guess_by_extension: when True, a name with a "." in the fourth to
        last position is assumed to carry a file extension and is reported
        as a file without asking the server
    :return: True if the server allows changing into ``name``, else False
    """

    # if the name has a "." in the fourth to last position, its probably a file extension
    # this is MUCH faster than trying to set every file to a working directory.
    # The length check matters: names shorter than four characters have no
    # fourth to last position and used to raise IndexError here.
    if guess_by_extension is True and len(name) >= 4 and name[-4] == '.':
        return False

    original_cwd = ftp_handle.pwd()     # remember the current working directory
    try:
        ftp_handle.cwd(name)            # try to set directory to new name
        ftp_handle.cwd(original_cwd)    # set it back to what it was
        return True
    except ftplib.all_errors:
        # only FTP/socket failures mean "not a directory"; anything else
        # (KeyboardInterrupt, programming errors) is no longer swallowed
        return False


def _make_parent_dir(fpath):
    """ ensures the parent directory of a filepath exists

    Every missing ancestor of ``fpath`` is created, outermost first, and a
    "created ..." line is printed for each one.  A path without a directory
    component (e.g. "file.txt") needs no parent and is left alone.

    :param fpath: path of the file whose parent directory is required
    """
    # Collect the missing ancestors from the innermost outwards.
    missing = []
    current = os.path.dirname(fpath)
    while current and not os.path.exists(current):
        missing.append(current)
        parent = os.path.dirname(current)
        if parent == current:
            # reached a filesystem root that does not exist; stop climbing
            break
        current = parent

    # Create them from the outermost inwards so each parent exists first.
    for dirname in reversed(missing):
        os.mkdir(dirname)
        print("created {0}".format(dirname))


def _download_ftp_file(ftp_handle, name, dest, overwrite):
    """ fetches one remote file into the local path ``dest``

    :param ftp_handle: FTP connection used for the binary transfer
    :param name: remote file name sent with the RETR command
    :param dest: local file path to write; its parent directory is created
    :param overwrite: pass True to download even when ``dest`` exists
    """
    _make_parent_dir(dest)

    keep_existing = os.path.exists(dest) and overwrite is not True
    if keep_existing:
        print("already exists: {0}".format(dest))
        return

    with open(dest, 'wb') as local_file:
        ftp_handle.retrbinary("RETR {0}".format(name), local_file.write)
    print("downloaded: {0}".format(dest))


def _mirror_ftp_dir(ftp_handle, name, overwrite, guess_by_extension):
    """ replicates a directory on an ftp server recursively

    :param ftp_handle: FTP connection
    :param name: remote directory to list with ``nlst``
    :param overwrite: forwarded to ``_download_ftp_file``
    :param guess_by_extension: forwarded to ``_is_ftp_dir``
    """
    for item in ftp_handle.nlst(name):
        # "." and ".." entries would make the recursion loop forever
        if item.rstrip('/').rsplit('/', 1)[-1] in ('.', '..'):
            continue
        # the flag has to be passed on, otherwise the caller's choice is
        # ignored and the extension guess is always applied
        if _is_ftp_dir(ftp_handle, item, guess_by_extension):
            _mirror_ftp_dir(ftp_handle, item, overwrite, guess_by_extension)
        else:
            # the listed name doubles as the relative local file path
            _download_ftp_file(ftp_handle, item, item, overwrite)


def download_ftp_tree(ftp_handle, path, destination, overwrite=False, guess_by_extension=True):
    """
    Downloads an entire directory tree from an ftp server to the local destination

    The download runs with ``destination`` as the working directory, because
    the files are written to paths relative to it.  ``destination`` is
    created when it does not exist, and the previous working directory is
    restored afterwards, also when the download fails.

    :param ftp_handle: an authenticated ftplib.FTP instance
    :param path: the folder on the ftp server to download
    :param destination: the local directory to store the copied folder
    :param overwrite: set to True to force re-download of all files, even if they appear to exist already
    :param guess_by_extension: It takes a while to explicitly check if every item is a directory or a file.
        if this flag is set to True, it will assume any file ending with a three character extension ".???" is
        a file and not a directory. Set to False if some folders may have a "." in their names -4th position.
    """
    previous_cwd = os.getcwd()
    if not os.path.isdir(destination):
        os.makedirs(destination)
    os.chdir(destination)
    try:
        _mirror_ftp_dir(ftp_handle, path, overwrite, guess_by_extension)
    finally:
        # do not leave the caller's process inside the download folder
        os.chdir(previous_cwd)
Likelihood answered 21/4, 2016 at 17:25 Comment(2)
Awesome. Works like a charm. Should be a library for this!Chromogen
Dude! vYour code is a FUCKING BEAUTY! Well described, works like charm, first run! I owe you few hours of my life!Blim
C
8

This is an alternative: you can try the ftputil package. You can then use it to walk the remote directories and get your files.

Carcinoma answered 9/4, 2010 at 5:21 Comment(1)
Is it really that complicated to the point where I should be considering packages? I'm thinking it'll just take 10 lines of really complicated code.Apperception
L
6

Using ftp.mlsd() instead of ftp.nlst():

import sys
import ftplib
import os
from ftplib import FTP


def fetchFiles(ftp, path, destination, overwrite=True):
    '''Fetch a whole folder from ftp.

    The remote folder structure is reproduced below ``destination``: the
    local folder for ``path`` is ``destination`` without its last character,
    followed by ``path``.

    Parameters
    ----------
    ftp         : ftplib.FTP object
    path        : string ('/dir/folder/')
    destination : string ('D:/dir/folder/') folder where the files will be saved
    overwrite   : bool - Overwrite file if already exists.

    Raises
    ------
    SystemExit  : when the server refuses to change to ``path``.
    '''
    local_dir = destination[:-1] + path

    try:
        ftp.cwd(path)
    except ftplib.error_perm:
        # invalid entry (ensure input form: "/dir/folder/")
        print("error: could not change to " + path)
        sys.exit("ending session")

    # Create the local folder together with any missing parents.
    if not os.path.isdir(local_dir):
        os.makedirs(local_dir)
        print('New folder made: ' + local_dir)

    # List children now, while the connection is positioned in ``path``.
    entries = list(ftp.mlsd())
    print('Current folder: ' + path)

    for name, facts in entries:
        kind = facts.get('type', '').lower()
        # Skip the listed folder itself and its parent by type (and by name,
        # as a safeguard) instead of assuming they come first in the listing.
        if kind in ('cdir', 'pdir') or name in ('.', '..'):
            continue
        if kind == 'file':
            fullpath = os.path.join(local_dir, name)
            if not overwrite and os.path.isfile(fullpath):
                continue
            with open(fullpath, 'wb') as f:
                # Full remote path: a previous recursion may have left the
                # connection inside a sub-folder.
                ftp.retrbinary('RETR ' + path + name, f.write)
            print(name + '  downloaded')
        elif kind == 'dir':
            fetchFiles(ftp, path + name + '/', destination, overwrite)
        else:
            print('Unknown type: ' + kind)


if __name__ == "__main__":

    # Remote folder to copy and local folder that receives it; both end
    # with a slash, as fetchFiles() expects.
    source = r'/Folder/'
    dest = r'D:/Data/'

    # Leaving the ``with`` block closes the connection, also when
    # fetchFiles() raises or ends the session.
    with FTP('ftp address') as ftp:
        ftp.login('user', 'password')
        fetchFiles(ftp, source, dest, overwrite=True)
Lacking answered 30/8, 2018 at 7:5 Comment(1)
`ftp.cwd(path)` should be added after the `if file[1]['type'] == 'file':` line. Otherwise, once a subfolder has been traversed, the working directory is still that last subfolder rather than the parent directory when the loop starts downloading the parent's files. — Prospective
A
5

Using ftputil, a fast solution could be:

def download(folder):
    """Copy the remote tree rooted at ``folder`` to identical local paths.

    Uses the module-level ``ftp`` object (presumably an ftputil host, as the
    surrounding text says -- confirm against the caller).  Each remote
    directory path yielded by ``ftp.walk`` is also used as the local
    directory path.

    :param folder: remote directory to walk.
    """
    for dirpath, subdirs, files in ftp.walk(folder):
        print("Creating dir " + dirpath)
        # Tolerate re-runs and missing parent folders instead of failing on
        # a directory that already exists.
        if not os.path.isdir(dirpath):
            os.makedirs(dirpath)
        for subdir in subdirs:
            print("Subdirs " + subdir)
        for name in files:
            print(r"Copying File {0} \ {1}".format(dirpath, name))
            ftp.download(ftp.path.join(dirpath, name), os.path.join(dirpath, name))
Adduction answered 3/6, 2016 at 19:57 Comment(0)
D
1

It is non-trivial, at least. In the simplest case, you assume you only have files and directories. This isn't always the case: there are soft links, hard links and Windows-style shortcuts. Soft links and directory shortcuts are particularly problematic, since they make recursive directories possible, which would confuse a naively implemented FTP grabber.

How you handle such recursive directories depends on your needs; you might simply not follow soft links, or you might try to detect recursive links. Detecting recursive links is inherently tricky; you cannot do it reliably.

Decease answered 10/4, 2010 at 1:45 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.