Recursively downloads all files and folders, preserving the same relative layout as the source GCS directory.
def download_gcs_folder_recursively_to_local(blob_folder_path, destination_folder_path, gcs_project_name, gcs_bucket_name):
    """Download every object under a GCS folder, keeping its sub-folder layout.

    Each object whose name starts with ``blob_folder_path`` is first downloaded
    into a private staging directory and then moved to
    ``destination_folder_path`` under the same relative path it has below
    ``blob_folder_path``. Missing local folders are created on the way.

    Args:
        blob_folder_path: Object-name prefix ("folder") inside the bucket.
            A trailing "/" is appended when missing.
        destination_folder_path: Local folder that receives the files.
            A trailing "/" is appended when missing.
        gcs_project_name: Project passed to ``storage.Client``.
        gcs_bucket_name: Name of the bucket to read from.

    Errors raised by the storage client or by the local file operations are
    not caught here; they propagate to the caller.
    """
    # Imported locally so the function does not depend on module-level imports
    # beyond the ``os`` and ``storage`` names it already relied on.
    import shutil
    import tempfile

    # Ensure that the folder paths end with a forward slash ("/").
    # This avoids picking up files/folders with the same prefix but in different folders.
    if not blob_folder_path.endswith("/"):
        blob_folder_path += "/"
    if not destination_folder_path.endswith("/"):
        destination_folder_path += "/"

    # Connect to Google Cloud Storage and list the objects below the prefix.
    storage_client = storage.Client(gcs_project_name)
    bucket = storage_client.get_bucket(gcs_bucket_name)
    blobs = bucket.list_blobs(prefix=blob_folder_path)

    # Stage downloads in a uniquely named directory inside the current working
    # directory. The unique name keeps concurrent runs from sharing a staging
    # folder, and the context manager removes it even when a download or move
    # fails part-way through.
    with tempfile.TemporaryDirectory(prefix="tmp_cp_folder_", dir=".") as staging_dir:
        for blob in blobs:
            # Skip blobs that represent folders (name ends with "/").
            if blob.name.endswith("/"):
                continue

            # Path of the object relative to the requested prefix, split into
            # its parent folder ("" for objects directly under the prefix) and
            # its final component.
            relative_file_path = blob.name[len(blob_folder_path):]
            relative_parent_folder, _, file_name = relative_file_path.rpartition("/")

            # Stage under the final component only: flattening the whole object
            # path into one file name could exceed the filesystem's name limit.
            staged_path = os.path.join(staging_dir, file_name)
            blob.download_to_filename(staged_path)

            os.makedirs(f"{destination_folder_path}{relative_parent_folder}", exist_ok=True)
            # shutil.move avoids passing object names through a shell, so names
            # with spaces or shell metacharacters are handled verbatim, and a
            # failed move raises instead of being silently ignored.
            shutil.move(staged_path, f"{destination_folder_path}{relative_file_path}")
Based on one of the earlier solutions: https://stackoverflow.com/a/49749281