I wrote github.com/TamaMcGlinn/submodule_bundler to do this. There are a lot of corner cases, and I doubt that I have gotten them all. Please try it out and if any fixes are necessary for your use case, open an issue on the project.
For posterity, I will list all of the code from that project below; but you should just clone it directly from GitHub.
bundle.py
#!/usr/bin/env python3
""" Create bundles for submodules """
import os
import argparse
import subprocess
import tarfile
import submodule_commits
import string
import random
import shutil
# Command-line interface: the tar file to write, plus an optional commit range
# of the top-level repository.
parser = argparse.ArgumentParser(description='Create bundles for submodules (recursively), \
to facilitate sneakernet connections. On the online computer, \
a bundle is made for each repository, and then packed into a .tar file. \
On the offline computer, use unbundle.py on the tarfile to unzip and \
pull from the corresponding bundle for each repository.')
parser.add_argument('filename', metavar='filename', type=str, help='file to create e.g. ../my_bundles.tar')
# nargs='?' makes the range optional; the default '..HEAD' has an empty
# baseline, which the code below treats as "bundle the complete history".
parser.add_argument('commit_range', metavar='[baseline]..[target]', type=str, default='..HEAD', nargs='?',
help='commit range of top-level repository to bundle; defaults to everything')
# Arguments are parsed as a top-level statement, i.e. as soon as this file runs.
args = parser.parse_args()
class IllegalArgumentError(ValueError):
    """ Raised when a command-line argument is syntactically invalid """
# Split '[baseline]..[target]'; anything that does not yield exactly two parts
# is rejected with a usage hint.
try:
    baseline, target = args.commit_range.split('..')
except ValueError:
    raise IllegalArgumentError(f"Invalid commit range: '{args.commit_range}': "
                               + "Expected [baseline]..[target]. Baseline and target are optional "
                               + "but the dots are necessary to distinguish between the two.") from None

# An empty baseline means every bundle carries the complete history.
full_histories = baseline == ''
if full_histories:
    print("No baseline (all bundles will be complete history bundles)")
    from_str = "from scratch "
else:
    from_str = f'from {baseline} '
if not target:
    target = 'HEAD'
print('Making bundles to update ' + from_str + f'to {target}')

updates_required = {}
bundles = []
# Submodule path -> commit recorded for it at the target commit.
new_submodules = {
    entry['subdir']: entry['commit']
    for entry in submodule_commits.submodule_commits('.', target)
}

root_dir = os.getcwd()
tar_file_name = os.path.basename(args.filename).partition('.')[0]
temp_dir = f'temp_dir_for_{tar_file_name}_bundles'  # note this won't work if that dir already has contents
def create_bundle(submodule_dir, new_commit_sha, baseline_descriptor=''):
    """ Create a git bundle for one repository inside the temp directory.

    The bundle is written to '<temp_dir>/<submodule_dir>.bundle' and its path
    relative to the temp directory is appended to the module-level 'bundles'.

    Args:
        submodule_dir: repository path relative to the top-level repository
            ('.' for the top-level repository itself).
        new_commit_sha: commit the repository is expected to end up at. It is
            not used: the bundle is made from whatever HEAD currently names.
        baseline_descriptor: '' for a full-history bundle, or '<commit>..' to
            bundle only what comes after <commit>.

    Raises:
        subprocess.CalledProcessError: if a git command fails, so a bundle that
            could not be created is reported here instead of later, as a
            missing file while packing the tar.
    """
    bundle_path_in_temp = f'{submodule_dir}.bundle'
    # Anchor everything at root_dir and run git with cwd= instead of os.chdir:
    # the function then neither depends on nor changes the process-wide
    # working directory, even if an error occurs halfway.
    bundle_path = os.path.join(root_dir, temp_dir, bundle_path_in_temp)
    repo_dir = os.path.join(root_dir, submodule_dir)
    os.makedirs(os.path.dirname(bundle_path), exist_ok=True)
    rev_parse_output = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD'],
                                               cwd=repo_dir)
    current_branch = rev_parse_output.decode("utf-8").strip('\n')
    subprocess.run(['git', 'bundle', 'create', bundle_path,
                    f'{baseline_descriptor}{current_branch}', '--tags'],
                   cwd=repo_dir, check=True)
    bundles.append(bundle_path_in_temp)
# Incremental bundles: compare every submodule recorded at the baseline with
# the commit recorded at the target. Each one handled here is popped from
# new_submodules, so what remains afterwards did not exist at the baseline.
if not full_histories:
    for existing_commit in submodule_commits.submodule_commits('.', baseline):
        baseline_commit = existing_commit['commit']
        submodule_dir = existing_commit['subdir']
        new_commit_sha = new_submodules.pop(submodule_dir, None)
        if new_commit_sha is None:
            # the submodule was removed, don't need to make any bundle
            continue
        if new_commit_sha == baseline_commit:
            # no change, no bundle
            continue
        print(f"Need to update {submodule_dir} from {baseline_commit} to {new_commit_sha}")
        create_bundle(submodule_dir, new_commit_sha, f'{baseline_commit}..')

# Submodules without a baseline counterpart (or all of them, for a
# full-history run) get a complete-history bundle.
for submodule_dir, commit_sha in new_submodules.items():
    print(f"New submodule {submodule_dir}")
    create_bundle(submodule_dir, commit_sha)
# The top-level repository is bundled last, under the odd name '..bundle'.
# No submodule can clash with that name, because a directory cannot be
# called '.'.
baseline_descriptor = '' if full_histories else f'{baseline}..'
create_bundle('.', target, baseline_descriptor)
print("Packing bundles into tarfile:")
try:
    with tarfile.open(args.filename, mode="w:") as tar:  # no compression; git already does that
        for bundle in bundles:
            print(bundle)
            # arcname stores the member under its path relative to the temp
            # directory, so there is no need to chdir into it.
            tar.add(os.path.join(root_dir, temp_dir, bundle), arcname=bundle)
finally:
    # Always clean up: a leftover temp directory would break the next run.
    print("Removing temp directory")
    shutil.rmtree(os.path.join(root_dir, temp_dir))
unbundle.py
#!/usr/bin/env python3
""" Extract bundles for submodules """
import os
import argparse
import shutil
import tarfile
import pullbundle
import submodule_commits
import subprocess
# Command-line interface. The description and help text describe extraction;
# they used to be a verbatim copy of bundle.py's ("Create bundles", "file to
# create"), which was misleading in --help output.
parser = argparse.ArgumentParser(
    description=('Extract bundles for submodules (recursively), '
                 'to facilitate sneakernet connections. On the online computer, '
                 'bundle.py makes a bundle for each repository and packs them into a .tar file. '
                 'On the offline computer, use unbundle.py on the tarfile to unzip and '
                 'pull from the corresponding bundle for each repository.'))
parser.add_argument('filename', metavar='filename', type=str,
                    help='tarfile created by bundle.py to extract e.g. ../my_bundles.tar')
args = parser.parse_args()
tar_file_name = os.path.basename(args.filename).split('.')[0]
temp_dir = f'temp_dir_for_{tar_file_name}_extraction'
with tarfile.open(args.filename, 'r:') as tar:
    # The tarfile arrives from another machine: where the interpreter supports
    # extraction filters, use the 'data' filter so members cannot escape
    # temp_dir (absolute paths, '..' components, links pointing outside).
    if hasattr(tarfile, 'data_filter'):
        tar.extractall(temp_dir, filter='data')
    else:
        tar.extractall(temp_dir)
# Remember where the top-level repository is; the loop below chdirs into submodules.
root_dir = os.getcwd()
def is_git_repository(dir):
    """ Return true iff dir is a directory that is the top level of a git repository.

    This is checked by comparing dir with the output of
    'git rev-parse --show-toplevel' run inside it. A directory that merely
    lives inside another repository therefore does not count.

    Returns False, rather than raising, when dir does not exist, is not a
    directory, or is not inside any git repository. The working directory of
    the process is restored in every case.
    """
    if not os.path.isdir(dir):
        return False
    previous_dir = os.getcwd()
    os.chdir(dir)
    try:
        current_dir = os.getcwd().replace('\\', '/')
        try:
            rev_parse_toplevel = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'],
                                                         stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            # git exits non-zero when the directory is outside any repository
            return False
    finally:
        os.chdir(previous_dir)
    git_dir = rev_parse_toplevel.decode("utf-8").strip('\n')
    return current_dir == git_dir
# The top-level repository goes first, so that the submodule commits listed
# below are the ones recorded after the update.
pullbundle.pullbundle(f'{temp_dir}/..bundle', True)

for entry in submodule_commits.submodule_commits():
    subdir, commit = entry["subdir"], entry["commit"]
    print(f'{subdir} -> {commit}')
    bundle_file_from_root = f'{temp_dir}/{subdir}.bundle'
    if not os.path.isfile(bundle_file_from_root):
        print(f'Skipping submodule {subdir} because there is no bundle')
        continue
    if not is_git_repository(subdir):
        # clone first if the subdir doesn't exist or isn't a git repository yet
        subprocess.run(['git', 'clone', bundle_file_from_root, subdir])
    # One '../' per path component leads from inside the submodule back to the root.
    depth = subdir.count('/') + 1
    bundle_file = f"{'../' * depth}{bundle_file_from_root}"
    os.chdir(subdir)
    pullbundle.pullbundle(bundle_file)
    os.chdir(root_dir)

print("Removing temp directory")
shutil.rmtree(temp_dir)
subprocess.run(['git', 'submodule', 'update', '--recursive'])
pullbundle.py
#!/usr/bin/env python3
""" Pull from bundles """
import argparse
import subprocess
import re
import os
# Pattern whose first group captures the branch name of a 'refs/heads/<branch>' ref.
ref_head_regex = 'refs/heads/(.*)'
# Commit of the bundle's HEAD ref: written by iterate_branches, reset to None
# at the start of every pullbundle call.
head_commit = None
class UnableToFastForwardError(RuntimeError):
    """ Raised when the current branch is ahead of, or has diverged from, the bundle """
def iterate_branches(bundle_refs):
    """ Given lines of output from 'git bundle unbundle' this writes the HEAD commit to the head_commit global
    and yields each branch, commit pair.

    Args:
        bundle_refs: iterable of '<commit> <ref name>' strings.

    Yields:
        (branch_name, commit) for every ref that starts with 'refs/heads/'.
        Other refs (tags and so on) and lines without two fields are skipped.
    """
    global head_commit
    for bundle_ref in bundle_refs:
        ref_split = bundle_ref.split()
        if len(ref_split) < 2:
            # blank or malformed line: there is no ref to record
            continue
        commit, ref_name = ref_split[0], ref_split[1]
        if ref_name == 'HEAD':
            head_commit = commit
            continue
        # re.match anchors at the start of the ref name, so a ref such as
        # 'refs/tags/refs/heads/x' is not mistaken for a branch.
        match = re.match(ref_head_regex, ref_name)
        if match:
            yield (match.group(1), commit)
def update_branch(branch, commit, check_divergence=False):
    """ Update branch to commit if possible by fast-forward.

    Nothing is changed when the branch is already at commit, is ahead of it,
    or has diverged from it; the situation is only reported on stdout.

    Args:
        branch: name of an existing local branch (without 'refs/heads/').
        commit: commit the bundle has for this branch.
        check_divergence: if true, and branch is the branch currently checked
            out, raise instead of just reporting when it cannot be
            fast-forwarded.

    Raises:
        UnableToFastForwardError: see check_divergence.
        subprocess.CalledProcessError: if the branch or HEAD cannot be resolved.
    """
    # The full ref name keeps git from resolving a tag of the same name instead.
    branch_ref = f'refs/heads/{branch}'
    rev_parse_branch_output = subprocess.check_output(['git', 'rev-parse', branch_ref])
    old_commit = rev_parse_branch_output.decode("utf-8").strip('\n')
    if old_commit == commit:
        print(f'Skipping {branch} which is up-to-date at {commit}')
        return

    rev_parse_current_output = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD'])
    current_branch = rev_parse_current_output.decode("utf-8").strip('\n')
    on_this_branch = current_branch == branch

    # 'merge-base --is-ancestor A B' exits 0 iff A is an ancestor of B.
    branch_is_behind_commit = subprocess.call(
        ['git', 'merge-base', '--is-ancestor', branch_ref, commit]) == 0
    if branch_is_behind_commit:
        print(f'Fast-forwarding {branch} from {old_commit} to {commit}')
        if on_this_branch:
            # The checked-out branch has to be moved together with the work tree.
            subprocess.call(['git', 'reset', '--hard', '-q', commit])
        else:
            # Move the branch in one step. Deleting and re-creating it would
            # drop its configuration and lose the branch if creation failed.
            subprocess.call(['git', 'branch', '-f', '-q', branch, commit])
        return

    branch_is_ahead_of_commit = subprocess.call(
        ['git', 'merge-base', '--is-ancestor', commit, branch_ref]) == 0
    if branch_is_ahead_of_commit:
        print(f'Skipping {branch} which is at {old_commit}, ahead of bundle version {commit}')
        failure = "Unable to update branch: already ahead of bundle"
    else:
        print(f'Error: {branch} already exists, at {old_commit} which diverges from '
              + f'bundle version at {commit}')
        print('You could switch to the bundle version as follows, but you might lose work.')
        print(f'git checkout -B {branch} {commit}')
        failure = "Unable to update branch: diverged from bundle"
    if on_this_branch and check_divergence:
        raise UnableToFastForwardError(failure) from None
def checkout(commit):
    """ Quietly force-checkout commit in the repository of the current directory """
    checkout_command = ['git', 'checkout', '-q', '-f', commit]
    subprocess.run(checkout_command)
def pullbundle(bundle_file, check_divergence=False):
""" Main function; update all branches from given bundle file """
# head_commit is module-level state written by iterate_branches; clear it so a
# value left over from a previously processed bundle is not reused.
global head_commit
head_commit = None
# Fetch the bundle's tags into the local tag namespace; the leading '+' asks
# git to update existing tags too. stderr is discarded and the exit status
# is not checked.
subprocess.run(['git', 'fetch', bundle_file, '+refs/tags/*:refs/tags/*'], stderr=subprocess.DEVNULL)
# The output is split into lines; iterate_branches reads each line as
# '<commit> <ref name>'. filter(None, ...) drops the empty strings left by split.
unbundle_output = subprocess.check_output(['git', 'bundle', 'unbundle', bundle_file])
bundle_refs = filter(None, unbundle_output.decode("utf-8").split('\n'))
for branch, commit in iterate_branches(bundle_refs):
# Exit status 0 from 'git show-ref -q --heads' is taken to mean the branch exists locally.
returncode = subprocess.call(['git', 'show-ref', '-q', '--heads', branch])
branch_exists = returncode == 0
if branch_exists:
# check_divergence is passed through to update_branch unchanged.
update_branch(branch, commit, check_divergence)
else:
print(f'Created {branch} pointing at {commit}')
subprocess.run(['git', 'branch', branch, commit])
# NOTE(review): the indentation of this listing was lost, so it is unclear
# whether this checkout belongs to the 'else' branch, to the loop body or
# follows the loop. Presumably it belongs to the 'else' - confirm against
# the upstream repository before re-indenting.
checkout(commit)
if head_commit is not None:
# checkout as detached head without branch
# note this might not happen; if the bundle updates a bunch of branches
# then whichever one we were already on is updated already and we don't need to do anything here
checkout(head_commit)
if __name__ == "__main__":
    # Command-line entry point: pull a single bundle into the current repository.
    cli = argparse.ArgumentParser(description='Update all branches and tags contained in a bundle file')
    cli.add_argument('filename', metavar='filename', help='git bundle file to pull e.g. ../foo.bundle')
    cli.add_argument(
        '-c', '--check_divergence',
        action='store_true',
        help=("return an errorcode if the current branch was not updated "
              "because of already being ahead or having diverged from the bundle version of that branch"))
    options = cli.parse_args()
    pullbundle(options.filename, options.check_divergence)
submodule_commits.py
#!/usr/bin/env python3
""" Print the commit of each submodule (recursively) at some commit"""
import os
import argparse
import subprocess
import re
def print_submodule_commits(root_subdir, root_commit):
    """ Print one '<subdir> <commit>' line for every submodule found recursively """
    for entry in submodule_commits(root_subdir, root_commit):
        subdir, commit = entry["subdir"], entry["commit"]
        print(f'{subdir} {commit}')
def submodule_commits(subdir='.', commit='HEAD', prefix=''):
    """ Yield every submodule recorded at commit in the repository at subdir, recursively.

    Args:
        subdir: directory of the repository to inspect, relative to the
            working directory at the time iteration starts.
        commit: commit of that repository whose tree is inspected.
        prefix: path prepended (with '/') to every reported submodule path;
            used by the recursion to report paths relative to the top level.

    Yields:
        {'subdir': <path of the submodule>, 'commit': <commit recorded for it>},
        each submodule immediately followed by the submodules nested in it.

    Raises:
        subprocess.CalledProcessError: if 'git ls-tree' fails, e.g. because a
            submodule does not contain the commit recorded for it.
    """
    # Run git with cwd= instead of os.chdir. A generator that changes the
    # process-wide working directory while suspended breaks consumers that use
    # relative paths or chdir between iterations. Resolving the path up front
    # also makes the recursion independent of where the consumer happens to be.
    repo_dir = os.path.abspath(subdir)
    # -z gives NUL-terminated '<mode> <type> <object>\t<path>' entries with
    # unquoted paths, so paths containing spaces or unusual characters survive.
    git_ls_tree = subprocess.check_output(['git', 'ls-tree', '-r', '-z', commit], cwd=repo_dir)
    for entry in filter(None, git_ls_tree.decode("utf-8").split('\0')):
        metadata, _, subdirectory = entry.partition('\t')
        fields = metadata.split()
        # submodules are the entries of type 'commit'
        if len(fields) != 3 or fields[1] != 'commit':
            continue
        commit_hash = fields[2]
        submodule_prefix = subdirectory if prefix == '' else f'{prefix}/{subdirectory}'
        yield {'subdir': submodule_prefix, 'commit': commit_hash}
        yield from submodule_commits(os.path.join(repo_dir, subdirectory), commit_hash, submodule_prefix)
if __name__ == "__main__":
    # Command-line entry point: list the submodules recorded at one commit.
    cli = argparse.ArgumentParser(description='Print the commit of each submodule (recursively) at some commit')
    cli.add_argument(
        'commit', metavar='commit_hash', type=str, default='HEAD', nargs='?',
        help='commit to examine; defaults to HEAD')
    options = cli.parse_args()
    print_submodule_commits('.', options.commit)