I went ahead and wrote a Python script that prints out the number of lines of code added/changed by a user and the average number of lines per change.
Tested on Windows with Python 2.7.2. You can run it from the command line; it assumes that p4 is on your PATH.
Usage: codestats.py -u [username]
It works with git too: codestats.py -u [authorname] -g.
It blacklists bulk additions (e.g. a change in which you just added a library) and also certain file types (e.g. .html files). Otherwise, it works pretty well.
Hope this helps!
########################################################################
# Script that computes the lines of code stats for a perforce/git user.
########################################################################
import argparse
import logging
import subprocess
import sys
import re
# Command-line options understood by the script. Each entry is a tuple of
# (destination name, short switch, long switch, help text, number of values
# the option takes); a value count of 0 marks a boolean flag.
VALID_ARGUMENTS = [
    (
        "user",
        "-u",
        "--user",
        "Run lines of code computation for the specified user.",
        1,
    ),
    (
        "change",
        "-c",
        "--change",
        "Just display lines of code in the passed in change (useful for debugging).",
        1,
    ),
    (
        "git",
        "-g",
        "--git",
        "Use git rather than perforce (which is the default versioning system queried).",
        0,
    ),
]
class PrintHelpOnErrorArgumentParser(argparse.ArgumentParser):
    """Argument parser that prints the full help text when parsing fails."""

    def error(self, message):
        """Log *message*, print the parser's help, and exit with status 2."""
        report = "error: {0}\n\n".format(message)
        logging.error(report)
        self.print_help()
        # Same effect as sys.exit(2): terminate with exit status 2.
        raise SystemExit(2)
def is_code_file(depot_path):
    """Return True if *depot_path* is a text file of a non-blacklisted type.

    Runs ``p4 fstat`` on the path and looks at the reported ``headType``: the
    file is treated as text when that type contains the substring ``text``.
    Text files whose depot path ends in one of the blacklisted extensions
    (followed by ``#<revision>``) are rejected as well.
    """
    fstat_process = subprocess.Popen(['p4', 'fstat', depot_path], stdout=subprocess.PIPE)
    fstat_stdout = fstat_process.communicate()[0]

    head_type_pattern = re.compile(r'^... headType (\S+)\s*$')
    is_text = False
    for fstat_line in fstat_stdout.split('\n'):
        found = head_type_pattern.match(fstat_line)
        if found:
            # If several headType lines are reported, the last one wins.
            is_text = 'text' in found.group(1)

    if not is_text:
        return False

    # Markup and workbook-style files are text but are not counted as code.
    for extension in ('html', 'css', 'twb', 'twbx', 'tbm', 'xml'):
        if re.match(r'^\/\/depot.*\.{}#\d+$'.format(extension), depot_path):
            return False
    return True
def parse_args():
    """Parse the command line according to VALID_ARGUMENTS.

    Every option is registered under both its short and its long switch and
    carries its help text. Options that take values are parsed with
    ``nargs=<count>``, so their parsed value is a list (a one-element list for
    a count of 1); options with a count of 0 are boolean flags.

    Returns:
        The argparse namespace, with one attribute per destination name
        declared in VALID_ARGUMENTS.
    """
    parser = PrintHelpOnErrorArgumentParser()
    for arg_name, short_switch, long_switch, help_text, num_args in VALID_ARGUMENTS:
        if num_args != 0:
            # Previously only the short switch was registered here and the
            # help text was dropped, so "--user" / "--change" did not work.
            parser.add_argument(
                short_switch,
                long_switch,
                nargs=num_args,
                type=str,
                help=help_text,
                dest=arg_name)
        else:
            parser.add_argument(
                short_switch,
                long_switch,
                action="store_true",
                help=help_text,
                dest=arg_name)
    return parser.parse_args()
# Patterns for per-file lines of the form "... <path>#<rev> <action>".
# Raw strings keep the regex escapes (\d, \s) out of Python's own string
# escape processing; the compiled patterns are unchanged.
file_edited_regex = re.compile(r'^... .*?#\d+ edit\s*$')
file_deleted_regex = re.compile(r'^... .*?#\d+ delete\s*$')
file_integrated_regex = re.compile(r'^... .*?#\d+ integrate\s*$')
# Group 1 captures "<path>#<rev>" of an added file.
file_added_regex = re.compile(r'^... (.*?#\d+) add\s*$')
# Header line that introduces the list of affected files.
affected_files_regex = re.compile(r'^Affected files ...')

# Changes that seem as if they weren't hand coded and merit inspection
outliers = []
def num_lines_in_file(depot_path):
    """Return the number of lines in the file revision at *depot_path*.

    The contents are fetched with ``p4 print -q``. The ``-q`` flag suppresses
    the one-line header that ``p4 print`` otherwise emits before the file
    contents, and ``splitlines()`` does not produce a phantom empty entry
    after the final newline; without both, every file was over-counted by up
    to two lines.
    """
    contents = subprocess.Popen(
        ['p4', 'print', '-q', depot_path],
        stdout=subprocess.PIPE).communicate()[0]
    return len(contents.splitlines())
def parse_change(changelist):
    """Compute file and line statistics for one submitted changelist.

    Parses the output of ``p4 describe -ds <changelist>``:

    * In the "Affected files ..." section, added files are counted and the
      size of each added code file is added to the lines-added total (files
      over 3000 lines contribute 0 lines); edited files are counted.
    * In the "Differences ..." section, the per-file summary lines
      ("add/deleted/changed N chunks ...") are accumulated, skipping files
      for which is_code_file() is false.

    Returns:
        A list ``[files_added, files_edited, lines_added, lines_deleted,
        lines_changed_added, lines_changed_deleted]``.
    """
    describe_output = subprocess.Popen(
        ['p4', 'describe', '-ds', changelist],
        stdout=subprocess.PIPE).communicate()[0].split('\n')

    differences_regex = re.compile(r'^Differences \.\.\..*$')
    line_added_regex = re.compile(r'^add \d+ chunks (\d+) lines.*$')
    line_removed_regex = re.compile(r'^deleted \d+ chunks (\d+) lines.*$')
    line_changed_regex = re.compile(r'^changed \d+ chunks (\d+) / (\d+) lines.*$')
    # Per-file diff header, e.g. "==== //depot/dir/file.c#3 (text) ====".
    # The previous pattern ended in "\s*\S+$", which cannot match the
    # trailing " ====" (nor a trailing "\r"), so the non-code-file skip
    # never triggered. Anything may now follow the first token after the
    # revision.
    file_diff_regex = re.compile(r'^==== (//depot.*#\d+)\s*\S+.*$')

    in_differences = False
    in_affected_files = False
    skip_file = False
    num_lines_added = 0
    num_lines_deleted = 0
    num_lines_changed_added = 0
    num_lines_changed_deleted = 0
    num_files_added = 0
    num_files_edited = 0

    for line in describe_output:
        if differences_regex.match(line):
            in_differences = True
            continue
        if affected_files_regex.match(line):
            in_affected_files = True
            continue

        if in_differences:
            diff_header = file_diff_regex.match(line)
            if diff_header:
                # A new file's diff starts: decide once whether to count it.
                skip_file = not is_code_file(diff_header.group(1))
                continue
            if skip_file:
                continue

            summary = line_added_regex.match(line)
            if summary:
                num_lines_added += int(summary.group(1))
                continue
            summary = line_removed_regex.match(line)
            if summary:
                num_lines_deleted += int(summary.group(1))
                continue
            summary = line_changed_regex.match(line)
            if summary:
                # "changed N chunks <old> / <new> lines"
                num_lines_changed_deleted += int(summary.group(1))
                num_lines_changed_added += int(summary.group(2))
            continue

        if in_affected_files:
            added_file = file_added_regex.match(line)
            if added_file:
                depot_path = added_file.group(1)
                if is_code_file(depot_path):
                    lines_in_file = num_lines_in_file(depot_path)
                    if lines_in_file > 3000:
                        # Anomaly - probably a copy of existing code - discard this
                        lines_in_file = 0
                    num_lines_added += lines_in_file
                # NOTE(review): every added file is counted here, matching how
                # edited files are counted below regardless of type - confirm
                # that non-code files should be included in this count.
                num_files_added += 1
            elif file_edited_regex.match(line):
                num_files_edited += 1

    return [num_files_added, num_files_edited, num_lines_added, num_lines_deleted,
            num_lines_changed_added, num_lines_changed_deleted]
def contains_integrates(changelist):
    """Return True if *changelist* lists at least one integrated file.

    Scans the output of ``p4 describe -s <changelist>`` and, once the
    "Affected files ..." header has been seen, reports whether any later
    line is an integrate entry.
    """
    describe_stdout = subprocess.Popen(
        ['p4', 'describe', '-s', changelist],
        stdout=subprocess.PIPE).communicate()[0]
    remaining_lines = iter(describe_stdout.split('\n'))

    # Consume everything up to and including the section header.
    for header_candidate in remaining_lines:
        if affected_files_regex.match(header_candidate):
            break

    # Whatever is left belongs to the affected-files section (or is empty
    # when the header never appeared).
    for entry in remaining_lines:
        if file_integrated_regex.match(entry):
            return True
    return False
#################################################
# Note: Keep this function in sync with
# generate_line.
#################################################
def generate_output_specifier(output_headers):
    """Build the str.format() template for one table row.

    Each header contributes a '| {:<width>}' field whose width is the length
    of that header; a closing ' |' is appended when there is at least one
    column. An empty header list yields an empty string.
    """
    columns = ['| {:' + str(len(header)) + '}' for header in output_headers]
    if not columns:
        return ''
    return ''.join(columns) + ' |'
#################################################
# Note: Keep this function in sync with
# generate_output_specifier.
#################################################
def generate_line(output_headers):
    """Build the horizontal rule that matches a table row's width.

    Each header contributes two dashes (for the '| ' prefix) plus one dash
    per header character; two more dashes cover the closing ' |' when there
    is at least one column. An empty header list yields an empty string.
    """
    segments = ['-' * (2 + len(header)) for header in output_headers]
    if not segments:
        return ''
    return ''.join(segments) + '--'
# Returns true if a change is a bulk addition, a private change or a
# third-party change
def is_black_listed_change(user, changelist):
    """Decide whether *changelist* should be excluded from the statistics.

    A change is blacklisted when the output of ``p4 describe -s`` shows that
    it

    * touches a path under ``//depot/private`` or ``//depot/third-party``, or
    * consists only of adds (no edited or deleted files) and adds more than
      70 files.

    The private-path check used to be computed but left out of the returned
    value, contradicting the documented contract; it is now honoured.

    Args:
        user: Unused; kept so existing callers keep working.
        changelist: Change number, as a string.

    Returns:
        True if the change should be skipped, False otherwise.
    """
    describe_output = subprocess.Popen(
        ['p4', 'describe', '-s', changelist],
        stdout=subprocess.PIPE).communicate()[0].split('\n')

    all_adds = True
    num_adds = 0
    for line in describe_output:
        # A blacklisted path settles the answer immediately, whatever the
        # add/edit counts are.
        if '... //depot/private' in line or '... //depot/third-party' in line:
            return True
        if file_edited_regex.match(line) or file_deleted_regex.match(line):
            all_adds = False
        elif file_added_regex.match(line):
            num_adds += 1

    # Bulk addition: nothing but adds, and a lot of them.
    return all_adds and num_adds > 70
# Matches a line starting with "Change <number>" that later contains
# "<user>@"; group 1 is the change number and group 2 the user name.
change_header_regex = re.compile(r'^Change (\d+)\s*.*?\s*(\S+)@.*$')


def get_user_and_change_header_for_change(changelist):
    """Look up the submitting user and the header line of *changelist*.

    Scans the output of ``p4 describe -s <changelist>`` for the first line
    that matches change_header_regex.

    Returns:
        ``[user, header_line]``, or ``[None, None]`` when no line matches.
    """
    describe_stdout = subprocess.Popen(
        ['p4', 'describe', '-s', changelist],
        stdout=subprocess.PIPE).communicate()[0]
    for candidate in describe_stdout.split('\n'):
        header = change_header_regex.match(candidate)
        if header:
            return [header.group(2), candidate]
    return [None, None]
# Script entry point: prints lines-of-code statistics for a git author, for a
# single perforce change, or for every submitted perforce change of a user.
#
# Output uses print(<single argument>) and sys.stdout.write(), which behave
# the same under the Python 2 print statement and the Python 3 print function.
if __name__ == "__main__":
    log = logging.getLogger()
    log.setLevel(logging.DEBUG)
    args = parse_args()

    # Options that take a value are parsed with nargs=1 and therefore arrive
    # as one-element lists. Unwrap them once, so plain strings (not nested
    # lists) are passed on to the p4/git command lines.
    user = args.user[0] if isinstance(args.user, list) else args.user
    change = args.change[0] if isinstance(args.change, list) else args.change

    if args.git:
        if not user:
            logging.error("error: git mode requires an author name (-u)")
            sys.exit(2)

        git_log_command = ['git', 'log', '--author={}'.format(user), '--pretty=tformat:', '--numstat']
        git_log_output = subprocess.Popen(git_log_command, stdout=subprocess.PIPE).communicate()[0].split('\n')
        # numstat lines are "<added> <deleted> <path>". The path may contain
        # spaces (e.g. renames shown as "old => new"), so accept anything
        # after the two counters.
        git_log_line_regex = re.compile(r'^(\d+)\s+(\d+)\s+\S.*$')
        adds = 0
        subs = 0
        for git_log_line in git_log_output:
            line_match = git_log_line_regex.match(git_log_line)
            if line_match:
                adds += int(line_match.group(1))
                subs += int(line_match.group(2))
        total = adds - subs

        # An explicit revision keeps "git shortlog" from reading the log from
        # standard input when stdin is not a terminal.
        git_shortlog_command = ['git', 'shortlog', '--author={}'.format(user), '-s', 'HEAD']
        git_shortlog_output = subprocess.Popen(git_shortlog_command, stdout=subprocess.PIPE).communicate()[0].split('\n')
        git_shortlog_line_regex = re.compile(r'^\s*(\d+)\s+.*$')
        num_commits = 0
        for git_shortlog_line in git_shortlog_output:
            line_match = git_shortlog_line_regex.match(git_shortlog_line)
            if line_match:
                num_commits += int(line_match.group(1))

        # Avoid a ZeroDivisionError when the author has no commits.
        average = total * 1.0 / num_commits if num_commits else 0.0
        print("Git Stats for {}: Commits: {}. Lines of code: {}. Average Lines Per Change: {}.".format(user, num_commits, total, average))
        sys.exit(0)

    if change:
        user, change_header = get_user_and_change_header_for_change(change)
        if change_header is None:
            logging.error("error: could not find change {}".format(change))
            sys.exit(2)
        change_log = [change_header]
    elif user:
        change_log = subprocess.Popen(['p4', 'changes', '-u', user, '-s', 'submitted'], stdout=subprocess.PIPE).communicate()[0].split('\n')
    else:
        logging.error("error: specify a user (-u) or a change (-c)")
        sys.exit(2)

    user_stats = {
        'num_changes': 0,
        'lines_added': 0,
        'lines_deleted': 0,
        'lines_changed_added': 0,
        'lines_changed_removed': 0,
        'total_lines': 0,
        'files_edited': 0,
        'files_added': 0,
    }

    output_headers = [
        'Current Change',
        'Num Changes',
        'Files Added',
        'Files Edited',
        'Lines Added',
        'Lines Deleted',
        'Lines Changed (Added/Removed)',
        'Total Lines',
        'Avg. Lines/Change',
    ]
    line = generate_line(output_headers)
    output_specifier = generate_output_specifier(output_headers)
    print(line)
    print(output_specifier.format(*output_headers))
    print(line)

    # The running totals are redrawn in place: each row ends with a carriage
    # return instead of a newline.
    output_specifier_with_carriage_return = output_specifier + '\r'
    for change_entry in change_log:
        change_match = change_header_regex.search(change_entry)
        if not change_match:
            continue
        user_stats['num_changes'] += 1
        changelist = change_match.group(1)

        if not is_black_listed_change(user, changelist) and not contains_integrates(changelist):
            (files_added_in_change,
             files_edited_in_change,
             lines_added_in_change,
             lines_deleted_in_change,
             lines_changed_added_in_change,
             lines_changed_removed_in_change) = parse_change(changelist)

            if lines_added_in_change > 5000:
                # Too large to be hand written: report separately and keep it
                # out of the totals.
                outliers.append([changelist, lines_added_in_change])
            else:
                user_stats['lines_added'] += lines_added_in_change
                user_stats['lines_deleted'] += lines_deleted_in_change
                user_stats['lines_changed_added'] += lines_changed_added_in_change
                user_stats['lines_changed_removed'] += lines_changed_removed_in_change
                # NOTE(review): lines_deleted is not subtracted from
                # total_lines, whereas the git path reports adds - subs;
                # confirm this difference is intended.
                user_stats['total_lines'] += lines_changed_added_in_change
                user_stats['total_lines'] -= lines_changed_removed_in_change
                user_stats['total_lines'] += lines_added_in_change
                user_stats['files_edited'] += files_edited_in_change
                user_stats['files_added'] += files_added_in_change

        current_output = [
            changelist,
            user_stats['num_changes'],
            user_stats['files_added'],
            user_stats['files_edited'],
            user_stats['lines_added'],
            user_stats['lines_deleted'],
            '{}/{}'.format(user_stats['lines_changed_added'], user_stats['lines_changed_removed']),
            user_stats['total_lines'],
            user_stats['total_lines'] * 1.0 / user_stats['num_changes'],
        ]
        sys.stdout.write(output_specifier_with_carriage_return.format(*current_output))
        # Make the progress row visible even though no newline was written.
        sys.stdout.flush()

    # Finish the in-place progress row before closing the table.
    sys.stdout.write('\n')
    print(line)

    if outliers:
        print("Outliers (changes that merit inspection - and have not been included in the stats):")
        outlier_headers = ['Changelist', 'Lines of Code']
        outlier_specifier = generate_output_specifier(outlier_headers)
        outlier_line = generate_line(outlier_headers)
        print(outlier_line)
        print(outlier_specifier.format(*outlier_headers))
        print(outlier_line)
        for outlier in outliers:
            print(outlier_specifier.format(*outlier))
        print(outlier_line)