I needed something for a recent project and figured I could build on the OP's solution a bit. It allows for comment patterns, quotes and brackets to be checked, whilst ignoring the surrounding text. I've purposefully made it more generic than it needs to be so that others can take what they want and cut out what they don't.
"""
This module is for testing bracket pairings within a given string
Tested with Python 3.5.4
>>> regexp = getRegexFromList(opening + closing)
>>> print(regexp)
(\\<\\-\\-|\\-\\-\\>|\\/\\*|\\/\\/|\\*\\/|\\#|\\"|\\'|\\(|\\[|\\{|\\<|\\\n|\\\n|\\"|\\'|\\)|\\]|\\}|\\>)
>>> test_string = 'l<--([0])-->1/*{<2>}*/3//<--4 &-->\\n5#"6"\\n7"/*(8)*/"9\'"10"\'11({12\ta})13[<14>]'
>>> patterns = re.findall(regexp, test_string)
>>> print(patterns)
['<--', '(', '[', ']', ')', '-->', '/*', '{', '<', '>', '}', '*/', '//', '<--', '-->', '\\n', '#', '"', '"', '\\n', '"', '/*', '(', ')', '*/', '"', '(', '{', '}', ')', '[', '<', '>', ']']
>>> doBracketsMatch(patterns)
True
>>> doBracketsMatch(['"', ')', '"', '[', ']', '\\''])
False
"""
# Dependencies
import re
# Global Variables
# Provide opening and closing patterns, along with their priorities & whether a priority is nestable
opening = ['<--', '/*', '//', '#', '"', '\'', '(', '[', '{', '<']
closing = ['-->', '*/', '\n', '\n', '"', '\'', ')', ']', '}', '>']
priority = [ 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
nestable = {0: True, 1: False}
bracket_pairs = dict(zip(opening + closing, \
[[(closing + opening)[i], (priority + priority)[i]] \
for i in range(0, opening.__len__() * 2)]))
def getRegexFromList(listOfPatterns):
"""
Generate the search term for the regular expression
:param listOfPatterns:
:return:
>>> getRegexFromList(['"', '<--', '##', 'test'])
'(\\\\t\\\\e\\\\s\\\\t|\\\\<\\\\-\\\\-|\\\\#\\\\#|\\\\")'
"""
# Longer patterns first to prevent false negatives
search_terms = sorted(listOfPatterns, key=len, reverse=True)
regex = ""
for term in search_terms:
for char in str(term):
regex = regex + '\\' + char # Search for all characters literally
regex = regex + '|' # Search pattern = (a|b|c)
return '(' + regex[:-1] + ')' # Remove excess '|' and add brackets
def doBracketsMatch(list_of_brackets):
"""
Determine if brackets match up
:param list_of_brackets:
:return:
"""
stack = []
for bracket in list_of_brackets:
# Check empty stack conditions
if stack.__len__() is 0:
# Check for openings first to catch quotes
if bracket in opening:
stack.append(bracket)
elif bracket in closing:
return False
else:
continue
# Check for a matching bracket
elif bracket == bracket_pairs[stack[-1]][0]:
stack.pop()
# Ignore cases:
# - False positives
# - Lower priority brackets
# - Equal priority brackets if nesting is not allowed
elif bracket not in bracket_pairs or \
bracket_pairs[bracket][1] < bracket_pairs[stack[-1]][1] or \
(bracket_pairs[bracket][1] == bracket_pairs[stack[-1]][1] and \
not nestable[bracket_pairs[bracket][1]]):
continue
# New open bracket
elif bracket in opening:
stack.append(bracket)
# Otherwise, unpaired close bracket
else:
return False
# If stack isn't empty, then there is an unpaired open bracket
return not bool(stack)
if __name__ == '__main__':
import doctest
doctest.testmod()