An answer similar to Edmunds', tested, and including support for single-point intervals such as (1, 1):
class MultiSet(object):
    """Split overlapping labelled intervals into disjoint labelled ranges.

    Each interval is a ``(start, stop, symbol)`` triple whose endpoints are
    both inclusive.  The sweep steps by one unit (``endpoint - 1`` /
    ``endpoint + 1``) when it closes and reopens a range, so coordinates are
    assumed to be integers.
    """

    def __init__(self, intervals):
        """Remember the intervals; nothing is computed until split_ranges().

        Args:
            intervals: iterable of ``(start, stop, symbol)`` triples.
        """
        self.intervals = intervals
        # Sorted sweep events produced by the latest split_ranges() call.
        self.events = None

    def split_ranges(self):
        """Return the disjoint ranges covered by at least one interval.

        Returns:
            A list of ``(range_start, range_end, symbols)`` tuples in
            ascending order, with inclusive bounds, where ``symbols`` is a
            fresh ``set`` of every symbol whose interval covers the range.
            Stretches covered by no interval are omitted.

        Raises:
            KeyError: if an interval's stop sorts before its own start, so
                its end event is processed while the symbol is not active.
        """
        self.events = []
        for start, stop, symbol in self.intervals:
            # The opposite endpoint is carried along as a sort tie-breaker.
            self.events.append((start, True, stop, symbol))
            self.events.append((stop, False, start, symbol))

        def event_key(event):
            key_endpoint, key_is_start, key_other, _ = event
            # At the same coordinate, starts sort before ends so that an
            # interval opening exactly where another closes shares that point.
            return key_endpoint, 0 if key_is_start else 1, key_other

        self.events.sort(key=event_key)

        # symbol -> number of currently open intervals carrying it.  Counting
        # (rather than a plain set) lets the same symbol appear in overlapping
        # or touching intervals without being dropped on the first close.
        active = {}
        ranges = []
        # None means "no range opened yet"; a numeric sentinel such as -1
        # would collide with real coordinates.
        current_start = None

        for endpoint, is_start, _other, symbol in self.events:
            if is_start:
                # Close the range running up to just before this start.
                if (current_start is not None and active
                        and endpoint - 1 >= current_start):
                    ranges.append(
                        (current_start, endpoint - 1, set(active)))
                current_start = endpoint
                active[symbol] = active.get(symbol, 0) + 1
            else:
                # Close the range that runs up to and including this end.
                if (current_start is not None and active
                        and endpoint >= current_start):
                    ranges.append((current_start, endpoint, set(active)))
                remaining = active[symbol] - 1
                if remaining:
                    active[symbol] = remaining
                else:
                    del active[symbol]
                current_start = endpoint + 1

        return ranges
if __name__ == '__main__':
    # Imported locally: only this demo uses pprint.
    import pprint

    # Sample (start, stop, symbol) intervals; 'a' and 'd' each appear twice
    # in separate, non-overlapping stretches.
    intervals = [
        (0, 100, 'a'), (0, 75, 'b'), (75, 80, 'd'), (95, 150, 'c'),
        (120, 130, 'd'), (160, 175, 'e'), (165, 180, 'a')
    ]
    multiset = MultiSet(intervals)
    pprint.pprint(multiset.split_ranges())
[(0, 74, {'b', 'a'}),
(75, 75, {'d', 'b', 'a'}),
(76, 80, {'d', 'a'}),
(81, 94, {'a'}),
(95, 100, {'c', 'a'}),
(101, 119, {'c'}),
(120, 130, {'d', 'c'}),
(131, 150, {'c'}),
(160, 164, {'e'}),
(165, 175, {'e', 'a'}),
(176, 180, {'a'})]