Source code for interval_set.interval_set

"""
Functions to manage and convert intervals set.

An interval is a tuple (begin, end). An interval of 1 element where eID is the
element ID is formated (eID, eID).

An interval set is a list of non overlapping intervals.
"""
#
# Conversion operations
#

#
# String conversion
#


[docs]def interval_set_to_string(intervals, separator=" "): ''' Convert interval set to strings: >>> interval_set_to_string([(1, 2), (5, 5), (10, 50)]) '1-2 5 10-50' ''' res = '' for (begin, end) in intervals: if begin == end: res += separator + str(begin) else: res += separator + '{}-{}'.format(begin, end) return res.strip(separator)
[docs]def string_to_interval_set(s, separator=" "): """Transforms a string interval set representation to interval set >>> string_to_interval_set("1 2 3 7-9 13") [(1, 1), (2, 2), (3, 3), (7, 9), (13, 13)] >>> string_to_interval_set("") [] >>> string_to_interval_set("(2,3)") Traceback (most recent call last): ... ValueError: Bad interval format. Parsed string is: (2,3) """ intervals = [] if not s: return [] try: res_str = s.split(separator) if '-' in (separator).join(res_str): # it is already intervals so get it directly for inter in res_str: splitted = inter.split('-') if len(splitted) == 2: (begin, end) = splitted intervals.append((int(begin), int(end))) else: intervals.append((int(inter), int(inter))) else: res = sorted([int(x) for x in res_str]) intervals = id_list_to_iterval_set(res) except (ValueError, IndexError): raise ValueError("Bad interval format. Parsed string is: {}".format(s)) return intervals
# # ID list conversion #
[docs]def id_list_to_iterval_set(ids): """Convert list of ID (int) to an intervals set""" itvs = [] if ids: b = ids[0] e = ids[0] for i in ids: if i > (e + 1): # end itv and prepare new itv itvs.append((b, e)) b = i e = i itvs.append((b, e)) return itvs
[docs]def interval_set_to_id_list(itvs): """ Convert an interval set to a list of ID (int)""" ids = [] for itv in itvs: b, e = itv ids.extend(range(b, e + 1)) return ids
# # Set conversion #
[docs]def interval_set_to_set(intervals): """ Convert interval set to python set >>> interval_set_to_set([]) set() >>> interval_set_to_set([(1, 1), (3, 4)]) {1, 3, 4} """ s = set() for (begin, end) in intervals: for x in range(begin, end+1): s.add(x) return s
[docs]def set_to_interval_set(s): """ Convert python set to interval set >>> set_to_interval_set(set()) [] >>> set_to_interval_set({1, 2, 5, 7, 9, 10, 11}) [(1, 2), (5, 5), (7, 7), (9, 11)] """ intervals = [] l = list(s) l.sort() if len(l) > 0: i = 0 current_interval = [l[i], l[i]] i += 1 while i < len(l): if l[i] == current_interval[1] + 1: current_interval[1] = l[i] else: intervals.append(tuple(current_interval)) current_interval = [l[i], l[i]] i += 1 if current_interval not in intervals: intervals.append(tuple(current_interval)) return intervals
# # Info operations #
[docs]def total(itvs): ''' Compute the total number of element by a cumulative sum on the size of all intervals >>> total([]) 0 >>> total([(0, 0)]) 1 >>> total([(1, 1), (3, 4)]) 3 ''' # Add +1 because it is a closed interval return sum([(end - begin) + 1 for begin, end in itvs])
# # Ensemble operations #
[docs]def equals(itvs1, itvs2): """ Check for equality between two interval sets TODO: this version is working bu it is not optimized... >>> equals([],[]) True >>> equals([(1, 1)],[(1, 2)]) False >>> equals([(1, 10)],[]) False >>> equals([(1, 2), (3, 4)], [(1, 4)]) True >>> equals([(5, 100), (3, 4)], [(3, 4), (5, 100)]) True """ return interval_set_to_set(itvs1) == interval_set_to_set(itvs2)
[docs]def difference(itvs_base, itvs2): """ returns the difference between an interval set and an other >>> difference([], [(1, 1)]) [] >>> difference([(1, 1), (3, 4)], [(1, 2), (4, 7)]) [(3, 3)] >>> difference([(1, 12)], [(1, 2), (4, 7)]) [(3, 3), (8, 12)] """ itvs1 = [(k, v) for (k, v) in itvs_base] lx = len(itvs1) ly = len(itvs2) i = 0 k = 0 itvs = [] while (i < lx) and (lx > 0): x = itvs1[i] if (k == ly): itvs.append(x) i += 1 else: y = itvs2[k] # y before x w/ no overlap if (y[1] < x[0]): k += 1 else: # x before y w/ no overlap if (y[0] > x[1]): itvs.append(x) i += 1 else: if (y[0] > x[0]): if (y[1] < x[1]): # x overlap totally y itvs.append((x[0], y[0] - 1)) itvs1[i] = (y[1] + 1, x[1]) k += 1 else: # x overlap partially itvs.append((x[0], y[0] - 1)) i += 1 else: if (y[1] < x[1]): # x overlap partially itvs1[i] = (y[1] + 1, x[1]) k += 1 else: # y overlap totally x i += 1 return itvs
[docs]def intersection(itvs1, itvs2): """Returns an interval set that is an intersection of itvs1 and itvs2. >>> intersection([(1, 2), (4, 5)], [(1, 3), (5, 7)]) [(1, 2), (5, 5)] >>> intersection([(2, 3), (5, 7)], [(1, 1), (4, 4)]) [] >>> intersection([(3, 7)], [(2, 8)]) [(3, 7)] >>> intersection([(3, 7)], [(2, 6)]) [(3, 6)] """ lx = len(itvs1) ly = len(itvs2) i = 0 k = 0 itvs = [] while (i < lx) and (lx > 0) and (ly > 0): x = itvs1[i] if (k == ly): break else: y = itvs2[k] # y before x w/ no overlap if (y[1] < x[0]): k += 1 else: # x before y w/ no overlap if (y[0] > x[1]): i += 1 else: if (y[0] >= x[0]): if (y[1] <= x[1]): itvs.append(y) k += 1 else: itvs.append((y[0], x[1])) i += 1 else: if (y[1] <= x[1]): itvs.append((x[0], y[1])) k += 1 else: itvs.append(x) i += 1 return itvs
[docs]def union(itvs1, itvs2): """ Do the union of two interval sets TODO: this version is working bu it is not optimized... >>> union([(1, 1), (3, 4)], [(1, 2), (4, 7)]) [(1, 7)] """ intersect = intersection(itvs1, itvs2) diff12 = difference(itvs1, itvs2) diff21 = difference(itvs2, itvs1) union = aggregate(intersect + diff12 + diff21) return union
[docs]def aggregate(itvs): """Aggregate *NOT overlapping* intervals (intersect must be empty) to remove gaps. >>> aggregate([]) [] >>> aggregate([(1, 2), (3, 4)]) [(1, 4)] >>> aggregate([(3, 4), (1, 2)]) [(1, 4)] """ lg = len(itvs) if lg <= 1: return itvs # sort intervals itvs = sorted(itvs) res = [] i = 1 a, b = itvs[0] while True: if i == lg: res.append((a, b)) return res else: x, y = itvs[i] if x == (b + 1): b = y else: res.append((a, b)) a, b = x, y i += 1