intranges.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. """
  2. Given a list of integers, made up of (hopefully) a small number of long runs
  3. of consecutive integers, compute a representation of the form
  4. ((start1, end1), (start2, end2) ...). Then answer the question "was x present
  5. in the original list?" in time O(log(# runs)).
  6. """
  7. import bisect
  8. def intranges_from_list(list_):
  9. """Represent a list of integers as a sequence of ranges:
  10. ((start_0, end_0), (start_1, end_1), ...), such that the original
  11. integers are exactly those x such that start_i <= x < end_i for some i.
  12. Ranges are encoded as single integers (start << 32 | end), not as tuples.
  13. """
  14. sorted_list = sorted(list_)
  15. ranges = []
  16. last_write = -1
  17. for i in range(len(sorted_list)):
  18. if i+1 < len(sorted_list):
  19. if sorted_list[i] == sorted_list[i+1]-1:
  20. continue
  21. current_range = sorted_list[last_write+1:i+1]
  22. ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
  23. last_write = i
  24. return tuple(ranges)
  25. def _encode_range(start, end):
  26. return (start << 32) | end
  27. def _decode_range(r):
  28. return (r >> 32), (r & ((1 << 32) - 1))
  29. def intranges_contain(int_, ranges):
  30. """Determine if `int_` falls into one of the ranges in `ranges`."""
  31. tuple_ = _encode_range(int_, 0)
  32. pos = bisect.bisect_left(ranges, tuple_)
  33. # we could be immediately ahead of a tuple (start, end)
  34. # with start < int_ <= end
  35. if pos > 0:
  36. left, right = _decode_range(ranges[pos-1])
  37. if left <= int_ < right:
  38. return True
  39. # or we could be immediately behind a tuple (int_, end)
  40. if pos < len(ranges):
  41. left, _ = _decode_range(ranges[pos])
  42. if left == int_:
  43. return True
  44. return False