= [3, 2, 1, 4, 5, 6, 7, 3]
data
def rsq(st, en):
return sum([data[i] for i in range(st, en+1)])
print(rsq(2, 5)) # O(n)
print(rsq(0, 3)) # O(n)
16
10
Given an frequency table, we want to be able to compute the sum of the frequencies in a given range.
Score | Frequency |
---|---|
0 | 3 |
1 | 2 |
2 | 1 |
3 | 4 |
4 | 5 |
5 | 6 |
6 | 7 |
7 | 3 |
How many have score between 2 and 5?
rsq(2, 5) = 1 + 4 + 5 + 6 = 16
The naive approach is to iterate over the array and sum the elements from index i
to index j
.
data = [3, 2, 1, 4, 5, 6, 7, 3]
def rsq(st, en):
return sum([data[i] for i in range(st, en+1)])
print(rsq(2, 5)) # O(n)
print(rsq(0, 3)) # O(n)
16
10
But that’s O(n) time complexity.
If we frequently need to perform range sum queries, then we need a better approach.
Score | Frequency | Accumulated Frequency |
---|---|---|
0 | 3 | 3 |
1 | 2 | 5 |
2 | 1 | 6 |
3 | 4 | 10 |
4 | 5 | 15 |
5 | 6 | 21 |
6 | 7 | 28 |
7 | 3 | 31 |
We can precompute the cummulative frequency table, and then perform range sum queries in O(1) time.
data = [3, 2, 1, 4, 5, 6, 7, 3]
def precompute_accumulated_freq(data):
accum = [0] * len(data)
accum[0] = data[0]
for i in range(1, len(data)):
accum[i] = accum[i-1] + data[i]
return accum
def rsq(accum, st, en):
return accum[en] - accum[st-1] if st > 0 else accum[en]
accum = precompute_accumulated_freq(data) # O(n)
print(rsq(accum, 2, 5)) # O(1)
print(rsq(accum, 0, 3)) # O(1)
16
10
But what if the frequency table is dynamic? i.e. the frequency of a score can change.
We can’t precompute the cummulative frequency table anymore.
class FenwickTree:
def __init__(self, data):
self.data = data
self.tree = [0] * (len(data) + 1)
for i in range(len(data)):
self.update(i, data[i])
def update(self, i, delta):
i += 1
while i < len(self.tree):
self.tree[i] += delta
i += i & -i
def rsq(self, st, en):
return self._sum(en) - self._sum(st-1) if st > 0 else self._sum(en)
def _sum(self, i):
i += 1
res = 0
while i > 0:
res += self.tree[i]
i -= i & -i
return res