A B-tree is a self-balancing search tree where each node holds many keys and has many children — designed to minimize disk reads. It's the structure behind virtually every relational database index (PostgreSQL, MySQL/InnoDB, SQLite) and most filesystems.
| Average | Worst | Space | ||||||
|---|---|---|---|---|---|---|---|---|
| Access | Search | Insertion | Deletion | Access | Search | Insertion | Deletion | Worst |
Θ(log n) |
Θ(log n) |
Θ(log n) |
Θ(log n) |
O(log n) |
O(log n) |
O(log n) |
O(log n) |
O(n) |
Each node holds up to 2t−1 keys and 2t children, kept sorted. Search: scan keys in the node, descend into the matching child. Insert: descend; if the target leaf is full, split it and push the median key up. Delete: similar, with merging or borrowing when nodes underfill. Branching factor t is tuned to fit one disk block — typical values are 100s to 1000s.
class BTreeNode:
def __init__(self, t, leaf=False):
self.t = t # minimum degree
self.leaf = leaf
self.keys = []
self.children = []
class BTree:
def __init__(self, t=3):
self.t = t
self.root = BTreeNode(t, leaf=True)
def search(self, key, node=None): # O(log n)
node = node or self.root
i = 0
while i < len(node.keys) and key > node.keys[i]:
i += 1
if i < len(node.keys) and key == node.keys[i]:
return (node, i)
if node.leaf:
return None
return self.search(key, node.children[i])
def insert(self, key): # O(log n)
root = self.root
if len(root.keys) == 2 * self.t - 1:
new = BTreeNode(self.t, leaf=False)
new.children.append(root)
self._split(new, 0)
self.root = new
self._insert_nonfull(new, key)
else:
self._insert_nonfull(root, key)
def _split(self, parent, i):
t = self.t
full = parent.children[i]
new = BTreeNode(t, leaf=full.leaf)
new.keys = full.keys[t:]
if not full.leaf:
new.children = full.children[t:]
full.children = full.children[:t]
parent.keys.insert(i, full.keys[t - 1])
parent.children.insert(i + 1, new)
full.keys = full.keys[:t - 1]
def _insert_nonfull(self, node, key):
i = len(node.keys) - 1
if node.leaf:
while i >= 0 and key < node.keys[i]:
i -= 1
node.keys.insert(i + 1, key)
else:
while i >= 0 and key < node.keys[i]:
i -= 1
i += 1
if len(node.children[i].keys) == 2 * self.t - 1:
self._split(node, i)
if key > node.keys[i]:
i += 1
self._insert_nonfull(node.children[i], key)
bt = BTree(t=3)
for k in [10, 20, 5, 6, 12, 30, 7, 17]:
bt.insert(k)
print(bt.search(12) is not None) # True