Skip to content

Commit 8178b3c

Browse files
committed
Add merge method
The Trie.merge method allows two tries to be merged structurally, which is often faster than looping over all items and setting them individually in the destination trie. The operation doesn’t check that the tries are of the same type, so it may result in keys being transformed in one way or another. Issue: google#40
1 parent c2611e8 commit 8178b3c

File tree

4 files changed

+139
-5
lines changed

4 files changed

+139
-5
lines changed

.pylintrc

+6-4
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,19 @@ load-plugins=pylint.extensions.check_elif,
4141
# --enable=similarities". If you want to run only the classes checker, but have
4242
# no Warning level messages displayed, use "--disable=all --enable=classes
4343
# --disable=W"
44-
disable=locally-disabled,
44+
disable=consider-using-f-string,
45+
locally-disabled,
4546
missing-return-type-doc,
4647
missing-type-doc,
4748
missing-yield-type-doc,
49+
no-else-return,
4850
no-self-use,
51+
super-with-arguments,
4952
too-few-public-methods,
5053
too-many-lines,
5154
too-many-public-methods,
52-
useless-object-inheritance,
53-
super-with-arguments,
54-
consider-using-f-string
55+
unidiomatic-typecheck,
56+
useless-object-inheritance
5557

5658

5759
[REPORTS]

pygtrie.py

+92
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,16 @@ def add(self, parent, step):
136136
def require(self, parent, step):
    """Returns the node for step, delegating to add() when it is missing.

    Args:
        parent: Forwarded unchanged to ``self.add`` when step does not
            match this object's step.
        step: Step to look up.

    Returns:
        ``self.node`` when step equals ``self.step``; otherwise whatever
        ``self.add(parent, step)`` returns.
    """
    if self.step == step:
        return self.node
    return self.add(parent, step)
138138

139+
def merge(self, other, queue):
    """Moves children from other into this object.

    Args:
        other: Children container whose nodes are moved into this one.
        queue: List to which ``(node, other_node)`` pairs are appended
            whenever both containers hold a node under the same step, so
            that the caller can merge those nodes afterwards.

    Returns:
        The container holding the merged children: this object when other
        is a ``_OneChild`` with the same step, otherwise a newly created
        ``_Children`` object.
    """
    same_single_step = type(other) == _OneChild and other.step == self.step
    if not same_single_step:
        # A single-child container cannot hold other's children; switch to
        # a multi-child container and let it absorb them.
        merged = _Children((self.step, self.node))
        merged.merge(other, queue)
        return merged
    queue.append((self.node, other.node))
    return self
148+
139149
def delete(self, parent, _step):
140150
parent.children = _EMPTY
141151

@@ -172,6 +182,14 @@ def add(self, _parent, step):
172182
def require(self, _parent, step):
    """Returns the node stored under step, inserting a new one if absent.

    Args:
        _parent: Unused by this implementation.
        step: Step to look up.

    Returns:
        The node already stored under step, or the freshly created
        ``_Node`` that has just been inserted under it.
    """
    fresh = _Node()
    return self.setdefault(step, fresh)
174184

185+
def merge(self, other, queue):
    """Moves children from other into this object.

    Args:
        other: Children container to take nodes from; it is iterated with
            ``other.iteritems()``.
        queue: List to which ``(node, other_node)`` pairs are appended for
            every step present in both containers, so that the caller can
            merge those nodes afterwards.

    Returns:
        This object.
    """
    for step, theirs in other.iteritems():
        ours = self.setdefault(step, theirs)
        if ours is theirs:
            # Step was missing here, so other's node has been adopted as is.
            continue
        queue.append((ours, theirs))
    return self
192+
175193
def delete(self, parent, step):
176194
del self[step]
177195
if len(self) == 1:
@@ -196,6 +214,24 @@ def __init__(self):
196214
self.children = _EMPTY
197215
self.value = _EMPTY
198216

217+
def merge(self, other, overwrite):
    """Move children from other node into this one.

    The merge is iterative: pairs of nodes which exist in both trees are
    kept on a work list and processed until the list is empty.

    Args:
        other: Other node to move children and value from.
        overwrite: Whether to overwrite existing node values.
    """
    pending = [(self, other)]
    while pending:
        dst, src = pending.pop()
        # Take the source value when the destination has none, or when
        # overwriting was requested and the source actually has a value.
        take_value = dst.value is _EMPTY or (
            overwrite and src.value is not _EMPTY)
        if take_value:
            dst.value = src.value
        if dst.children is _EMPTY:
            dst.children = src.children
        elif src.children is not _EMPTY:
            # The children container appends colliding node pairs to
            # ``pending`` and returns the container to use from now on.
            dst.children = dst.children.merge(src.children, pending)
        src.children = _EMPTY
234+
199235
def iterate(self, path, shallow, iteritems):
200236
"""Yields all the nodes with values associated to them in the trie.
201237
@@ -484,6 +520,55 @@ def update(self, *args, **kwargs): # pylint: disable=arguments-differ
484520
args = ()
485521
super(Trie, self).update(*args, **kwargs)
486522

523+
def merge(self, other, overwrite=False):
    """Moves nodes from other trie into this one.

    The merging happens at trie structure level and as such is different
    from iterating over items of one trie and setting them in the other
    trie.

    The merging may happen between different types of tries resulting in
    different (key, value) pairs in the destination trie compared to the
    source.  For example, merging two :class:`pygtrie.StringTrie` objects
    each using different separators will work as if the other trie had
    separator of this trie.  Similarly, a :class:`pygtrie.CharTrie` may be
    merged into a :class:`pygtrie.StringTrie` but when keys are read those
    will be joined by the separator.  For example:

        >>> import pygtrie
        >>> st = pygtrie.StringTrie(separator='.')
        >>> st.merge(pygtrie.StringTrie({'foo/bar': 42}))
        >>> list(st.items())
        [('foo.bar', 42)]
        >>> st.merge(pygtrie.CharTrie({'baz': 24}))
        >>> sorted(st.items())
        [('b.a.z', 24), ('foo.bar', 42)]

    Not all tries can be merged into other tries.  For example,
    a :class:`pygtrie.StringTrie` may not be merged into
    a :class:`pygtrie.CharTrie` because the latter imposes a requirement for
    each component in the key to be exactly one character while in the
    former components may be arbitrary length.

    Note that the other trie is cleared and any references or iterators over
    it are invalidated.  To preserve other's value it needs to be copied
    first.  Merging a trie into itself is a no-op.

    Args:
        other: Other trie to move nodes from.
        overwrite: Whether to overwrite existing values in this trie.
    """
    if other is self:
        # Without this guard the trie would be merged with itself and then
        # cleared, silently dropping every item it holds.
        return
    # Dispatch to the more derived of the two classes so that it gets
    # a chance to veto the merge.
    if isinstance(self, type(other)):
        self._merge_impl(self, other, overwrite=overwrite)
    else:
        other._merge_impl(self, other, overwrite=overwrite)  # pylint: disable=protected-access
    other.clear()
566+
567+
@classmethod
def _merge_impl(cls, dst, src, overwrite):
    """Merges the root node of src into the root node of dst.

    Args:
        dst: Trie to move nodes into.
        src: Trie to move nodes from.
        overwrite: Whether to overwrite existing values in dst.
    """
    # pylint: disable=protected-access
    dst_root, src_root = dst._root, src._root
    dst_root.merge(src_root, overwrite=overwrite)
571+
487572
def copy(self, __make_copy=lambda x: x):
488573
"""Returns a shallow copy of the object."""
489574
# pylint: disable=protected-access
@@ -1637,6 +1722,13 @@ def fromkeys(cls, keys, value=None, separator='/'): # pylint: disable=arguments
16371722
trie[key] = value
16381723
return trie
16391724

1725+
@classmethod
def _merge_impl(cls, dst, src, overwrite):
    """Merges src into dst, insisting that dst is a StringTrie.

    Args:
        dst: Trie to move nodes into.
        src: Trie to move nodes from.
        overwrite: Whether to overwrite existing values in dst.

    Raises:
        TypeError: If dst is not a :class:`StringTrie`.
    """
    if isinstance(dst, StringTrie):
        super(StringTrie, cls)._merge_impl(dst, src, overwrite=overwrite)
        return
    names = (type(src).__name__, type(dst).__name__)
    raise TypeError('%s cannot be merged into a %s' % names)
1731+
16401732
def __str__(self):
16411733
if not self:
16421734
return '%s(separator=%s)' % (type(self).__name__, self._separator)

test.py

+38
Original file line numberDiff line numberDiff line change
@@ -1114,5 +1114,43 @@ def __len__(self):
11141114
self.assertNotEqual(trie, dictionary)
11151115

11161116

1117+
class MergeTest(unittest.TestCase):
    """Tests for merge method."""

    def test_merge_tries(self):
        """Successive merges into one Trie move items and empty the source."""
        dst = pygtrie.Trie({'foo': 1, 'bar': 2})

        # (expected content of dst after the merge, source trie, overwrite);
        # the merges accumulate in dst.
        cases = (
            ({'foo': 1, 'bar': 2, 'baz': 3},
             pygtrie.Trie({'bar': 0, 'baz': 3}),
             False),
            ({'foo': 1, 'bar': 2, 'baz': 4, 'fo': 5},
             pygtrie.Trie({'baz': 4, 'fo': 5}),
             True),
            ({'foo': 1, 'bar': 2, 'baz': 4, 'fo': 5, 'qux': 6},
             pygtrie.CharTrie({'qux': 6}),
             False),
        )
        for want, src, overwrite in cases:
            dst.merge(src, overwrite=overwrite)
            self.assertEqual(0, len(src))
            self.assertEqual(pygtrie.Trie(want), dst)

        string_trie = pygtrie.StringTrie({'foo/bar/baz': 42})
        with self.assertRaises(TypeError):
            dst.merge(string_trie)

    def test_merge_string_tries(self):
        """Merging into a StringTrie reads keys back with its separator."""
        # (expected items, trie to merge in, overwrite); every case starts
        # from a fresh destination trie.
        cases = (
            ({'foo/bar': 42, 'bar/baz': 2},
             pygtrie.StringTrie({'foo.bar': 4, 'bar.baz': 2}, separator='.'),
             False),
            ({'foo/bar': 4, 'bar/baz': 2},
             pygtrie.StringTrie({'foo.bar': 4, 'bar.baz': 2}, separator='.'),
             True),
            ({'foo/bar': 42, 'q/u/x': 2}, pygtrie.Trie({'qux': 2}), False),
            ({'foo/bar': 42, 'q/u/x': 2}, pygtrie.CharTrie({'qux': 2}), False),
        )
        for want, other, overwrite in cases:
            dst = pygtrie.StringTrie({'foo/bar': 42})
            dst.merge(other, overwrite=overwrite)
            self.assertEqual(want, dict(dst.items()))
1153+
1154+
11171155
if __name__ == '__main__':
11181156
unittest.main()

version-history.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
Version History
22
---------------
33

4-
2.4.3: TBD
4+
2.5.0: TBD
5+
6+
- Add :func:`pygtrie.Trie.merge` which merges the structures of two tries.
57

68
- Fix :func:`pygtrie.Trie.__eq__` implementation such that key values
79
are taken into consideration rather than just looking at trie

0 commit comments

Comments
 (0)