Source code for niemads.MultiwayTrie
#! /usr/bin/env python
from collections import deque
from gzip import open as gopen
WORD_IDENT = 'W'
class MWTNode:
'''Node helper class for ``MultiwayTrie``'''
def __init__(self):
'''``MWTNode`` constructor'''
self.word = None
self.children = dict()
self.parent = None
def traverse_preorder(self):
'''Traverse the nodes of this ``MultiwayTrie`` via preorder traversal starting at this node'''
s = deque(); s.append((None, self))
while len(s) != 0:
curr = s.pop(); yield curr
s.extend((k, curr[1].children[k]) for k in sorted(curr[1].children.keys(), reverse=True))
def traverse_postorder(self):
'''Traverse the nodes of this ``MultiwayTrie`` via postorder traversal starting at this node'''
s1 = deque(); s2 = deque(); s1.append((None, self))
while len(s1) != 0:
curr = s1.pop(); s2.append(curr); s1.extend((k, curr[1].children[k]) for k in sorted(curr[1].children.keys(), reverse=True))
while len(s2) != 0:
yield s2.pop()
def newick(self):
'''Newick string conversion starting at this node
Returns:
``str``: Newick string conversion starting at this node
'''
node_to_str = dict()
for letter,node in self.traverse_postorder():
if len(node.children) == 0:
if node.word is None:
raise RuntimeError("Encountered a leaf that is not a word node")
node_to_str[node] = letter + WORD_IDENT
else:
out = ['(']
for c in node.children.values():
out.append(node_to_str[c])
out.append(',')
del node_to_str[c]
out.pop() # trailing comma
out.append(')')
if node.parent is not None:
if node.word is None:
out.append(letter)
else:
out.append(letter + WORD_IDENT)
node_to_str[node] = ''.join(out)
return node_to_str[self]
[docs]class MultiwayTrie:
'''``MultiwayTrie`` class, implemented using ``TreeSwift``'''
def __init__(self, initial=None):
'''``MultiwayTrie`` constructor
Args:
``initial`` (iterable): Strings with which to initialize the ``MultiwayTrie``
'''
self.root = MWTNode(); self.num_elements = 0
if initial is not None:
self.extend(initial)
def __contains__(self, x):
'''Check if an element ``x`` exists in this ``MultiwayTrie``
Args:
``x``: The element to check
Returns:
``bool``: ``True`` if ``x`` exists in this ``MultiwayTrie``, otherwise ``False``
'''
if not isinstance(x, str):
raise TypeError("Elements must be strings")
curr = self.root
for i in range(len(x)):
if x[i] not in curr.children:
return False
curr = curr.children[x[i]]
return (curr.word is not None)
def __iter__(self):
''' Iterate over the elements of this ``MultiwayTrie`` in ascending order'''
for x in self.iter_ascending():
yield x
def __len__(self):
'''Return the number of elements in this ``MultiwayTrie``
Returns:
``int``: The number of elements contained within this ``MultiwayTrie``
'''
return self.num_elements
def __str__(self):
'''Return a Newick string representation of this ``MultiwayTrie``
Returns:
``str``: A Newick string representation of this ``MultiwayTrie``
'''
return '%s;' % self.root.newick()
[docs] def add(self, x):
'''Add a new element ``x`` to this ``MultiwayTrie``
Args:
``x``: The element to insert
'''
if not isinstance(x, str):
raise TypeError("Elements must be strings")
curr = self.root
for i in range(len(x)):
if x[i] not in curr.children:
newnode = MWTNode(); curr.children[x[i]] = newnode; newnode.parent = curr
curr = curr.children[x[i]]
if curr.word is None:
curr.word = x; self.num_elements += 1
[docs] def extend(self, xs):
'''Add each element of ``xs`` to this ``MultiwayTrie``
Args:
``xs``: The elements to insert
'''
for x in xs:
self.add(x)
[docs] def remove(self, x):
'''Remove the element ``x`` from this ``MultiwayTrie``
Args:
``x``: The element to remove
'''
if not isinstance(x, str):
raise TypeError("Elements must be strings")
curr = self.root
for i in range(len(x)):
if x[i] not in curr.children:
return
curr = curr.children[x[i]]
if curr.word is not None:
curr.word = None; self.num_elements -= 1
for i in range(len(x)-1, -1, -1):
if len(curr.children) == 0:
curr = curr.parent; del curr.children[x[i]]
else:
return
[docs] def iter_ascending(self):
'''Iterate over the elements of this ``MultiwayTrie`` in ascending order'''
for letter,node in self.root.traverse_preorder():
if node.word is not None:
yield node.word
[docs] def iter_descending(self):
'''Iterate over the elements of this ``MultiwayTrie`` in descending order'''
for letter,node in self.root.traverse_postorder():
if node.word is not None:
yield node.word
[docs] @staticmethod
def read_tree_newick(newick):
'''Read a tree from a Newick string or file
Args:
``newick`` (``str``): Either a Newick string or the path to a Newick file (plain-text or gzipped)
Returns:
``MultiwayTrie``: The Multiway Trie represented by ``newick``. If the Newick file has multiple trees (one per line), a ``list`` of ``Tree`` objects will be returned
'''
if not isinstance(newick, str):
try:
newick = str(newick)
except:
raise TypeError("newick must be a str")
if newick.lower().endswith('.gz'): # gzipped file
f = gopen(expanduser(newick)); ts = f.read().decode().strip(); f.close()
elif isfile(expanduser(newick)): # plain-text file
f = open(expanduser(newick)); ts = f.read().strip(); f.close()
else:
ts = newick.strip()
lines = ts.splitlines()
if len(lines) != 1:
return [read_tree_newick(l) for l in lines]
'''
try:
t = MultiwayTrie(); n = t.root; i = 0
while i < len(ts):
if ts[i] == '(':
c = MWTNode(); n.add_child(c); n = c
elif ts[i] == ')':
n = n.parent
elif ts[i] == ',':
n = n.parent; c = Node(); n.add_child(c); n = c
elif ts[i] == ':':
i += 1; ls = ''
while ts[i] != ',' and ts[i] != ')' and ts[i] != ';':
ls += ts[i]; i += 1
n.edge_length = float(ls); i -= 1
else:
label = ''
while ts[i] != ':' and ts[i] != ',' and ts[i] != ';' and ts[i] != ')':
label += ts[i]; i += 1
i -= 1; n.label = label
i += 1
except Exception as e:
raise RuntimeError("Failed to parse string as Newick: %s"%ts)
return t
'''
return None # not implemented yet