Consistent naming: items are actually nodes
[pysize.git] / core / pysize_fs_tree.py
blob1161ddae0a795ae7aef72da6af90b3b3dbebce50
1 import os
2 import stat
4 from pysize_global_fs_cache import get_dev_ino, cache_add_dir
5 from pysize_global_fs_cache import cache_add_hardlink, cache_get_dir_size
7 def _browse_directory(path):
8 previous_cwd = os.open('.', os.O_RDONLY)
9 os.chdir(path)
10 for child in os.listdir('.'):
11 yield child
12 os.fchdir(previous_cwd)
13 os.close(previous_cwd)
15 def _size_fast(path, dir_ino):
16 """Return the size of the file or directory at path, using or updating the
17 cache. dir_ino is the inode of the directory containing the path, it is
18 used only with hardlinks.
19 """
20 try:
21 st = os.lstat(path)
22 if stat.S_ISDIR(st.st_mode): # Directory
23 dev_ino = (st.st_dev, st.st_ino)
24 cached_size = cache_get_dir_size(dev_ino)
25 if cached_size is None:
26 size = st.st_blocks * 512
27 dir_ino = st.st_ino
28 for p in _browse_directory(path):
29 size += _size_fast(p, dir_ino)
30 cache_add_dir(dev_ino, size)
31 else:
32 size = cached_size
33 elif st.st_nlink > 1: # Hardlink
34 if cache_add_hardlink(st.st_dev, st.st_ino, dir_ino, path):
35 size = st.st_blocks * 512
36 else:
37 size = 0
38 else: # File
39 size = st.st_blocks * 512
40 return size
41 except OSError, e:
42 print 'size', e
43 return 0
45 def _size(path):
46 """Same as _size_fast(path, dir_ino), except that dir_ino is computed."""
47 def parent_path():
48 """Return the parent directory path, taking into account '/'
49 and '.'."""
50 elements = path.split('/')
51 length = len(elements)
52 found = False
53 for i in xrange(length - 1, -1, -1):
54 if elements[i] not in ('', '.'):
55 if found:
56 del elements[i+1:]
57 return '/'.join(elements)
58 found = True
59 if path[0] == '/':
60 return '/'
61 if os.path.samefile(path, '.'):
62 return os.path.dirname(os.getcwd())
63 return '.'
64 parent = parent_path()
65 return _size_fast(path, get_dev_ino(parent)[1])
67 class _pysize_node:
68 """The parent class of all displayed nodes, as these nodes are displayed on
69 screen, there should be few instances."""
70 def __init__(self, parent, basename):
71 self.rectangle = (0, 0, 0, 0)
72 self.parent = parent
73 if basename:
74 self.basename = basename
75 self.size = _size(basename)
77 def _compute_height(self):
78 return 0
80 def _compute_depth(self):
81 depth = 0
82 node = self
83 while node:
84 node = node.parent
85 depth += 1
86 return depth
88 def is_dir(self):
89 return False
91 def is_real(self):
92 """Does the node correspond to a path that actually exists?"""
93 return True
95 def get_children(self):
96 """Return the children for a directory."""
97 return []
99 def get_fullpath(self):
100 """Return the fullpath of the node, using its parent node."""
101 fullpath = self.basename
102 parent = self.parent
103 while parent:
104 fullpath = os.path.join(parent.basename, fullpath)
105 parent = parent.parent
106 return fullpath
108 def get_name(self):
109 """Return the name that should be displayed for this node."""
110 return self.basename
112 def __iter__(self):
113 """The iterator is a depth first traversal."""
114 yield self
115 for child in self.get_children():
116 for node in child:
117 yield node
119 def contains_point(self, p):
120 """Is the point p in the graphical representation of this node?"""
121 (x0, x1, y0, y1) = self.rectangle
122 return x0 < p.x and p.x < x1 and y0 < p.y and p.y < y1
124 class _pysize_node_remaining(_pysize_node):
125 """The sum of a directory's children that are too small to be drawn."""
126 def __init__(self, parent, size):
127 _pysize_node.__init__(self, parent, None)
128 self.basename = '..///..'
129 self.size = size
131 def is_real(self):
132 """This node does not actually exists, it is an aggregate."""
133 return False
135 class _pysize_node_dir(_pysize_node):
136 """A directory"""
137 def __init__(self, parent, basename, max_depth, min_size):
138 _pysize_node.__init__(self, parent, basename)
139 self.children = []
140 if max_depth != 0:
141 children_size = 0
142 remaining_size = 0
143 for child in _browse_directory(basename):
144 node = _create_node(self, child, max_depth - 1, min_size)
145 if node.is_real():
146 self.children.append(node)
147 children_size += node.size
148 else:
149 remaining_size += node.size
150 self.children.sort(key=lambda t: t.size, reverse=True)
151 self.size = max(children_size, self.size)
152 if remaining_size > min_size:
153 rem = _pysize_node_remaining(self, remaining_size)
154 self.children.append(rem)
156 def _compute_height(self):
157 if self.children:
158 children_height = max([c._compute_height() for c in self.children])
159 return children_height + 1
160 return 0
162 def is_dir(self):
163 return True
165 def get_children(self):
166 return self.children
168 def get_name(self):
169 return self.basename + '/'
171 class _pysize_node_file(_pysize_node):
172 """A file"""
173 def __init__(self, parent, basename):
174 _pysize_node.__init__(self, parent, basename)
176 class _pysize_node_hardlink(_pysize_node_file):
177 """A hardlink, the canonical one, or a link"""
178 def __init__(self, parent, basename):
179 _pysize_node_file.__init__(self, parent, basename)
181 def _create_node(parent, basename, max_depth, min_size):
182 """Return a pysize_node for parent/basename traversing up to max_depth
183 levels and only taking into account elements bigger than min_size."""
184 size = _size(basename)
185 if size < min_size:
186 node = _pysize_node_remaining(parent, size)
187 else:
188 st = os.lstat(basename)
189 if stat.S_ISDIR(st.st_mode):
190 node = _pysize_node_dir(parent, basename, max_depth, min_size)
191 elif st.st_nlink > 1:
192 node = _pysize_node_hardlink(parent, basename)
193 else:
194 node = _pysize_node_file(parent, basename)
195 return node
197 # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/231503
198 # By David Eppstein
199 def _breadth_first(tree,children=iter):
200 """Traverse the nodes of a tree in breadth-first order.
201 The first argument should be the tree root; children
202 should be a function taking as argument a tree node and
203 returning an iterator of the node's children.
205 yield tree
206 last = tree
207 for node in _breadth_first(tree,children):
208 for child in children(node):
209 yield child
210 last = child
211 if last is node:
212 return
214 class pysize_tree:
215 """The entry point to a tree of pysize_node."""
216 def __init__(self, path, max_depth, min_size_ratio):
217 self.fullpath = path
218 min_size = int(min_size_ratio * _size(path))
219 self.root = _create_node(None, path, max_depth, min_size)
220 if self.root:
221 self.height = self.root._compute_height()
222 else:
223 self.height = 0
225 def get_next_sibling(self, node):
226 """Return the next pysize_node in node's level."""
227 is_next = False
228 for child in _breadth_first(self.root, lambda c: c.get_children()):
229 if is_next:
230 if child._compute_depth() == node._compute_depth():
231 return child
232 return
233 is_next = child is node
235 def get_previous_sibling(self, node):
236 """Return the previous pysize_node in node's level."""
237 prev = None
238 for child in _breadth_first(self.root, lambda c: c.get_children()):
239 if child is node:
240 if prev._compute_depth() == node._compute_depth():
241 return prev
242 return
243 prev = child
245 def get_first_child(self, node):
246 """Return the first pysize_node in node's children."""
247 children = node.get_children()
248 if children:
249 return children[0]
251 def get_parent(self, node):
252 """Return the saved parent of node."""
253 return node.parent
255 def _main():
256 tree = pysize_tree('/usr/share/doc', 4, 0.1)
257 print 'height', tree.height
259 if __name__ == '__main__':
260 _main()