added test of len() method for SQLTable
[pygr.git] / pygr / mapping.py
blob0bdfdfeb09f20077e179caadd46d45cdec6a353e
3 from __future__ import generators
4 from schema import *
5 import classutil
8 def update_graph(self,graph):
9 'save nodes and edges of graph to self'
10 for node,d in graph.iteritems():
11 self += node
12 saveDict = self[node]
13 for target,edge in d.iteritems():
14 saveDict[target] = edge
17 class PathList(list):
18 """Internal representation for storing both nodes and edges as list
19 So filter functions can see both nodes and edges"""
20 def __init__(self,nodes=None,edges=None):
21 if nodes!=None:
22 list.__init__(self,nodes)
23 else:
24 list.__init__(self)
25 if edges!=None:
26 self.edge=list(edges)
27 else:
28 self.edge=[]
30 def append(self,val):
31 list.append(self,val)
32 self.edge.append(val)
34 def extend(self,l):
35 list.extend(self,l) # EXTEND TOP-LEVEL LIST AS USUAL
36 try: # EXTEND OUR EDGE LIST AS WELL
37 self.edge.extend(l.edge)
38 except AttributeError: #IF l HAS NO EDGES, PAD OUR EDGE LIST WITH Nones
39 self.edge.extend(len(l)*[None])
43 class Edge(list):
44 "Interface to edge information."
45 isDirected=False
46 def __init__(self,graph,nodes,edgeInfo):
47 self.graph=graph
48 if edgeInfo:
49 self.edgeInfo=edgeInfo
50 list.__init__(self,nodes) # SAVE NODES AS TUPLE
53 def __getattr__(self,attr):
54 try:
55 return getattr(self.edgeInfo,attr)
56 except AttributeError:
57 if isinstance(self.edgeInfo,types.DictType):
58 return self.edgeInfo[attr] # TREAT edgeInfo AS ATTRIBUTE DICTIONARY
59 raise AttributeError(attr)
61 # SHOULD WE DEFINE A setattr HERE TOO, TO ALLOW USER TO ADD NEW ATTRIBUTE VALUES??
62 # setattr IS PAINFUL TO IMPLEMENT, BECAUSE OF THE RECURSIVE REFERENCE PROBLEM
64 def __cmp__(self,other): # DO WE NEED TO COMPARE EDGE INFO??
65 if not isinstance(other,Edge): # CAN ONLY COMPARE A PAIR OF EDGES
66 return -1
67 diff=cmp(self.graph,other.graph)
68 if diff: # NOT IN THE SAME GRAPH...
69 return diff
70 elif self.isDirected: # IF DIRECTED, JUST COMPARE IN CURRENT ORDER
71 return tuple.__cmp__(self,other)
72 else: # UNDIRECTED COMPARISON REQUIRES PUTTING BOTH IN SAME ORDER
73 me=[i for i in self]
74 you=[i for i in other]
75 me.sort()
76 you.sort()
77 return cmp(me,you)
79 # NEEDS SCHEMA SUPPORT: RETURN A SINGLE SCHEMA TUPLE DESCRIBING THIS EDGE.
81 class DirectedEdge(Edge):
82 isDirected=True
85 # need a class to provide access to the edges in a graph
86 # iterator, membership test
87 #class EdgeSet
91 class dictEdge(dict):
92 """2nd layer graph interface implemenation using dict.
93 """
94 dictClass=dict
95 def __init__(self,graph,fromNode):
96 self.graph=graph
97 self.fromNode=fromNode
98 self.dictClass.__init__(self) # INITIALIZE TOPLEVEL DICTIONARY
100 def __iadd__(self,target):
101 "Add edge from fromNode to target with no edge-info"
102 self[target]=None
103 return self # THIS IS REQUIRED FROM iadd()!!
105 def __setitem__(self,target,edgeInfo):
106 "Add edge from fromNode to target with edgeInfo"
107 self.dictClass.__setitem__(self,target,edgeInfo)
108 if target not in self.graph: # ADD NEW NODE TO THE NODE DICT
109 self.graph+=target
111 _setitem_=dict.__setitem__ # INTERNAL INTERFACE FOR SAVING AN ENTRY
113 def __delitem__(self,target):
114 "Delete edge from fromNode to target"
115 try:
116 self.dictClass.__delitem__(self,target)
117 except KeyError: # GENERATE A MORE INFORMATIVE ERROR MESSAGE
118 raise KeyError('No edge from node to target')
120 def __isub__(self,target):
121 "Delete edge from fromNode to target"
122 self.__delitem__(target)
123 return self # THIS IS REQUIRED FROM iadd()!!
125 def edges(self):
126 "Return iterator for accessing edges from fromNode"
127 for target,edgeInfo in self.items():
128 if isinstance(edgeInfo,Edge):
129 yield edgeInfo
130 else:
131 yield Edge(self.graph,(self.fromNode,target,edgeInfo),edgeInfo)
135 class dictGraph(dict):
136 """Top layer graph interface implemenation using dict.
138 dictClass=dict
139 edgeDictClass=dictEdge
140 def __init__(self,schema=None,domain=None,range=None):
141 if schema and domain and range:
142 if domain not in schema:
143 schema += domain #ADD DOMAIN AS NODE TO schema GRAPH
144 schema[domain][range]=self
145 self.dictClass.__init__(self) # INITIALIZE TOPLEVEL DICTIONARY
147 def __iadd__(self,node,ruleSet=False):
148 "Add node to graph with no edges"
149 if node not in self:
150 self.dictClass.__setitem__(self,node,self.edgeDictClass(self,node))
151 if ruleSet==False:
152 ruleSet=getschema(node,graph=self)
153 for rule in ruleSet:
154 if isinstance(rule[1],types.StringType): # ATTRIBUTE BINDING!
155 setattr(node,rule[1],self[node]) # BIND DIRECTLY TO ATTRIBUTE
156 return self # THIS IS REQUIRED FROM iadd()!!
158 def __setitem__(self,node,target):
159 "This method exists only to support g[n]+=o. Do not use as g[n]=foo."
160 if self[node]!=target:
161 raise ValueError('Incorrect usage. Add edges using g[n]+=o or g[n][o]=edge.')
163 def __delitem__(self,node):
164 "Delete node from graph."
165 # GRR, WE REALLY NEED TO FIND ALL EDGES THAT GO TO THIS NODE, DELETE THEM TOO
166 try:
167 self.dictClass.__delitem__(self,node) # DO STUFF TO REMOVE IT HERE...
168 except KeyError:
169 raise KeyError('Node not present in mapping.')
170 for rule in getschema(node,graph=self):
171 if isinstance(rule[1],types.StringType): # ATTRIBUTE BINDING!
172 delattr(node,rule[1]) # REMOVE ATTRIBUTE BINDING
174 def __isub__(self,node):
175 "Delete node from graph"
176 self.__delitem__(node)
177 return self # THIS IS REQUIRED FROM isub()!!
179 def __hash__(self): # SO SCHEMA CAN INDEX ON GRAPHS...
180 return id(self)
182 def edges(self):
183 "Return iterator for all edges in this graph"
184 for edgedict in self.values():
185 for edge in edgedict.edges():
186 yield edge
187 update = update_graph
193 class dictEdgeFB(dictEdge):
194 "dictEdge subclass that saves both forward and backward edges"
195 def __setitem__(self,target,edgeInfo):
196 "Save edge in both forward and backward dicts."
197 dictEdge.__setitem__(self,target,edgeInfo) # FORWARD EDGE
198 try:
199 d=self.graph._inverse[target]
200 except KeyError:
201 d=self.dictClass()
202 self.graph._inverse[target]=d
203 d[self.fromNode]=edgeInfo # SAVE BACKWARD EDGE
205 def __invert__(self):
206 "Get nodes with edges to this node"
207 return self.graph._inverse[self.fromNode]
209 class dictGraphFB(dictGraph):
210 "Graph that saves both forward and backward edges"
211 def __init__(self,**kwargs):
212 dictGraph.__init__(self,**kwargs)
213 self._inverse=self.dictClass()
214 __invert__ = classutil.standard_invert
215 def __delitem__(self,node):
216 "Delete node from the graph"
217 try:
218 fromNodes=self._inverse[node] #
219 del self._inverse[node] # REMOVE FROM _inverse DICT
220 except KeyError:
221 pass
222 else: # DELETE EDGES TO THIS NODE
223 for i in fromNodes:
224 del self[i][node]
225 dictGraph.__delitem__(self,node)
229 def listUnion(ivals):
230 'merge all items using union operator'
231 union=None
232 for ival in ivals:
233 try:
234 union+=ival
235 except TypeError:
236 union=ival
237 return union
240 class DictQueue(dict):
241 'each index entry acts like a queue; setitem PUSHES, and delitem POPS'
242 def __setitem__(self,k,val):
243 try:
244 dict.__getitem__(self,k).append(val)
245 except KeyError:
246 dict.__setitem__(self,k,[val])
247 def __getitem__(self,k):
248 return dict.__getitem__(self,k)[0]
249 def __delitem__(self,k):
250 l=dict.__getitem__(self,k)
251 del l[0]
252 if len(l)==0:
253 dict.__delitem__(self,k)
258 ################################ PYGR.DATA.SCHEMA - AWARE CLASSES BELOW
261 def close_if_possible(self):
262 'close storage to ensure any pending data is written'
263 try:
264 do_close = self.d.close
265 except AttributeError:
266 pass
267 else:
268 do_close()
273 class Collection(object):
274 'flexible storage mapping ID --> OBJECT'
275 def __init__(self,saveDict=None,dictClass=dict,**kwargs):
276 '''saveDict, if not None, is the internal mapping to use as our storage.
277 filename: if provided, is a file path to a shelve (BerkeleyDB) file to
278 store the data in.
279 dictClass: if provided, is the class to use for storage of the dict data.'''
280 if saveDict is not None:
281 self.d = saveDict
282 elif 'filename' in kwargs: # USE A SHELVE (BERKELEY DB)
283 try:
284 if kwargs['intKeys']: # ALLOW INT KEYS, HAVE TO USE IntShelve
285 self.__class__ = IntShelve
286 else:
287 raise KeyError
288 except KeyError:
289 self.__class__ = PicklableShelve
290 return self.__init__(**kwargs)
291 else:
292 self.d = dictClass()
293 classutil.apply_itemclass(self,kwargs)
294 def __getitem__(self,k): return self.d[k]
295 def __setitem__(self,k,v): self.d[k] = v
296 def __delitem__(self,k): del self.d[k]
297 def __len__(self): return len(self.d)
298 def __contains__(self,k): return k in self.d
299 def __iter__(self): return iter(self.d)
300 def __getattr__(self,attr):
301 if attr=='__setstate__' or attr=='__dict__': # PREVENT INFINITE RECURSE IN UNPICKLE
302 raise AttributeError
303 try: # PROTECT AGAINST INFINITE RECURSE IF NOT FULLY __init__ED...
304 return getattr(self.__dict__['d'],attr)
305 except KeyError:
306 raise AttributeError('Collection has no subdictionary')
307 close = close_if_possible
308 def __del__(self):
309 'must ensure that shelve object is closed to save pending data'
310 try:
311 self.close()
312 except classutil.FileAlreadyClosedError:
313 pass
315 class PicklableShelve(Collection):
316 'persistent storage mapping ID --> OBJECT'
317 def __init__(self,filename,mode=None,writeback=False,unpicklingMode=False,
318 verbose=True,**kwargs):
319 '''Wrapper for a shelve object that can be pickled. Ideally, you
320 should specify a TWO letter mode string: the first letter to
321 indicate what mode the shelve should be initially opened in, and
322 the second to indicate the mode to open the shelve during unpickling.
323 e.g. mode='nr': to create an empty shelve (writable),
324 which in future will be re-opened read-only.
325 Also, mode=None makes it first attempt to open read-only, but if the file
326 does not exist will create it using mode 'c'. '''
327 self.filename = classutil.SourceFileName(str(filename)) # MARKS THIS STR AS A FILE PATH
328 self.writeback = writeback
329 if unpicklingMode or mode is None or mode=='r': # JUST USE mode AS GIVEN
330 self.mode = mode
331 elif mode=='n' or mode=='c' or mode=='w': # AMBIGUOUS MODES, WARN & SET DEFAULT
332 if verbose:
333 import sys
334 print >>sys.stderr,'''Warning: you opened shelve file %s
335 in mode '%s' but this is ambiguous for how the shelve should be
336 re-opened later during unpickling. By default it will be
337 re-opened in mode 'r' (read-only). To make it be re-opened
338 writable, create it in mode '%sw', or call its method
339 reopen('w'), which will make it be re-opened in mode 'w' now and
340 in later unpickling. To suppress this warning message, use the
341 verbose=False option.''' % (filename,mode,mode)
342 self.mode = 'r'
343 else: # PROCESS UNAMBIGUOUS TWO-LETTER mode STRING
344 try:
345 if len(mode)==2 and mode[0] in 'ncwr' and mode[1] in 'cwr':
346 self.mode = mode[1] # IN FUTURE OPEN IN THIS MODE
347 mode = mode[0] # OPEN NOW IN THIS MODE
348 else:
349 raise ValueError('invalid mode string: '+mode)
350 except TypeError:
351 raise ValueError('file mode must be a string!')
352 if unpicklingMode:
353 self.d = classutil.open_shelve(filename,mode,writeback,allowReadOnly=True)
354 else:
355 self.d = classutil.open_shelve(filename,mode,writeback)
356 classutil.apply_itemclass(self,kwargs)
357 __getstate__ = classutil.standard_getstate ############### PICKLING METHODS
358 __setstate__ = classutil.standard_setstate
359 _pickleAttrs = dict(filename=0,mode=0,writeback=0)
360 def close(self):
361 '''close our shelve index file. '''
362 self.d.close()
363 def __setitem__(self,k,v):
364 try:
365 self.d[k]=v
366 except TypeError:
367 raise TypeError('to allow int keys, you must pass intKeys=True to constructor!')
368 def reopen(self,mode='r'):
369 're-open shelve in the specified mode, and save mode on self'
370 self.close()
371 self.d = classutil.open_shelve(self.filename,mode,writeback=self.writeback)
372 self.mode = mode
375 class IntShelve(PicklableShelve):
376 'provides an interface to shelve that can use int as key'
377 def saveKey(self,i):
378 'convert to string key'
379 if isinstance(i,int):
380 return 'int:%s'%i
381 elif isinstance(i,str):
382 return i
383 try:
384 return 'int:%s'%int(i)
385 except TypeError:
386 pass
387 raise KeyError('IntShelve can only save int or str as key')
388 def trueKey(self,k):
389 "convert back to key's original format"
390 if k.startswith('int:'):
391 return int(k[4:])
392 else:
393 return k
394 def __getitem__(self,k):
395 return self.d[self.saveKey(k)]
396 def __setitem__(self,k,v):
397 self.d[self.saveKey(k)]=v
398 def __delitem__(self,k): del self.d[self.saveKey(k)]
399 def __contains__(self,k):
400 return self.saveKey(k) in self.d
401 def __iter__(self): ################ STANDARD ITERATOR METHODS
402 for k in self.d:
403 yield self.trueKey(k)
404 def keys(self): return [k for k in self]
405 def iteritems(self):
406 for k,v in self.d.iteritems():
407 yield self.trueKey(k),v
408 def items(self): return [k for k in self.iteritems()]
412 ## PACKING / UNPACKING METHODS FOR SEPARATING INTERNAL VS. EXTERNAL
413 ## REPRESENTATIONS OF GRAPH NODES AND EDGES
414 ## 1. ID-BASED PACKING: USE obj.id AS INTERNAL REPRESENTATION
416 ## 2. TRIVIAL: INTERNAL AND EXTERNAL REPRESENTATIONS IDENTICAL
417 ## WORKS WELL FOR STRING OR INT NODES / EDGES.
419 ## 3. PICKLE PACKING: USE PICKLE AS INTERNAL REPRESENTATION
420 def pack_id(self,obj):
421 'extract id attribute from obj'
422 try:
423 return obj.id
424 except AttributeError:
425 if obj is None:
426 return None
427 raise
429 def unpack_source(self,objID):
430 return self.sourceDB[objID]
431 def unpack_target(self,objID):
432 return self.targetDB[objID]
433 def unpack_edge(self,objID):
434 try:
435 return self.edgeDB[objID]
436 except KeyError:
437 if objID is None:
438 return None
439 raise
441 def add_standard_packing_methods(localDict):
442 localDict['pack_source'] = pack_id
443 localDict['pack_target'] = pack_id
444 localDict['pack_edge'] = pack_id
445 localDict['unpack_source'] = unpack_source
446 localDict['unpack_target'] = unpack_target
447 localDict['unpack_edge'] = unpack_edge
449 def add_trivial_packing_methods(localDict):
450 for name in ('pack_source','pack_target','pack_edge',
451 'unpack_source','unpack_target','unpack_edge'):
452 localDict[name] = lambda self,obj:obj
455 def pack_pickle(self,obj):
456 'get pickle string for obj'
457 import pickle
458 return pickle.dumps(obj)
460 def unpack_pickle(self,s):
461 'unpickle string to get obj'
462 import pickle
463 return pickle.loads(s)
466 class MappingInverse(object):
467 def __init__(self,db):
468 self._inverse=db
469 self.attr=db.inverseAttr
470 def __getitem__(self,k):
471 return self._inverse.sourceDB[getattr(k,self.attr)]
472 __invert__ = classutil.standard_invert
474 class Mapping(object):
475 '''dict-like class suitable for persistent usages. Extracts ID values from
476 keys and values passed to it, and saves IDs into its internal dictionary
477 instead of the actual objects. Thus, the external interface is objects,
478 but the internal storage is ID values.'''
479 def __init__(self,sourceDB,targetDB,saveDict=None,IDAttr='id',targetIDAttr='id',
480 itemAttr=None,multiValue=False,inverseAttr=None,**kwargs):
481 '''sourceDB: dictionary that maps key ID values to key objects
482 targetDB: dictionary that maps value IDs to value objects
483 saveDict, if not None, is the internal mapping to use as our storage
484 IDAttr: attribute name to obtain an ID from a key object
485 targetIDAttr: attribute name to obtain an ID from a value object
486 itemAttr, if not None, the attribute to obtain target (value) ID
487 from an internal storage value
488 multiValue: if True, treat each value as a list of values.
489 filename: if provided, is a file path to a shelve (BerkeleyDB) file to
490 store the data in.
491 dictClass: if not None, is the class to use for storage of the dict data'''
492 if saveDict is None:
493 self.d=classutil.get_shelve_or_dict(**kwargs)
494 else:
495 self.d=saveDict
496 self.IDAttr=IDAttr
497 self.targetIDAttr=targetIDAttr
498 self.itemAttr=itemAttr
499 self.multiValue=multiValue
500 self.sourceDB=sourceDB
501 self.targetDB=targetDB
502 if inverseAttr is not None:
503 self.inverseAttr=inverseAttr
504 def __getitem__(self,k):
505 kID=getattr(k,self.IDAttr)
506 return self.getTarget(self.d[kID])
507 def getTarget(self,vID):
508 if self.itemAttr is not None:
509 vID=getattr(vID,self.itemAttr)
510 if self.multiValue:
511 return [self.targetDB[j] for j in vID]
512 else:
513 return self.targetDB[vID]
514 def __setitem__(self,k,v):
515 if self.multiValue:
516 v=[getattr(x,self.targetIDAttr) for x in v]
517 else:
518 v=getattr(v,self.targetIDAttr)
519 self.d[getattr(k,self.IDAttr)]=v
520 def __delitem__(self,k):
521 del self.d[getattr(k,self.IDAttr)]
522 def __contains__(self,k):
523 return getattr(k,self.IDAttr) in self.d
524 def __len__(self): return len(self.d)
525 def clear(self): self.d.clear()
526 def copy(self):
527 return Mapping(self.sourceDB,self.targetDB,self.d.copy(),self.IDAttr,
528 self.targetIDAttr,self.itemAttr,self.multiValue)
529 def update(self,b):
530 for k,v in b.iteritems():
531 self[k]=v
532 def get(self,k,v=None):
533 try:
534 return self[k]
535 except KeyError:
536 return v
537 def setdefault(self,k,v=None):
538 try:
539 return self[k]
540 except KeyError:
541 self[k]=v
542 return v
543 def pop(self,k,v=None):
544 try:
545 v=self[k]
546 except KeyError:
547 return v
548 del self[k]
549 return v
550 def popitem(self):
551 kID,vID=self.d.popitem()
552 return kID,self.getTarget(vID)
553 def __iter__(self): ######################## ITERATORS
554 for kID in self.d:
555 yield self.sourceDB[kID]
556 def keys(self): return [k for k in self]
557 def itervalues(self):
558 for vID in self.d.itervalues():
559 yield self.getTarget(vID)
560 def values(self): return [v for v in self]
561 def iteritems(self):
562 for kID,vID in self.d.iteritems():
563 yield self.sourceDB[kID],self.getTarget(vID)
564 def items(self): return [x for x in self.iteritems()]
565 __invert__ = classutil.standard_invert
566 _inverseClass = MappingInverse
567 close = close_if_possible
568 def __del__(self):
569 close_if_possible(self)
572 def graph_cmp(self,other):
573 'compare two graph dictionaries'
574 import sys
575 diff = cmp(len(self),len(other))
576 if diff!=0:
577 print >>sys.stderr,'len diff:',len(self),len(other)
578 return diff
579 for node,d in self.iteritems():
580 try:
581 d2 = other[node]
582 except KeyError:
583 print >>sys.stderr,'other missing key'
584 return 1
585 diff = cmp(d,d2)
586 if diff!=0:
587 print >>sys.stderr,'value diff',d,d2
588 return diff
589 return 0
593 class IDNodeDict(object):
594 """2nd layer graph interface implementation using proxy dict.
595 e.g. shelve."""
596 dictClass=dict
597 def __init__(self,graph,fromNode):
598 self.graph=graph
599 self.fromNode=fromNode
601 def __getitem__(self,target): ############# ACCESS METHODS
602 edgeID=self.graph.d[self.fromNode][self.graph.pack_target(target)]
603 return self.graph.unpack_edge(edgeID)
605 def __setitem__(self,target,edgeInfo):
606 "Add edge from fromNode to target with edgeInfo"
607 self.graph.d[self.fromNode][self.graph.pack_target(target)] \
608 = self.graph.pack_edge(edgeInfo)
609 if not hasattr(self.graph,'sourceDB') or \
610 (hasattr(self.graph,'targetDB') and self.graph.sourceDB==self.graph.targetDB):
611 self.graph+=target # ADD NEW NODE TO THE NODE DICT
613 def __delitem__(self,target):
614 "Delete edge from fromNode to target"
615 try:
616 del self.graph.d[self.fromNode][self.graph.pack_target(target)]
617 except KeyError: # GENERATE A MORE INFORMATIVE ERROR MESSAGE
618 raise KeyError('No edge from node to target')
619 ######### CONVENIENCE METHODS THAT USE THE ACCESS METHODS ABOVE
620 def __iadd__(self,target):
621 "Add edge from fromNode to target with no edge-info"
622 self[target]=None
623 return self # THIS IS REQUIRED FROM iadd()!!
625 def __isub__(self,target):
626 "Delete edge from fromNode to target"
627 self.__delitem__(target)
628 return self # THIS IS REQUIRED FROM iadd()!!
630 def edges(self):
631 "Return iterator for accessing edges from fromNode"
632 for target,edgeInfo in self.graph.d[self.fromNode].items():
633 yield (self.graph.unpack_source(self.fromNode),
634 self.graph.unpack_target(target),
635 self.graph.unpack_edge(edgeInfo))
636 def __len__(self):
637 return len(self.graph.d[self.fromNode])
638 def keys(self): return [k[1] for k in self.edges()] ##### ITERATORS
639 def values(self): return [k[2] for k in self.edges()]
640 def items(self): return [k[1:3] for k in self.edges()]
641 def __iter__(self):
642 for source,target,edgeInfo in self.edges():
643 yield target
644 def itervalues(self):
645 for source,target,edgeInfo in self.edges():
646 yield edgeInfo
647 def iteritems(self):
648 for source,target,edgeInfo in self.edges():
649 yield target,edgeInfo
650 __cmp__ = graph_cmp
653 class IDNodeDictWriteback(IDNodeDict):
654 'forces writing of subdictionary in shelve opened without writeback=True'
655 def __setitem__(self,target,edgeInfo):
656 d = self.graph.d[self.fromNode]
657 d[self.graph.pack_target(target)] = self.graph.pack_edge(edgeInfo)
658 self.graph.d[self.fromNode] = d # WRITE IT BACK... REQUIRED FOR SHELVE
659 self.graph += target # ADD NEW NODE TO THE NODE DICT
660 def __delitem__(self,target):
661 d = self.graph.d[self.fromNode]
662 del d[self.graph.pack_target(target)]
663 self.graph.d[self.fromNode] = d # WRITE IT BACK... REQUIRED FOR SHELVE
665 class IDNodeDictWriteNow(IDNodeDictWriteback):
666 'opens shelve for writing, writes an item, immediately reopens'
667 def __setitem__(self,target,edgeInfo):
668 self.graph.d.reopen('w')
669 IDNodeDictWriteback.__setitem__(self,target,edgeInfo)
670 self.graph.d.reopen('w')
671 def __delitem__(self,target):
672 self.graph.d.reopen('w')
673 IDNodeDictWriteback.__delitem__(self,target)
674 self.graph.d.reopen('w')
676 class IDGraphEdges(object):
677 '''provides iterator over edges as (source,target,edge) tuples
678 and getitem[edge] --> [(source,target),...]'''
679 def __init__(self,g):
680 self.g=g
681 def __iter__(self):
682 for d in self.g.itervalues():
683 for edge in d.edges():
684 yield edge
685 def __getitem__(self,edge):
686 l=[]
687 for sourceID,targetID in self.d[edge.id]:
688 l.append((self.g.sourceDB[sourceID],self.g.targetDB[targetID]))
689 return l
690 def __call__(self):
691 return self
693 class IDGraphEdgeDescriptor(object):
694 'provides interface to edges on demand'
695 def __get__(self,obj,objtype):
696 return IDGraphEdges(obj)
699 def save_graph_db_refs(self,sourceDB=None,targetDB=None,edgeDB=None,
700 simpleKeys=False,unpack_edge=None,
701 edgeDictClass=None,graph=None,**kwargs):
702 'apply kwargs to reference DB objects for this graph'
703 if sourceDB is not None:
704 self.sourceDB=sourceDB
705 else:
706 simpleKeys = True # NO SOURCE DB, SO STORE KEYS AS INTERNAL REPRESENTATION
707 if targetDB is not None:
708 self.targetDB=targetDB
709 if edgeDB is not None:
710 self.edgeDB=edgeDB
711 else: # just save the edge object as itself (not its ID)
712 self.pack_edge = self.unpack_edge = lambda edge:edge
713 if simpleKeys: # SWITCH TO USING TRIVIAL PACKING: OBJECT IS ITS OWN ID
714 self.__class__ = self._IDGraphClass
715 if unpack_edge is not None:
716 self.unpack_edge = unpack_edge # UNPACKING METHOD OVERRIDES DEFAULT
717 if graph is not None:
718 self.graph = graph
719 if edgeDictClass is not None:
720 self.edgeDictClass = edgeDictClass
722 def graph_db_inverse_refs(self,edgeIndex=False):
723 'return kwargs for inverse of this graph, or edge index of this graph'
724 if edgeIndex: # TO CONSTRUCT AN EDGE INDEX
725 db = ('edgeDB','sourceDB','targetDB')
726 else: # DEFAULT: TO CONSTRUCT AN INVERSE MAPPING OF THE GRAPH
727 db = ('targetDB','sourceDB','edgeDB')
728 try:
729 d = dict(sourceDB=getattr(self,db[0]),targetDB=getattr(self,db[1]))
730 try:
731 d['edgeDB'] = getattr(self,db[2]) # EDGE INFO IS OPTIONAL
732 except AttributeError:
733 pass
734 except AttributeError:
735 d = dict(simpleKeys=True) # NO SOURCE / TARGET DB, SO USE IDs AS KEYS
736 try: # COPY THE LOCAL UNPACKING METHOD, IF ANY
737 if not edgeIndex:
738 d['unpack_edge'] = self.__dict__['unpack_edge']
739 except KeyError:
740 pass
741 return d
744 def graph_setitem(self,node,target):
745 "This method exists only to support g[n]+=o. Do not use as g[n]=foo."
746 node=self.pack_source(node)
747 try:
748 if node==target.fromNode:
749 return
750 except AttributeError:
751 pass
752 raise ValueError('Incorrect usage. Add edges using g[n]+=o or g[n][o]=edge.')
755 class Graph(object):
756 """Top layer graph interface implemenation using proxy dict.
757 Works with dict, shelve, any mapping interface."""
758 edgeDictClass=IDNodeDict # DEFAULT EDGE DICT
759 def __init__(self,saveDict=None,dictClass=dict,writeNow=False,**kwargs):
760 if saveDict is not None: # USE THE SUPPLIED STORAGE
761 self.d = saveDict
762 elif 'filename' in kwargs: # USE A SHELVE (BERKELEY DB)
763 try:
764 if kwargs['intKeys']: # ALLOW INT KEYS, HAVE TO USE IntShelve
765 self.d = IntShelve(writeback=False,**kwargs)
766 else:
767 raise KeyError
768 except KeyError:
769 self.d = PicklableShelve(writeback=False,**kwargs)
770 if writeNow:
771 self.edgeDictClass = IDNodeDictWriteNow # WRITE IMMEDIATELY
772 else:
773 self.edgeDictClass = IDNodeDictWriteback # USE OUR OWN WRITEBACK
774 else:
775 self.d = dictClass()
776 save_graph_db_refs(self,**kwargs)
777 __getstate__ = classutil.standard_getstate ############### PICKLING METHODS
778 __setstate__ = classutil.standard_setstate
779 _pickleAttrs = dict(d='saveDict',sourceDB=0,targetDB=0,edgeDB=0,
780 edgeDictClass=0)
781 add_standard_packing_methods(locals()) ############ PACK / UNPACK METHODS
782 def close(self):
783 '''If possible, close our dict. '''
784 try:
785 do_close = self.d.close
786 except AttributeError:
787 pass
788 else:
789 do_close()
790 def __len__(self):
791 return len(self.d)
792 def __iter__(self):
793 for node in self.d:
794 yield self.unpack_source(node)
795 def keys(self): return [k for k in self]
796 def itervalues(self):
797 for node in self.d:
798 yield self.edgeDictClass(self,node)
799 def values(self): return [v for v in self.itervalues()]
800 def iteritems(self):
801 for node in self.d:
802 yield self.unpack_source(node),self.edgeDictClass(self,node)
803 def items(self): return [v for v in self.iteritems()]
804 edges=IDGraphEdgeDescriptor()
806 def __iadd__(self,node):
807 "Add node to graph with no edges"
808 node=self.pack_source(node)
809 if node not in self.d:
810 self.d[node]={} # INITIALIZE TOPLEVEL DICTIONARY
811 return self # THIS IS REQUIRED FROM iadd()!!
813 def __contains__(self,node):
814 return self.pack_source(node) in self.d
815 def __getitem__(self,node):
816 if node in self:
817 return self.edgeDictClass(self,self.pack_source(node))
818 raise KeyError('node not in graph')
819 __setitem__ = graph_setitem
820 def __delitem__(self,node):
821 "Delete node from graph."
822 node=self.pack_source(node)
823 # GRR, WE REALLY NEED TO FIND ALL EDGES THAT GO TO THIS NODE, DELETE THEM TOO
824 try:
825 del self.d[node] # DO STUFF TO REMOVE IT HERE...
826 except KeyError:
827 raise KeyError('Node not present in mapping.')
829 def __isub__(self,node):
830 "Delete node from graph"
831 self.__delitem__(node)
832 return self # THIS IS REQUIRED FROM isub()!!
833 update = update_graph
834 __cmp__ = graph_cmp
835 def __del__(self):
836 try:
837 self.close()
838 except classutil.FileAlreadyClosedError:
839 pass
842 # NEED TO PROVIDE A REAL INVERT METHOD!!
843 ## def __invert__(self):
844 ## 'get an interface to the inverse graph mapping'
845 ## try: # CACHED
846 ## return self._inverse
847 ## except AttributeError: # NEED TO CONSTRUCT INVERSE MAPPING
848 ## self._inverse=IDGraph(~(self.d),self.targetDB,self.sourceDB,self.edgeDB)
849 ## self._inverse._inverse=self
850 ## return self._inverse
851 def __hash__(self): # SO SCHEMA CAN INDEX ON GRAPHS...
852 return id(self)
855 class IDGraph(Graph):
856 add_trivial_packing_methods(locals())
858 Graph._IDGraphClass = IDGraph
860 class KeepUniqueDict(dict):
861 'dict that blocks attempts to overwrite an existing key'
862 def __setitem__(self,k,v):
863 try:
864 if self[k] is v:
865 return # ALREADY SAVED. NOTHING TO DO!
866 except KeyError: # NOT PRESENT, SO JUST SAVE THE VALUE
867 dict.__setitem__(self,k,v)
868 return
869 raise KeyError('attempt to overwrite existing key!')
870 def __hash__(self):
871 'ALLOW THIS OBJECT TO BE USED AS A KEY IN DICTS...'
872 return id(self)