from __future__ import generators
from mapping import *

class QueryMatchWrapper(dict):
    "build a queryMatch mapping on demand, since it's not actually needed during query traversal"
    def __init__(self,dataMatch,compiler):
        dict.__init__(self)
        for k,v in dataMatch.items(): # INVERT THE DATA MATCH MAPPING
            self[v]=k
        for i in range(compiler.n): # ALSO SAVE MAPPINGS TO DATA EDGES
            gqi=compiler.gqi[i]
            self[gqi.fromNode,gqi.queryNode]=compiler.dataEdge[i]

class QueryMatchDescriptor(object):
    def __get__(self,obj,objtype):
        return QueryMatchWrapper(obj.dataMatch,obj)

class QueryMatcher(object):
    "map a query node or edge on demand"
    def __init__(self,compiler):
        self.compiler=compiler
    def __getitem__(self,k):
        for q,d in self.iteritems():
            if q==k:
                return d
        raise KeyError(k)
    def __iter__(self):
        for k,v in self.iteritems():
            yield k
    def iteritems(self):
        for dataNode,queryNode in self.compiler.dataMatch.items():
            yield queryNode,dataNode # RETURN NODE MAPPINGS
        for i in range(self.compiler.n): # ALSO RETURN MAPPINGS TO DATA EDGES
            gqi=self.compiler.gqi[i] # RETURN EDGE MAPPINGS
            yield (gqi.fromNode,gqi.queryNode),self.compiler.dataEdge[i]
    def items(self):
        return [x for x in self.iteritems()]
    def __repr__(self):
        return '{'+','.join([repr(k)+':'+repr(v) for k,v in self.iteritems()])+'}'

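# For illustration: during traversal dataMatch maps dataNode -> queryNode, and
# QueryMatcher presents the inverse view plus edge mappings on demand, so iterating
# a queryMatch yields items shaped roughly like
#     queryNode, dataNode                          # node assignment
#     (fromQueryNode, queryNode), dataEdge         # edge assignment
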
class GraphQueryCompiler(object):
    'compile a series of GraphQueryIterators into python code, run them'
    #queryMatch=QueryMatchDescriptor()
    _lang="" # NO LANGUAGE STEM
    def __init__(self,name='graphquery',globalDict=None):
        self.name=name
        self.code=[]
        self.unmark_code=[]
        self.next_code=[]
        self.end_code=[]
        self.indent=[]
        self.gqi=[]
        self.queryLayerGraph=dictGraph()
        self.n=0
        if globalDict is None:
            self._compiled={}
        else:
            self._compiled=globalDict
        self.queryMatch=QueryMatcher(self)
    def __getitem__(self,key):
        'return appropriate code for accessing nodes / edges in data or query'
        if key=='n':
            return self.n
        elif key=='name':
            return self.name
        elif key=='dataGraph':
            queryEdge=self.gqi[self.n].queryGraph[self.gqi[self.n].fromNode][self.gqi[self.n].queryNode]
            try: # CHECK IF QUERY EDGE USES A NON-DEFAULT DATA GRAPH
                dg=queryEdge['dataGraph']
                return 'self.gqi[%d].dataGraph' % self.n
            except (TypeError,KeyError):
                return 'dataGraph'
        elif key=='filter':
            return 'self.gqi[%d].filter' % self.n
        elif key=='toQueryNode':
            return 'self.gqi[%d].queryNode' % self.n
        elif key=='fromQueryNode':
            return 'self.gqi[%d].fromNode' % self.n
        if key[:2]=='to':
            layer=self.queryLayerGraph[self.gqi[self.n].queryNode]
        elif key[:4]=='from':
            layer=self.queryLayerGraph[self.gqi[self.n].fromNode]
        if key[-8:]=='DataNode': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
            return 'dataNode%d' %layer.values()[0]
        if key[-8:]=='DataEdge': # GET LAST LAYER, WHERE THIS EDGE CREATED
            return 'dataEdge[%d]' %layer.values()[-1]
        if key=='level':
            return self.n
        try:
            return getattr(self.gqi[self.n],key)
        except AttributeError:
            raise KeyError('%s not a valid GraphQueryCompiler key' % key)
    def indent_code(self,codestr,current_indent):
        'calculate indentation levels added by code in codestr'
        codestr=codestr % self # PERFORM MACRO SUBSTITUTIONS
        lines=codestr.split('\n')
        lastline=lines[-1]
        if lastline=='' and len(lines)>1: # IGNORE TERMINAL BLANK LINE
            lastline=lines[-2]
        nindent=len(lastline.split('\t'))-1 # DETERMINE FINAL INDENTATION LEVEL
        if len(lastline)>0 and lastline[-1]==':':
            nindent+=1
        s='' # NOW FORMAT THE CODE WITH PROPER INDENTATION LEVEL
        for line in lines:
            s+=current_indent*'\t'+line+'\n'
        return s,current_indent+nindent
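    # For illustration: the %(...)s macros in the code templates below are resolved
    # through __getitem__ above (e.g. %(toDataNode)s -> 'dataNode0' for a node bound
    # at layer 0, %(level)d -> the current layer number), and the indent level returned
    # by indent_code grows by one per tab in the template's last line, plus one more
    # if that line ends with ':'.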
    def __iadd__(self,gqi):
        'add a GraphQueryIterator to be compiled into this query'
        self.gqi.append(gqi)
        if gqi.queryNode not in self.queryLayerGraph: # NOT ALREADY BOUND?
            self.queryLayerGraph+=gqi.queryNode
            codestr=getattr(gqi,self._lang+'_generator_code')
            markstr=getattr(gqi,self._lang+'_index_code')
            unmarkstr=getattr(gqi,self._lang+'_unmark_code')
            try:
                endcode=getattr(gqi,self._lang+'_end_code')
            except AttributeError:
                endcode=''
            try:
                nextcode=getattr(gqi,self._lang+'_next_code')
            except AttributeError:
                nextcode=''
            self.lastGenerator=self.n
        else:
            codestr=getattr(gqi,self._lang+'_closure_code')
            markstr=None
            unmarkstr=getattr(gqi,self._lang+'_unmark_closure_code')
            try:
                endcode=getattr(gqi,self._lang+'_end_closure_code')
            except AttributeError:
                endcode=''
            try:
                nextcode=getattr(gqi,self._lang+'_next_closure_code')
            except AttributeError:
                nextcode=''
        # BIND QUERY EDGE TO THIS LAYER
        self.queryLayerGraph[gqi.queryNode][gqi.fromNode]=self.n
        try: # GET INDENTATION LEVEL FROM PREVIOUS LAYER
            current_indent=self.indent[-1]
        except IndexError:
            current_indent=1 # TOPLEVEL: MUST INDENT INSIDE def
        self.end_code.append(self.indent_code(endcode,current_indent)[0])
        s,current_indent=self.indent_code(codestr,current_indent)
        self.next_code.append(self.indent_code(nextcode,current_indent)[0])
        if hasattr(gqi,'filter'):
            s2,current_indent=self.indent_code(getattr(gqi,self._lang+'_filter_code'),
                                               current_indent)
            s+=s2
        if hasattr(gqi,'filtercode'):
            s2,current_indent=self.indent_code(gqi.filtercode,current_indent)
            s+=s2
        if markstr is not None:
            s2,current_indent=self.indent_code(markstr,current_indent)
            s+=s2
        if unmarkstr is not None:
            s2,tail_indent=self.indent_code(unmarkstr,current_indent)
            self.unmark_code.append(s2)
        else:
            self.unmark_code.append('') # NO UNMARK CODE, SO JUST APPEND BLANK
        self.code.append(s)
        self.indent.append(current_indent)
        self.n+=1
        return self # iadd MUST RETURN self!!
    _def_code="""
def %(name)s(self,dataGraph,dataMatch=None,queryMatch=None):
	if dataMatch is None: dataMatch={}
	self.dataMatch=dataMatch
	dataEdge=%(n)d*[None]
	self.dataEdge=dataEdge
"""
    _yield_code='yield self.queryMatch\n'
    _end_code=''
    def __str__(self):
        'generate code for this query, as a string function definition'
        s=self._def_code % self
        for layer in self.code: # GENERATE ALL THE TRAVERSAL CODE
            s+=layer
        s2=self.indent_code(self._yield_code,self.indent[-1])[0] # yield THE RESULT
        s+=s2
        i=len(self.unmark_code)-1
        while i>=0: # GENERATE THE UNMARKING CODE...
            s+=self.unmark_code[i]
            s+=self.next_code[i]
            s+=self.end_code[i]
            i-=1
        s+=self._end_code % self
        return s
    def run(self,dataGraph,*args,**kwargs):
        'run the query, pre-compiling it if necessary'
        try: # JUST TRY RUNNING OUR FUNCTION: IT RETURNS AN ITERATOR
            return self._compiled[self.name](self,dataGraph,*args,**kwargs)
        except KeyError:
            self.compile()
            return self._compiled[self.name](self,dataGraph,*args,**kwargs) # RUN IT
    def compile(self):
        'compile using Python exec statement'
        exec str(self) in self._compiled # COMPILE OUR FUNCTION
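    # For illustration, a two-node query (a single query edge) compiles to a generator
    # function shaped roughly like this (macro names already substituted):
    #   def graphquery(self,dataGraph,dataMatch=None,queryMatch=None):
    #       ...
    #       for dataNode0 in dataGraph:                    # ContainerGQI layer
    #           ...mark dataNode0 in dataMatch...
    #           try:                                       # follow the query edge
    #               it1=dataGraph[dataNode0]
    #           except KeyError:
    #               continue
    #           for dataNode1,dataEdge[1] in it1.items():
    #               ...mark dataNode1...
    #               yield self.queryMatch                  # one complete match
    #               ...unmark dataNode1...
    #           ...unmark dataNode0...
    # run() exec's this source into self._compiled and then simply calls it.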

def find_distutils_lib(path='build'):
    'locate the build/lib path where distutils builds modules'
    import os
    dirs=os.listdir(path) # TRY TO FIND LIB DIRECTORY CONTAINING BUILT MODULE
    for d in dirs:
        if d[:4]=='lib.':
            return path+'/'+d
    raise OSError((1,'unable to locate build/lib where distutils built your module!'))
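# For illustration: distutils names its build directory after the platform and Python
# version, so this typically returns something like 'build/lib.linux-i686-2.3'
# (the exact suffix depends on the platform).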

class GraphQueryPyrex(GraphQueryCompiler):
    'compile a series of GraphQueryIterators into pyrex code, run them'
    #queryMatch=QueryMatchDescriptor()
    _lang="_pyrex" # PYREX LANGUAGE STEM
    def __getitem__(self,key):
        'return appropriate code for accessing nodes / edges in data or query'
        if key=='n':
            return self.n
        elif key=='name':
            return self.name
        elif key=='dataGraph':
            try: # CHECK IF QUERY EDGE USES A NON-DEFAULT DATA GRAPH
                queryEdge=self.gqi[self.n].queryGraph[self.gqi[self.n].fromNode][self.gqi[self.n].queryNode]
                dg=queryEdge['dataGraph']
                return 'self.gqi[%d].dataGraph' % self.n
            except (TypeError,KeyError):
                return 'dataGraph'
        elif key=='filter':
            return 'self.gqi[%d].filter' % self.n
        elif key=='toQueryNode':
            return 'self.gqi[%d].queryNode' % self.n
        elif key=='fromQueryNode':
            return 'self.gqi[%d].fromNode' % self.n
        if key[:2]=='to':
            layer=self.queryLayerGraph[self.gqi[self.n].queryNode]
        elif key[:4]=='from':
            layer=self.queryLayerGraph[self.gqi[self.n].fromNode]
        if key[-8:]=='DataNode': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
            return 'dataNode%d' %layer.values()[0]
        if key[-8:]=='DataEdge': # GET LAST LAYER, WHERE THIS EDGE CREATED
            return 'dataEdge%d' %layer.values()[-1]
        if key[-8:]=='DataDict': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
            return 'cDict%d' %layer.values()[0]
        if key[-7:]=='DataPtr': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
            return 'pDictEntry%d' %layer.values()[0]
        if key[-11:]=='DataPtrCont': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
            return 'pGraphEntry%d' %layer.values()[0]
        if key[-11:]=='DataCounter': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
            return 'i%d' %layer.values()[0]
        if key=='toDataNodeUnmatched':
            l=['dataNode%d!=dataNode%d' %(self.queryLayerGraph[self.gqi[i].queryNode].values()[0],
                                          self.queryLayerGraph[self.gqi[self.n].queryNode].values()[0])
               for i in range(self.n)]
            if len(l)>0:
                return ' and '.join(l)
            else:
                return 'True'
        if key=='dataNodeDefs':
            return ','.join(['dataNode%d' %i for i in range(self.n)])
        if key=='dataEdgeDefs':
            return ','.join(['dataEdge%d' %i for i in range(self.n)])
        if key=='dataDictDefs':
            return ','.join(['*cDict%d' %i for i in range(self.n)])
        if key=='dataPtrDefs':
            return ','.join(['*pDictEntry%d' %i for i in range(self.n)])
        if key=='dataPtrContDefs':
            return ','.join(['*pGraphEntry%d' %i for i in range(self.n)])
        if key=='dataCounterDefs':
            return ','.join(['i%d' %i for i in range(self.n)])
        if key=='dataCounterArgs':
            return ','.join(['int i%d' %i for i in range(self.n)])
        if key=='itaVector':
            return ','.join(['ita.vector[%d]' %i for i in range(self.n)])
        if key=='itaTuple':
            return ',\\\n'.join(['p_ita[%d]' %i for i in range(2*self.n)])
        if key=='resultTuple':
            return ',\\\n'.join(['dataNode%d,dataEdge%d'
                                 %(self.queryLayerGraph[self.gqi[i].queryNode].values()[0],i)
                                 for i in range(self.n)])
        if key=='resultTuples':
            return ','.join(['(dataNode%d,dataEdge%d)'
                             %(self.queryLayerGraph[self.gqi[i].queryNode].values()[0],i)
                             for i in range(self.n)])
        if key=='level' or key=='nEdges':
            return self.n
        if key=='lastGenerator':
            return self.lastGenerator
        try:
            return getattr(self.gqi[self.n],key)
        except AttributeError:
            raise KeyError('%s not a valid GraphQueryPyrex key' % key)
    _def_code="""
cimport cdict
cdef c_%(name)s(cdict.CGraphDict cgd,cdict.IntTupleArray ita,%(dataCounterArgs)s):
	cdef cdict.CGraph *dataGraph
	cdef cdict.CDict %(dataDictDefs)s
	cdef cdict.CDictEntry %(dataPtrDefs)s
	cdef cdict.CDictEntry *pd_temp
	cdef cdict.CGraphEntry %(dataPtrContDefs)s
	cdef cdict.CGraphEntry *p_temp
	#cdef int %(dataCounterDefs)s
	cdef int %(dataNodeDefs)s
	cdef int %(dataEdgeDefs)s
	cdef int *p_ita
	dataGraph=cgd.d
	p_ita=ita.data
"""
    _yield_code="""
%(itaTuple)s=%(resultTuple)s
p_ita=p_ita+2*%(nEdges)d
ita.n=ita.n+1
if ita.n>=ita.n_alloc:
	ita.set_vector((%(dataCounterDefs)s),1)
	return
"""
    #results.append((%(resultTuples)s))\n
    _end_code="""
	ita.isDone=1 # COMPLETED THIS QUERY

from pygr import cdict
def %(name)s(self,g,int maxhit=1000,cdict.IntTupleArray ita=None,qml=None):
	if not isinstance(g,cdict.CGraphDict):
		g=cdict.CGraphDict(g,cdict.KeyIndex())
	if ita is None:
		ita=cdict.IntTupleArray(maxhit,%(nEdges)d,2,%(lastGenerator)d)
	ita.n=0
	c_%(name)s(g,ita,%(itaVector)s) # RUN THE QUERY
	if qml is not None:
		qml.matches=ita
		return qml
	else:
		return cdict.QueryMatchList(self,ita,g,%(name)s)
"""
    def compile(self):
        'compile using Pyrex, Distutils, and finally import!'
        import os
        try: # WE NEED ACCESS TO PYGR SOURCE TO ACCESS cgraph FUNCTIONS IN THIS MODULE...
            pygrpath=os.environ['PYGRPATH']
        except KeyError:
            raise OSError((1,"""pyrex compilation requires access to pygr source.
Please set the environment variable PYGRPATH to the top of the pygr source package."""))
        if not os.access(pygrpath+'/pygr/cgraph.c',os.R_OK):
            raise OSError((1,"""Unable to access %s/pygr/cgraph.c.
Is PYGRPATH set to the top of the pygr source package?""" % pygrpath))
        exit_status=os.system('cp %s/pygr/cgraph.c %s/pygr/cgraph.h %s/pygr/cdict.pxd .'
                              % (pygrpath,pygrpath,pygrpath))
        if exit_status!=0: # MAKE SURE THE COPY SUCCEEDED
            raise OSError((exit_status,
                           'unable to copy source code to this directory.'))
        modulename=self.name+str(id(self)) # CONSTRUCT A UNIQUE NAME FOR MODULE
        myfile=file(modulename+'.pyx','w') # GENERATE PYREX CODE
        myfile.write(str(self)) # WRITE CODE
        myfile.close()
        exit_status=os.system('pyrexc %s.pyx' % (modulename))
        if exit_status!=0: # RUN THE PYREX COMPILER TO PRODUCE C
            raise OSError((exit_status,
                           'pyrex compilation failed. Is pyrex missing or not in your PATH?'))
        from distutils.core import setup, Extension
        module1 = Extension(modulename,sources = ['cgraph.c', modulename+'.c'])
        setup(name = modulename,description = 'autogenerated by pygr.graphquery',
              ext_modules = [module1],script_args=['build']) # BUILD MODULE USING distutils
        modulepath=find_distutils_lib() # FIND DIRECTORY CONTAINING OUR BUILT MODULE
        # WORKAROUND NASTY PROBLEM WITH PYREX cimport: NO WAY TO TELL IT MODULE IS
        # IN A SUBDIRECTORY! I.E. from pygr cimport cdict FAILS, AS DOES cimport pygr.cdict
        # INSTEAD WE HAVE TO SAY cimport cdict
        import sys
        import os.path
        from pygr import cdict
        sys.path+=[os.path.dirname(cdict.__file__)] # SO WE HAVE TO ADD ITS LOCATION TO OUR PATH
        import imp # FINALLY, TRY TO IMPORT THE NEW MODULE
        modulefile,path,desc=imp.find_module(modulename,[modulepath])
        self._module=imp.load_module(modulename,modulefile,path,desc) # LOAD & BIND THE MODULE
        self._compiled[self.name]=getattr(self._module,self.name) # BIND OUR QUERY FUNCTION
        modulefile.close()

class GraphQueryIterator(object):
    """iterator for a single node in graph query.  Subclasses provide different
    flavors of generator methods: graph w/ edges; container; attr; function etc."""
    def __init__(self,fromNode,queryNode,dataGraph,queryGraph,
                 dataMatch,queryMatch,attrDict={}):
        self.fromNode=fromNode
        self.queryNode=queryNode
        self.dataGraph=dataGraph
        self.queryGraph=queryGraph
        self.dataMatch=dataMatch
        self.queryMatch=queryMatch
        self.dataNode=None
        for attr,val in attrDict.items(): # SAVE OUR EDGE INFO
            setattr(self,attr,val) # JUST ATTACH EDGE INFO AS ATTRIBUTES OF THIS OBJ
##         try:
##             self.nq=len(self.queryGraph[self.queryNode])
##         except KeyError:
##             self.nq=0
    def restart(self):
        "reset the iterator to its beginning"
        self.mustMark=True
        if self.fromNode is not None:
            self.dataNode=self.queryMatch[self.fromNode]
        if self.queryNode in self.queryMatch: # ALREADY ASSIGNED TO A DATA NODE
            self.mustMark=False
            if self.fromNode is None: # NO PATH TO HERE, SO JUST ECHO SINGLETON
                self.iterator=self.echo()
            else: # CHECK FOR PATH FROM fromNode TO THIS DATA NODE
                self.iterator=self.closure()
        else:
            self.iterator=self.generate()
    def echo(self):
        "Just return what our node is ALREADY matched to"
        yield self.queryMatch[self.queryNode],None
    def closure(self):
        "this node is already matched.  Make sure there exists a path to it (closure)"
        targetNode=self.queryMatch[self.queryNode]
        try: # GENERATE IFF container HAS EDGE TO targetNode
            container=self.dataGraph[self.dataNode]
            yield targetNode,container[targetNode]
        except KeyError: pass
    def generate(self):
        "generate all neighbors of data node matched to fromNode"
        try:
            it=self.dataGraph[self.dataNode]
        except KeyError:
            pass
        else:
            for i,e in it.items():
                yield i,e
    _generator_code="""
try: # GENERATOR
	it%(level)d=%(dataGraph)s[%(fromDataNode)s]
except KeyError:
	continue
for %(toDataNode)s,%(toDataEdge)s in it%(level)d.items():"""
    _index_code="""
if %(toDataNode)s in dataMatch:
	continue
else:
	dataMatch[%(toDataNode)s]=%(toQueryNode)s
	#queryMatch[%(toQueryNode)s]=%(toDataNode)s
	#queryMatch[%(fromQueryNode)s,%(toQueryNode)s]=%(toDataEdge)s
# THIS LINE PREVENTS COMPILER FROM PUSHING EXTRA INDENTATION LAYER"""
    _filter_code="if self.gqi[%(level)d].filter(toNode=%(toDataNode)s,fromNode=%(fromDataNode)s,edge=%(toDataEdge)s,queryMatch=self.queryMatch,gqi=self.gqi[%(level)d]):"
    _unmark_code="""
del dataMatch[%(toDataNode)s]
#del queryMatch[%(toQueryNode)s]
#del queryMatch[%(fromQueryNode)s,%(toQueryNode)s]"""
    _closure_code="""
try: # CLOSURE
	%(toDataEdge)s=%(dataGraph)s[%(fromDataNode)s][%(toDataNode)s]
except KeyError:
	pass
else:
	#queryMatch[%(fromQueryNode)s,%(toQueryNode)s]=%(toDataEdge)s"""
    _unmark_closure_code="#del queryMatch[%(fromQueryNode)s,%(toQueryNode)s]"
    # PYREX CODE
    _pyrex_generator_code="""
p_temp=cdict.cgraph_getitem(%(dataGraph)s,%(fromDataNode)s)
if p_temp!=NULL:
	%(fromDataDict)s=p_temp[0].v
	%(toDataPtr)s=%(fromDataDict)s[0].dict
	while %(toDataCounter)s < %(fromDataDict)s[0].n:
		%(toDataNode)s=%(toDataPtr)s[%(toDataCounter)s].k
		%(toDataEdge)s=%(toDataPtr)s[%(toDataCounter)s].v
"""
    #for %(toDataCounter)s from 0 <= %(toDataCounter)s < %(fromDataDict)s[0].n:
    _pyrex_index_code='if %(toDataNodeUnmatched)s:'
    _pyrex_unmark_code='# COMPILER NEEDS AT LEAST ONE LINE, EVEN THOUGH NOTHING TO DO HERE'
    _pyrex_next_code='%(toDataCounter)s=%(toDataCounter)s+1'
    _pyrex_end_code='%(toDataCounter)s=0'
    _pyrex_closure_code="""
p_temp=cdict.cgraph_getitem(%(dataGraph)s,%(fromDataNode)s)
if p_temp!=NULL:
	%(fromDataDict)s=p_temp[0].v
	pd_temp=cdict.cdict_getitem(%(fromDataDict)s,%(toDataNode)s)
	if pd_temp!=NULL:
		%(toDataEdge)s=pd_temp[0].v
"""
    _pyrex_unmark_closure_code='# COMPILER NEEDS AT LEAST ONE LINE, EVEN THOUGH NOTHING TO DO HERE'
    def unmark(self):
        "erase node and edge assignment associated with the iterator"
        if self.mustMark and self.queryNode in self.queryMatch:
            i=self.queryMatch[self.queryNode] # ERASE OLD NODE ASSIGNMENT
            del self.dataMatch[i]
            del self.queryMatch[self.queryNode]
            try: del self.queryMatch[(self.fromNode,self.queryNode)] # ERASE OLD EDGE
            except KeyError: pass
    def next(self):
        "returns the next node from iterator that passes all tests"
        self.unmark()
        for i,e in self.iterator: # RETURN THE FIRST ACCEPTABLE ITEM
##             try: # THIS EDGE COUNT CHECK WON'T WORK IF MULTIPLE GRAPHS BEING QUERIED!!
##                 nd=len(self.dataGraph[i]) # CHECK # OF OUTGOING EDGES
##             except KeyError:
##                 nd=0
##             if nd>=self.nq and
            if self.mustMark and i in self.dataMatch:
                continue # THIS NODE ALREADY ASSIGNED. CAN'T REUSE IT!
            if (not hasattr(self,'filter') # APPLY EDGE / NODE TESTS HERE
                or self.filter(toNode=i,fromNode=self.dataNode,edge=e,
                               queryMatch=self.queryMatch,gqi=self)):
                if self.mustMark:
                    self.dataMatch[i]=self.queryNode # SAVE THIS NODE ASSIGNMENT
                    self.queryMatch[self.queryNode]=i
                    if e is not None: # SAVE EDGE INFO, IF ANY
                        self.queryMatch[(self.fromNode,self.queryNode)]=e
                return i # THIS ITEM PASSES ALL TESTS. RETURN IT
        return None # NO MORE ITEMS FROM THE ITERATOR

class ContainerGQI(GraphQueryIterator):
    "Iterate over all nodes in self.dataGraph"
    def generate(self):
        for i in self.dataGraph:
            yield i,None
    _generator_code="""
%(toDataEdge)s=None # CONTAINER
for %(toDataNode)s in dataGraph:"""
    _pyrex_generator_code="""
%(toDataPtrCont)s=%(dataGraph)s[0].dict
for %(toDataCounter)s from 0 <= %(toDataCounter)s < %(dataGraph)s[0].n:
	%(toDataNode)s=%(toDataPtrCont)s[%(toDataCounter)s].k
	%(toDataEdge)s= -1 # NO EDGE INFO
"""

class AttributeGQI(GraphQueryIterator):
    "Iterate over all nodes in attribute called self.attr of self.dataNode"
    def generate(self):
        for i,e in getattr(self.dataNode,self.attr).items():
            yield i,e
    _generator_code="for %(toDataNode)s,%(toDataEdge)s in getattr(%(fromDataNode)s,'%(attr)s').items():"

class AttrContainerGQI(GraphQueryIterator):
    "Iterate over all nodes in attribute called self.attrN of self.dataNode (no edge info)"
    def generate(self):
        for i in getattr(self.dataNode,self.attrN):
            yield i,None
    _generator_code="""
%(toDataEdge)s=None
for %(toDataNode)s in getattr(%(fromDataNode)s,'%(attrN)s'):"""

class CallableGQI(GraphQueryIterator):
    "Call the specified function self.f as iterator"
    def generate(self):
        for i,e in self.f(self.dataNode,self.dataGraph,self):
            yield i,e
    _generator_code="for %(toDataNode)s,%(toDataEdge)s in self.gqi[%(level)d].f(%(fromDataNode)s,dataGraph,self.gqi[%(level)d]):"

class CallableContainerGQI(GraphQueryIterator):
    "Call the specified function self.fN as iterator (no edge info)"
    def generate(self):
        for i in self.fN(self.dataNode,self.dataGraph,self):
            yield i,None
    _generator_code="""
%(toDataEdge)s=None
for %(toDataNode)s in self.gqi[%(level)d].fN(%(fromDataNode)s,dataGraph,self.gqi[%(level)d]):"""

class SubqueryGQI(GraphQueryIterator):
    """base class for running subqueries; produces a union of all subquery solutions.
    self.subqueries must be a list of graph objects, each representing a subquery"""
    def __init__(self,fromNode,queryNode,dataGraph,queryGraph,
                 dataMatch,queryMatch,attrDict={}):
        GraphQueryIterator.__init__(self,fromNode,queryNode,dataGraph,queryGraph,
                                    dataMatch,queryMatch,attrDict)
        self.graphQueries=[]
        for qg in self.subqueries: # INITIALIZE OUR SUBQUERIES
            self.graphQueries.append(self.gqClass(self.dataGraph,qg,
                                                  dataMatch,queryMatch))
    def closure(self):
        "Generate the union of all solutions returned by all subqueries"
        for gq in self.graphQueries:
            for d in gq: # LAUNCHES THE GRAPH QUERY, GETS ALL ITS SOLUTIONS
                yield self.queryMatch[self.queryNode],None
            gq.cleanup() # REMOVE ITS QUERY-DATA MAPPING BEFORE GOING TO NEXT SUBQUERY

def newGQI(self,oclass,fromNode,toNode,dataGraph,queryGraph,
           dataMatch,queryMatch,gqiDict):
    """figure out a default GQI class to use, based on an attribute dictionary,
    then return a new object of that class initialized with the input data."""
    if fromNode is not None and toNode is not None and \
       queryGraph[fromNode][toNode] is not None:
        kwargs=queryGraph[fromNode][toNode]
        for attr in kwargs:
            try: oclass=gqiDict[attr] # USE ATTRIBUTE NAME TO DETERMINE DEFAULT CLASS
            except KeyError: pass
    else:
        kwargs={}
    try: oclass=kwargs['__class__'] # LET USER SET CLASS TO USE
    except KeyError: pass
    return oclass(fromNode,toNode,dataGraph,queryGraph,dataMatch,queryMatch,kwargs)
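# For illustration: the attribute dictionary stored on a query edge picks the iterator
# class via GraphQuery.gqiDict below.  Using a hypothetical attribute name 'exons':
#     queryGraph[a][b]={'attr':'exons'}       # -> AttributeGQI
#     queryGraph[a][b]={'f':someCallable}     # -> CallableGQI
#     queryGraph[a][b]={'filter':someTest}    # default class, but someTest is applied
#                                             #    to each candidate node / edge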

class GraphQuery(object):
    "represents a single query or subquery"
    # DEFAULT MAPPING OF ATTRIBUTE NAMES TO GQI CLASSES TO USE WITH THEM
    gqiDict={'attr':AttributeGQI,
             'attrN':AttrContainerGQI,
             'f':CallableGQI,
             'fN':CallableContainerGQI,
             'subqueries':SubqueryGQI}
    newGQI=newGQI # USE THIS METHOD TO CHOOSE GQI CLASS FOR EACH ITERATOR
    def __init__(self,dataGraph,queryGraph,dataMatch=None,queryMatch=None):
        'Enumerate nodes in queryGraph in BFS order, constructing iterator stack'
        self.dataGraph=dataGraph
        self.queryGraph=queryGraph
        if dataMatch is None:
            dataMatch={}
        if queryMatch is None:
            queryMatch={}
        self.dataMatch=dataMatch
        self.queryMatch=queryMatch
        # 1ST NEED TO FIND START NODES, PROCESS THEM 1ST, MARK THEM AS GENERATE ALL...
        isFollower={}
        for node in queryGraph:
            for node2 in queryGraph[node]:
                isFollower[node2]=True # node2 HAS INCOMING EDGE, SO NOT A START NODE!
        q=[]
        self.q=q
        n=0
        for node in queryGraph: # PLACE START NODES AT HEAD OF QUEUE
            if node not in isFollower:
                q.append(self.newGQI(ContainerGQI,None,node,dataGraph,queryGraph,
                                     dataMatch,queryMatch,self.gqiDict))
                n += 1
        if n==0: # NO START NODES, SO JUST ADD THE FIRST QUERY NODE TO THE QUEUE
            for node in queryGraph:
                q.append(self.newGQI(ContainerGQI,None,node,dataGraph,queryGraph,
                                     dataMatch,queryMatch,self.gqiDict))
                n += 1
                break # JUST ADD THE FIRST NODE TO THE QUEUE
        if n==0: raise ValueError('query graph is empty!')
        visited={}
        i=0
        while i<n: # ADD NODE TO QUEUE EVEN IF ALREADY VISITED, BUT DON'T ADD ITS NEIGHBORS
            if q[i].queryNode not in visited: # ADD NEIGHBORS TO THE QUEUE
                visited[q[i].queryNode]=True # MARK AS VISITED
                for node in queryGraph[q[i].queryNode]: # GET ALL ITS NEIGHBORS
                    #print 'QUEUE:',n,node
                    q.append(self.newGQI(GraphQueryIterator,q[i].queryNode,node,
                                         dataGraph,queryGraph,
                                         dataMatch,queryMatch,self.gqiDict))
                    n+=1
            i+=1
    def __iter__(self):
        "generates all subgraphs of dataGraph matching queryGraph"
        i=0
        n=len(self.q)
        self.q[0].restart() # PRELOAD ITERATOR FOR 1ST NODE
        while i>=0:
            dataNode=self.q[i].next()
            if dataNode is not None:
                #print i,qu[i].queryNode,dataNode
                if i+1<n: # MORE LEVELS TO QUERY?
                    i += 1 # ADVANCE TO NEXT QUERY LEVEL
                    self.q[i].restart()
                else: # GRAPH MATCH IS COMPLETE!
                    yield self.queryMatch # RETURN COMPLETE MATCH
            else: # NO MORE ACCEPTABLE NODES AT THIS LEVEL, SO BACKTRACK
                i -= 1
    def cleanup(self):
        "erase any query:data node matching associated with this subquery"
        for q in self.q:
            q.unmark()
    def compile(self,globals=None,compilerClass=GraphQueryCompiler,**kwargs):
        'return a compiled version of this query, using globals namespace if specified'
        compiler=compilerClass(globalDict=globals,**kwargs)
        for gqi in self.q:
            compiler+=gqi
        return compiler

SubqueryGQI.gqClass=GraphQuery # CLASS FOR CONSTRUCTING SUBQUERIES
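
# Minimal usage sketch (a, b, edgeInfo and the query node labels are placeholders):
# build a data graph and a query graph with dictGraph (from pygr.mapping), then
# enumerate matching subgraphs.
#     dataGraph=dictGraph()
#     dataGraph+=a
#     dataGraph[a][b]=edgeInfo                 # data edge a -> b
#     queryGraph=dictGraph()
#     queryGraph+=0
#     queryGraph[0][1]=None                    # query: match any edge 0 -> 1
#     for match in GraphQuery(dataGraph,queryGraph):
#         print match                          # queryMatch dict for this solution
# The same query can also be compiled once and rerun:
#     compiled=GraphQuery(dataGraph,queryGraph).compile()
#     for match in compiled.run(dataGraph):
#         print match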