3 from __future__
import generators
7 class QueryMatchWrapper(dict):
8 "build a queryMatch mapping on demand, since its not actually needed during query traversal"
9 def __init__(self
,dataMatch
,compiler
):
11 for k
,v
in dataMatch
.items(): # INVERT THE DATA MATCH MAPPING
13 for i
in range(compiler
.n
): # ALSO SAVE MAPPINGS TO DATA EDGES
15 self
[gqi
.fromNode
,gqi
.queryNode
]=compiler
.dataEdge
[i
]
17 class QueryMatchDescriptor(object):
18 def __get__(self
,obj
,objtype
):
19 return QueryMatchWrapper(obj
.dataMatch
,obj
)
22 class QueryMatcher(object):
23 "map a query node or edge on demand"
24 def __init__(self
,compiler
):
25 self
.compiler
=compiler
26 def __getitem__(self
,k
):
27 for q
,d
in self
.iteritems():
32 for k
,v
in self
.iteritems():
35 for dataNode
,queryNode
in self
.compiler
.dataMatch
.items():
36 yield queryNode
,dataNode
# RETURN NODE MAPPINGS
37 for i
in range(self
.compiler
.n
): # ALSO SAVE MAPPINGS TO DATA EDGES
38 gqi
=self
.compiler
.gqi
[i
] # RETURN EDGE MAPPINGS
39 yield (gqi
.fromNode
,gqi
.queryNode
),self
.compiler
.dataEdge
[i
]
41 return [x
for x
in self
.iteritems()]
43 return '{'+','.join([repr(k
)+':'+repr(v
) for k
,v
in self
.iteritems()])+'}'
47 class GraphQueryCompiler(object):
48 'compile a series of GraphQueryIterators into python code, run them'
49 #queryMatch=QueryMatchDescriptor()
50 _lang
="" # NO LANGUAGE STEM
51 def __init__(self
,name
='graphquery',globalDict
=None):
59 self
.queryLayerGraph
=dictGraph()
61 if globalDict
is None:
64 self
._compiled
=globalDict
65 self
.queryMatch
=QueryMatcher(self
)
66 def __getitem__(self
,key
):
67 'return appropropriate code for accessing nodes / edges in data or query'
72 elif key
=='dataGraph':
73 queryEdge
=self
.gqi
[self
.n
].queryGraph
[self
.gqi
[self
.n
].fromNode
][self
.gqi
[self
.n
].queryNode
]
74 try: # CHECK IF QUERY EDGE USES A NON-DEFAULT DATA GRAPH
75 dg
=queryEdge
['dataGraph']
76 return 'self.gqi[%d].dataGraph' % self
.n
77 except (TypeError,KeyError):
80 return 'self.gqi[%d].filter' % self
.n
81 elif key
=='toQueryNode':
82 return 'self.gqi[%d].queryNode' % self
.n
83 elif key
=='fromQueryNode':
84 return 'self.gqi[%d].fromNode' % self
.n
86 layer
=self
.queryLayerGraph
[self
.gqi
[self
.n
].queryNode
]
88 layer
=self
.queryLayerGraph
[self
.gqi
[self
.n
].fromNode
]
89 if key
[-8:]=='DataNode': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
90 return 'dataNode%d' %layer
.values()[0]
91 if key
[-8:]=='DataEdge': # GET LAST LAYER, WHERE THIS EDGE CREATED
92 return 'dataEdge[%d]' %layer
.values()[-1]
96 return getattr(self
.gqi
[self
.n
],key
)
97 except AttributeError:
98 raise KeyError('%s not a valid GraphQueryCompiler key' % key
)
100 def indent_code(self
,codestr
,current_indent
):
101 'calculate indentation levels added by code in codestr'
102 codestr
=codestr
% self
# PERFORM MACRO SUBSTITUTIONS
103 lines
=codestr
.split('\n')
105 if lastline
=='' and len(lines
)>1: # IGNORE TERMINAL BLANK LINE
107 nindent
=len(lastline
.split('\t'))-1 # DETERMINE FINAL INDENTATION LEVEL
108 if len(lastline
)>0 and lastline
[-1]==':':
110 s
='' # NOW FORMAT THE CODE WITH PROPER INDENTATION LEVEL
112 s
+=current_indent
*'\t'+line
+'\n'
113 return s
,current_indent
+nindent
115 def __iadd__(self
,gqi
):
116 'add a GraphQueryIterator to be compiled into this query'
118 if gqi
.queryNode
not in self
.queryLayerGraph
: # NOT ALREADY BOUND?
119 self
.queryLayerGraph
+=gqi
.queryNode
120 codestr
=getattr(gqi
,self
._lang
+'_generator_code')
121 markstr
=getattr(gqi
,self
._lang
+'_index_code')
122 unmarkstr
=getattr(gqi
,self
._lang
+'_unmark_code')
124 endcode
=getattr(gqi
,self
._lang
+'_end_code')
125 except AttributeError:
128 nextcode
=getattr(gqi
,self
._lang
+'_next_code')
129 except AttributeError:
131 self
.lastGenerator
=self
.n
133 codestr
=getattr(gqi
,self
._lang
+'_closure_code')
135 unmarkstr
=getattr(gqi
,self
._lang
+'_unmark_closure_code')
137 endcode
=getattr(gqi
,self
._lang
+'_end_closure_code')
138 except AttributeError:
141 nextcode
=getattr(gqi
,self
._lang
+'_next_closure_code')
142 except AttributeError:
144 #BIND QUERY EDGE TO THIS LAYER
145 self
.queryLayerGraph
[gqi
.queryNode
][gqi
.fromNode
]=self
.n
146 try: # GET INDENTATION LEVEL FROM PREVIOUS LAYER
147 current_indent
=self
.indent
[-1]
149 current_indent
=1 # TOPLEVEL: MUST INDENT INSIDE def
150 self
.end_code
.append(self
.indent_code(endcode
,current_indent
)[0])
151 s
,current_indent
=self
.indent_code(codestr
,current_indent
)
152 self
.next_code
.append(self
.indent_code(nextcode
,current_indent
)[0])
153 if hasattr(gqi
,'filter'):
154 s2
,current_indent
=self
.indent_code(getattr(gqi
,self
._lang
+'_filter_code'),
157 if hasattr(gqi
,'filtercode'):
158 s2
,current_indent
=self
.indent_code(gqi
.filtercode
,current_indent
)
160 if markstr
is not None:
161 s2
,current_indent
=self
.indent_code(markstr
,current_indent
)
163 if unmarkstr
is not None:
164 s2
,tail_indent
=self
.indent_code(unmarkstr
,current_indent
)
165 self
.unmark_code
.append(s2
)
167 self
.unmark_code
.append('') # NO UNMARK CODE, SO JUST APPEND BLANK
169 self
.indent
.append(current_indent
)
171 return self
# iadd MUST RETURN self!!
173 def %(name)s(self,dataGraph,dataMatch=None,queryMatch=None):
174 if dataMatch is None: dataMatch={}
175 self.dataMatch=dataMatch
176 dataEdge=%(n)d*[None]
177 self.dataEdge=dataEdge
179 _yield_code
='yield self.queryMatch\n'
182 'generate code for this query, as a string function definition'
183 s
=self
._def
_code
% self
184 for layer
in self
.code
: # GENERATE ALL THE TRAVERSAL CODE
186 s2
=self
.indent_code(self
._yield
_code
,self
.indent
[-1])[0] # yield THE RESULT
188 i
=len(self
.unmark_code
)-1
189 while i
>=0: # GENERATE THE UNMARKING CODE...
190 s
+=self
.unmark_code
[i
]
194 s
+=self
._end
_code
% self
196 def run(self
,dataGraph
,*args
,**kwargs
):
197 'run the query, pre-compiling it if necessary'
198 try: # JUST TRY RUNNING OUR FUNCTION: IT RETURNS AN ITERATOR
199 return self
._compiled
[self
.name
](self
,dataGraph
,*args
,**kwargs
)
202 return self
._compiled
[self
.name
](self
,dataGraph
,*args
,**kwargs
) # RUN IT
204 'compile using Python exec statement'
205 exec str(self
) in self
._compiled
# COMPILE OUR FUNCTION
209 def find_distutils_lib(path
='build'):
210 'locate the build/lib path where distutils builds modules'
212 dirs
=os
.listdir('build') # TRY TO FIND LIB DIRECTORY CONTAINING BUILT MODULE
216 raise OSError((1,'unable to locate build/lib where distutils built your module!'))
220 class GraphQueryPyrex(GraphQueryCompiler
):
221 'compile a series of GraphQueryIterators into pyrex code, run them'
222 #queryMatch=QueryMatchDescriptor()
223 _lang
="_pyrex" # NO LANGUAGE STEM
224 def __getitem__(self
,key
):
225 'return appropropriate code for accessing nodes / edges in data or query'
230 elif key
=='dataGraph':
231 try: # CHECK IF QUERY EDGE USES A NON-DEFAULT DATA GRAPH
232 queryEdge
=self
.gqi
[self
.n
].queryGraph
[self
.gqi
[self
.n
].fromNode
][self
.gqi
[self
.n
].queryNode
]
233 dg
=queryEdge
['dataGraph']
234 return 'self.gqi[%d].dataGraph' % self
.n
235 except (TypeError,KeyError):
238 return 'self.gqi[%d].filter' % self
.n
239 elif key
=='toQueryNode':
240 return 'self.gqi[%d].queryNode' % self
.n
241 elif key
=='fromQueryNode':
242 return 'self.gqi[%d].fromNode' % self
.n
244 layer
=self
.queryLayerGraph
[self
.gqi
[self
.n
].queryNode
]
245 elif key
[:4]=='from':
246 layer
=self
.queryLayerGraph
[self
.gqi
[self
.n
].fromNode
]
247 if key
[-8:]=='DataNode': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
248 return 'dataNode%d' %layer
.values()[0]
249 if key
[-8:]=='DataEdge': # GET LAST LAYER, WHERE THIS EDGE CREATED
250 return 'dataEdge%d' %layer
.values()[-1]
251 if key
[-8:]=='DataDict': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
252 return 'cDict%d' %layer
.values()[0]
253 if key
[-7:]=='DataPtr': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
254 return 'pDictEntry%d' %layer
.values()[0]
255 if key
[-11:]=='DataPtrCont': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
256 return 'pGraphEntry%d' %layer
.values()[0]
257 if key
[-11:]=='DataCounter': # GET 1ST LAYER WHERE THIS NODE ASSIGNED
258 return 'i%d' %layer
.values()[0]
259 if key
=='toDataNodeUnmatched':
260 l
=['dataNode%d!=dataNode%d' %(self
.queryLayerGraph
[self
.gqi
[i
].queryNode
].values()[0],
261 self
.queryLayerGraph
[self
.gqi
[self
.n
].queryNode
].values()[0])
262 for i
in range(self
.n
)]
264 return ' and '.join(l
)
267 if key
=='dataNodeDefs':
268 return ','.join(['dataNode%d' %i for i
in range(self
.n
)])
269 if key
=='dataEdgeDefs':
270 return ','.join(['dataEdge%d' %i for i
in range(self
.n
)])
271 if key
=='dataDictDefs':
272 return ','.join(['*cDict%d' %i for i
in range(self
.n
)])
273 if key
=='dataPtrDefs':
274 return ','.join(['*pDictEntry%d' %i for i
in range(self
.n
)])
275 if key
=='dataPtrContDefs':
276 return ','.join(['*pGraphEntry%d' %i for i
in range(self
.n
)])
277 if key
=='dataCounterDefs':
278 return ','.join(['i%d' %i for i
in range(self
.n
)])
279 if key
=='dataCounterArgs':
280 return ','.join(['int i%d' %i for i
in range(self
.n
)])
282 return ','.join(['ita.vector[%d]' %i for i
in range(self
.n
)])
284 return ',\\\n'.join(['p_ita[%d]' %i for i
in range(2*self
.n
)])
285 if key
=='resultTuple':
286 return ',\\\n'.join(['dataNode%d,dataEdge%d'
287 %(self
.queryLayerGraph
[self
.gqi
[i
].queryNode
].values()[0],i
)
288 for i
in range(self
.n
)])
289 if key
=='resultTuples':
290 return ','.join(['(dataNode%d,dataEdge%d)'
291 %(self
.queryLayerGraph
[self
.gqi
[i
].queryNode
].values()[0],i
)
292 for i
in range(self
.n
)])
293 if key
=='level' or key
=='nEdges':
295 if key
=='lastGenerator':
296 return self
.lastGenerator
298 return getattr(self
.gqi
[self
.n
],key
)
299 except AttributeError:
300 raise KeyError('%s not a valid GraphQueryPyrex key' % key
)
304 cdef c_%(name)s(cdict.CGraphDict cgd,cdict.IntTupleArray ita,%(dataCounterArgs)s):
305 cdef cdict.CGraph *dataGraph
306 cdef cdict.CDict %(dataDictDefs)s
307 cdef cdict.CDictEntry %(dataPtrDefs)s
308 cdef cdict.CDictEntry *pd_temp
309 cdef cdict.CGraphEntry %(dataPtrContDefs)s
310 cdef cdict.CGraphEntry *p_temp
311 #cdef int %(dataCounterDefs)s
312 cdef int %(dataNodeDefs)s
313 cdef int %(dataEdgeDefs)s
319 %(itaTuple)s=%(resultTuple)s
320 p_ita=p_ita+2*%(nEdges)d
322 if ita.n>=ita.n_alloc:
323 ita.set_vector((%(dataCounterDefs)s),1)
326 #results.append((%(resultTuples)s))\n
328 ita.isDone=1 # COMPLETED THIS QUERY
330 from pygr import cdict
331 def %(name)s(self,g,int maxhit=1000,cdict.IntTupleArray ita=None,qml=None):
332 if not isinstance(g,cdict.CGraphDict):
333 g=cdict.CGraphDict(g,cdict.KeyIndex())
335 ita=cdict.IntTupleArray(maxhit,%(nEdges)d,2,%(lastGenerator)d)
337 c_%(name)s(g,ita,%(itaVector)s) # RUN THE QUERY
342 return cdict.QueryMatchList(self,ita,g,%(name)s)
345 'compile using Pyrex, Distutils, and finally import!'
347 try: # WE NEED ACCESS TO PYGR SOURCE TO ACCESS cgraph FUNCTIONS IN THIS MODULE...
348 pygrpath
=os
.environ
['PYGRPATH']
350 raise OSError((1,"""pyrex compilation requires access to pygr source.
351 Please set the environment variable PYGRPATH to the top of the pygr source package."""))
352 if not os
.access(pygrpath
+'/pygr/cgraph.c',os
.R_OK
):
353 raise OSError((1,"""Unable to access %s/pygr/cgraph.c.
354 Is PYGRPATH set to the top of the pygr source package?""" % pygrpath
))
355 exit_status
=os
.system('cp %s/pygr/cgraph.c %s/pygr/cgraph.h %s/pygr/cdict.pxd .'
356 % (pygrpath
,pygrpath
,pygrpath
))
357 if exit_status
!=0: # RUN THE PYREX COMPILER TO PRODUCE C
358 raise OSError((exit_status
,
359 'unable to copy source code to this directory.'))
360 modulename
=self
.name
+str(id(self
)) # CONSTRUCT A UNIQUE NAME FOR MODULE
361 myfile
=file(modulename
+'.pyx','w') # GENERATE PYREX CODE
362 myfile
.write(str(self
)) # WRITE CODE
364 exit_status
=os
.system('pyrexc %s.pyx' % (modulename
))
365 if exit_status
!=0: # RUN THE PYREX COMPILER TO PRODUCE C
366 raise OSError((exit_status
,
367 'pyrex compilation failed. Is pyrex missing or not in your PATH?'))
368 from distutils
.core
import setup
, Extension
369 module1
= Extension(modulename
,sources
= ['cgraph.c', modulename
+'.c'])
370 setup (name
= modulename
,description
= 'autogenerated by pygr.graphquery',
371 ext_modules
= [module1
],script_args
=['build']) # BUILD MODULE USING distutils
372 modulepath
=find_distutils_lib() # FIND DIRECTORY CONTAINING OUR BUILT MODULE
373 # WORKAROUND NASTY PROBLEM WITH PYREX cimport: NO WAY TO TELL IT MODULE IS
374 # IN A SUBDIRECTORY! I.E. from pygr cimport cdict FAILS, AS DOES cimport pygr.cdict
375 # INSTEAD WE HAVE TO SAY cimport cdict
378 from pygr
import cdict
379 sys
.path
+=[os
.path
.dirname(cdict
.__file
__)] # SO WE HAVE TO ADD ITS LOCATION TO OUR PATH
380 import imp
# FINALLY, TRY TO IMPORT THE NEW MODULE
381 modulefile
,path
,desc
=imp
.find_module(modulename
,[modulepath
])
382 self
._module
=imp
.load_module(modulename
,modulefile
,path
,desc
) # LOAD & BIND THE MODULE
383 self
._compiled
[self
.name
]=getattr(self
._module
,self
.name
) # BIND OUR QUERY FUNCTION
390 class GraphQueryIterator(object):
391 """iterator for a single node in graph query. Subclasses provide different
392 flavors of generator methods: graph w/ edges; container; attr; function etc."""
393 def __init__(self
,fromNode
,queryNode
,dataGraph
,queryGraph
,
394 dataMatch
,queryMatch
,attrDict
={}):
395 self
.fromNode
=fromNode
396 self
.queryNode
=queryNode
397 self
.dataGraph
=dataGraph
398 self
.queryGraph
=queryGraph
399 self
.dataMatch
=dataMatch
400 self
.queryMatch
=queryMatch
402 for attr
,val
in attrDict
.items(): # SAVE OUR EDGE INFO
403 setattr(self
,attr
,val
) # JUST ATTACH EDGE INFO AS ATTRIBUTES OF THIS OBJ
405 ## self.nq=len(self.queryGraph[self.queryNode])
410 "reset the iterator to its beginning"
412 if self
.fromNode
!= None:
413 self
.dataNode
=self
.queryMatch
[self
.fromNode
]
414 if self
.queryNode
in self
.queryMatch
: # ALREADY ASSIGNED TO A DATA NODE
416 if self
.fromNode
is None: # NO PATH TO HERE, SO JUST ECHO SINGLETON
417 self
.iterator
=self
.echo()
418 else: # CHECK FOR PATH FROM fromNode TO THIS DATA NODE
419 self
.iterator
=self
.closure()
421 self
.iterator
=self
.generate()
424 "Just return what our node is ALREADY matched to"
425 yield self
.queryMatch
[self
.queryNode
],None
428 "this node is already matched. Make sure there exists path to it (closure)"
429 targetNode
=self
.queryMatch
[self
.queryNode
]
430 try: # GENERATE IFF container HAS EDGE TO targetNode
431 container
=self
.dataGraph
[self
.dataNode
]
432 yield targetNode
,container
[targetNode
]
436 "generate all neighbors of data node matched to fromNode"
438 it
=self
.dataGraph
[self
.dataNode
]
442 for i
,e
in it
.items():
447 it%(level)d=%(dataGraph)s[%(fromDataNode)s]
450 for %(toDataNode)s,%(toDataEdge)s in it%(level)d.items():"""
452 if %(toDataNode)s in dataMatch:
455 dataMatch[%(toDataNode)s]=%(toQueryNode)s
456 #queryMatch[%(toQueryNode)s]=%(toDataNode)s
457 #queryMatch[%(fromQueryNode)s,%(toQueryNode)s]=%(toDataEdge)s
458 # THIS LINE PREVENTS COMPILER FROM PUSHING EXTRA INDENTATION LAYER"""
459 _filter_code
="if self.gqi[%(level)d].filter(toNode=%(toDataNode)s,fromNode=%(fromDataNode)s,edge=%(toDataEdge)s,queryMatch=self.queryMatch,gqi=self.gqi[%(level)d]):"
461 del dataMatch[%(toDataNode)s]
462 #del queryMatch[%(toQueryNode)s]
463 #del queryMatch[%(fromQueryNode)s,%(toQueryNode)s]"""
466 %(toDataEdge)s=%(dataGraph)s[%(fromDataNode)s][%(toDataNode)s]
470 #queryMatch[%(fromQueryNode)s,%(toQueryNode)s]=%(toDataEdge)s"""
471 _unmark_closure_code
="#del queryMatch[%(fromQueryNode)s,%(toQueryNode)s]"
474 _pyrex_generator_code
="""
475 p_temp=cdict.cgraph_getitem(%(dataGraph)s,%(fromDataNode)s)
477 %(fromDataDict)s=p_temp[0].v
478 %(toDataPtr)s=%(fromDataDict)s[0].dict
479 while %(toDataCounter)s < %(fromDataDict)s[0].n:
480 %(toDataNode)s=%(toDataPtr)s[%(toDataCounter)s].k
481 %(toDataEdge)s=%(toDataPtr)s[%(toDataCounter)s].v
483 #for %(toDataCounter)s from 0 <= %(toDataCounter)s < %(fromDataDict)s[0].n:
484 _pyrex_index_code
='if %(toDataNodeUnmatched)s:'
485 _pyrex_unmark_code
='# COMPILER NEEDS AT LEAST ONE LINE, EVEN THOUGH NOTHING TO DO HERE'
486 _pyrex_next_code
='%(toDataCounter)s=%(toDataCounter)s+1'
487 _pyrex_end_code
='%(toDataCounter)s=0'
488 _pyrex_closure_code
="""
489 p_temp=cdict.cgraph_getitem(%(dataGraph)s,%(fromDataNode)s)
491 %(fromDataDict)s=p_temp[0].v
492 pd_temp=cdict.cdict_getitem(%(fromDataDict)s,%(toDataNode)s)
494 %(toDataEdge)s=pd_temp[0].v
496 _pyrex_unmark_closure_code
='# COMPILER NEEDS AT LEAST ONE LINE, EVEN THOUGH NOTHING TO DO HERE'
499 "erase node and edge assignment associated with the iterator"
500 if self
.mustMark
and self
.queryNode
in self
.queryMatch
:
501 i
=self
.queryMatch
[self
.queryNode
] # ERASE OLD NODE ASSIGNMENT
502 del self
.dataMatch
[i
]
503 del self
.queryMatch
[self
.queryNode
]
504 try: del self
.queryMatch
[(self
.fromNode
,self
.queryNode
)] #ERASE OLD EDGE
505 except KeyError: pass
509 "returns the next node from iterator that passes all tests"
511 for i
,e
in self
.iterator
: # RETURN THE FIRST ACCEPTABLE ITEM
512 ## try: # THIS EDGE COUNT CHECK WON'T WORK IF MULTIPLE GRAPHS BEING QUERIED!!
513 ## nd=len(self.dataGraph[i]) # CHECK # OF OUTGOING EDGES
516 ## if nd>=self.nq and
517 if self
.mustMark
and i
in self
.dataMatch
:
518 continue # THIS NODE ALREADY ASSIGNED. CAN'T REUSE IT!
519 if (not hasattr(self
,'filter') # APPLY EDGE / NODE TESTS HERE
520 or self
.filter(toNode
=i
,fromNode
=self
.dataNode
,edge
=e
,
521 queryMatch
=self
.queryMatch
,gqi
=self
)):
523 self
.dataMatch
[i
]=self
.queryNode
# SAVE THIS NODE ASSIGNMENT
524 self
.queryMatch
[self
.queryNode
]=i
525 if e
is not None: # SAVE EDGE INFO, IF ANY
526 self
.queryMatch
[(self
.fromNode
,self
.queryNode
)]=e
527 return i
# THIS ITEM PASSES ALL TESTS. RETURN IT
528 return None # NO MORE ITEMS FROM THE ITERATOR
533 class ContainerGQI(GraphQueryIterator
):
534 "Iterate over all nodes in self.dataGraph"
536 for i
in self
.dataGraph
:
539 %(toDataEdge)s=None # CONTAINER
540 for %(toDataNode)s in dataGraph:"""
542 _pyrex_generator_code
="""
543 %(toDataPtrCont)s=%(dataGraph)s[0].dict
544 for %(toDataCounter)s from 0 <= %(toDataCounter)s < %(dataGraph)s[0].n:
545 %(toDataNode)s=%(toDataPtrCont)s[%(toDataCounter)s].k
546 %(toDataEdge)s= -1 # NO EDGE INFO
551 class AttributeGQI(GraphQueryIterator
):
552 "Iterate over all nodes in attribute called self.attr of self.dataNode"
554 for i
,e
in getattr(self
.dataNode
,self
.attr
).items():
556 _generator_code
="for %(toDataNode)s,%(toDataEdge)s in getattr(%(fromDataNode)s,'%(attr)s').items():"
558 class AttrContainerGQI(GraphQueryIterator
):
559 "Iterate over all nodes in attribute called self.attrN of self.dataNode (no edge info)"
561 for i
in getattr(self
.dataNode
,self
.attrN
):
565 for %(toDataNode)s in getattr(%(fromDataNode)s,'%(attrN)s'):"""
567 class CallableGQI(GraphQueryIterator
):
568 "Call the specified function self.f as iterator"
570 for i
,e
in self
.f(self
.dataNode
,self
.dataGraph
,self
):
572 _generator_code
="for %(toDataNode)s,%(toDataEdge)s in self.gqi[%(level)d].f(%(fromDataNode)s,dataGraph,self.gqi[%(level)d]):"
574 class CallableContainerGQI(GraphQueryIterator
):
575 "Call the specified function self.fN as iterator (no edge info)"
577 for i
in self
.fN(self
.dataNode
,self
.dataGraph
,self
):
581 for %(toDataNode)s in self.gqi[%(level)d].fN(%(fromDataNode)s,dataGraph,self.gqi[%(level)d]):"""
583 class SubqueryGQI(GraphQueryIterator
):
584 """base class for running subqueries; produces a union of all subquery solutions.
585 self.subqueries must be list of graph objects, each representing a subquery"""
586 def __init__(self
,fromNode
,queryNode
,dataGraph
,queryGraph
,
587 dataMatch
,queryMatch
,attrDict
={}):
588 GraphQueryIterator
.__init
__(self
,fromNode
,queryNode
,dataGraph
,queryGraph
,
589 dataMatch
,queryMatch
,attrDict
)
591 for qg
in self
.subqueries
: # INITIALIZE OUR SUBQUERIES
592 self
.graphQueries
.append(self
.gqClass(self
.dataGraph
,qg
,
593 dataMatch
,queryMatch
))
595 "Generate union of all solutions returned by all subqueries"
596 for gq
in self
.graphQueries
:
597 for d
in gq
: # LAUNCHES THE GRAPH QUERY, GETS ALL ITS SOLUTIONS
598 yield self
.queryMatch
[self
.queryNode
],None
599 gq
.cleanup() # REMOVE ITS QUERY-DATA MAPPING BEFORE GOING TO NEXT SUBQUERY
604 def newGQI(self
,oclass
,fromNode
,toNode
,dataGraph
,queryGraph
,
605 dataMatch
,queryMatch
,gqiDict
):
606 """figure out a default GQI class to use, based on an attribute dictionary,
607 then return a new object of that class initialized with the input data."""
608 if fromNode
is not None and toNode
is not None and \
609 queryGraph
[fromNode
][toNode
] is not None:
610 kwargs
=queryGraph
[fromNode
][toNode
]
612 try: oclass
=gqiDict
[attr
] # USE ATTRIBUTE NAME TO DETERMINE DEFAULT CLASS
613 except KeyError: pass
616 try: oclass
=kwargs
['__class__'] # LET USER SET CLASS TO USE
617 except KeyError: pass
618 return oclass(fromNode
,toNode
,dataGraph
,queryGraph
,dataMatch
,queryMatch
,kwargs
)
622 class GraphQuery(object):
623 "represents a single query or subquery"
624 # DEFAULT MAPPING OF ATTRIBUTE NAMES TO GQI CLASSES TO USE WITH THEM
625 gqiDict
={'attr':AttributeGQI
,
626 'attrN':AttrContainerGQI
,
628 'fN':CallableContainerGQI
,
629 'subqueries':SubqueryGQI
}
630 newGQI
=newGQI
# USE THIS METHOD TO CHOOSE GQI CLASS FOR EACH ITERATOR
631 def __init__(self
,dataGraph
,queryGraph
,dataMatch
=None,queryMatch
=None):
632 'Enumerate nodes in queryGraph in BFS order, constructing iterator stack'
633 self
.dataGraph
=dataGraph
634 self
.queryGraph
=queryGraph
635 if dataMatch
is None:
637 if queryMatch
is None:
639 self
.dataMatch
=dataMatch
640 self
.queryMatch
=queryMatch
641 # 1ST NEED TO FIND START NODES, PROCESS THEM 1ST, MARK THEM AS GENERATE ALL...
643 for node
in queryGraph
:
644 for node2
in queryGraph
[node
]:
645 isFollower
[node2
]=True # node2 HAS INCOMING EDGE, SO NOT A START NODE!
649 for node
in queryGraph
: # PLACE START NODES AT HEAD OF QUEUE
650 if node
not in isFollower
:
651 q
.append(self
.newGQI(ContainerGQI
,None,node
,dataGraph
,queryGraph
,
652 dataMatch
,queryMatch
,self
.gqiDict
))
654 if n
==0: # NO START NODES, SO JUST ADD THE FIRST QUERY NODE TO THE QUEUE
655 for node
in queryGraph
:
656 q
.append(self
.newGQI(ContainerGQI
,None,node
,dataGraph
,queryGraph
,
657 dataMatch
,queryMatch
,self
.gqiDict
))
659 break # JUST ADD THE FIRST NODE TO THE QUEUE
660 if n
==0: raise ValueError('query graph is empty!')
663 while i
<n
: # ADD NODE TO QUEUE EVEN IF ALREADY VISITED, BUT DON'T ADD ITS NEIGHBORS
664 if q
[i
].queryNode
not in visited
: # ADD NEIGHBORS TO THE QUEUE
665 visited
[q
[i
].queryNode
]=True # MARK AS VISITED
666 for node
in queryGraph
[q
[i
].queryNode
]: # GET ALL ITS NEIGHBORS
667 #print 'QUEUE:',n,node
668 q
.append(self
.newGQI(GraphQueryIterator
,q
[i
].queryNode
,node
,
669 dataGraph
,queryGraph
,
670 dataMatch
,queryMatch
,self
.gqiDict
))
676 "generates all subgraphs of dataGraph matching queryGraph"
679 self
.q
[0].restart() # PRELOAD ITERATOR FOR 1ST NODE
681 dataNode
=self
.q
[i
].next()
682 if dataNode
is not None:
683 #print i,qu[i].queryNode,dataNode
684 if i
+1<n
: # MORE LEVELS TO QUERY?
685 i
+= 1 # ADVANCE TO NEXT QUERY LEVEL
687 else: # GRAPH MATCH IS COMPLETE!
688 yield self
.queryMatch
# RETURN COMPLETE MATCH
690 else: # NO MORE ACCEPTABLE NODES AT THIS LEVEL, SO BACKTRACK
694 "erase any query:data node matching associated with this subquery"
698 def compile(self
,globals=None,compilerClass
=GraphQueryCompiler
,**kwargs
):
699 'return a compiled version of this query, using globals namespace if specified'
700 compiler
=compilerClass(globalDict
=globals,**kwargs
)
706 SubqueryGQI
.gqClass
=GraphQuery
# CLASS FOR CONSTRUCTING SUBQUERIES