..
[pli.git] / pli / tags / tagset.py
blobba0692ab3ee0b33d5d3d401987721b435794fa8f
1 #=======================================================================
3 __version__ = '''0.4.07'''
4 __sub_version__ = '''20100626172825'''
5 __copyright__ = '''(c) Alex A. Naanou 2009-'''
8 #-----------------------------------------------------------------------
9 __doc__ = '''\
10 This module implements a basic object tagging engine.
12 This involves tag manipulation (tagging and untagging) and tag based
13 searches.
15 In this system there is almost no distinction between the tag and the
16 tagged object, other than that they are tagged by two different system
17 tags: "tag" and "object". Both are stored in the same store and treated
18 alike. There are also no restrictions to the format of either tag
19 or the tagged object, though it is recommended for the tags to be
20 str/unicode objects, so as to harness some optimisations within Python
21 and supporting libraries.
23 There are also basic structural consistency verification and restoration
24 routines implemented here.
28 The Tag Store
29 -------------
31 A dict-compatible object used to store objects and tags.
33 Semantics:
34 A key is an entity that "relates" to its values.
36 for tags we say that the key tags its values.
38 When the object is tagged, the whole chain (including the object)
39 is treated as a set of related tags. The tag is recorded as a key
40 and the rest are recorded in a set as a value to that key. This is
41 done for each tag in the given set.
43 NOTE: for tags the relation is asymmetrical.
45 Results:
46 + trivial and fast search.
47 + trivial, though not the fastest addition, and essentially no
48 need for balancing (unless the store is tampered with manually).
49 - redundant linkage within the tag store.
52 NOTE: there is no distinction between tagged objects and tags other
53 than the two special tags "tag" and "object". They are both
54 treated the same and stored in one structure.
55 NOTE: the system tags are configurable.
58 Container structure:
60 <tag>: set([
61 <tag>,
62 ...]),
63 ...
64 <object>: set([]),
65 ...
70 Selectors
71 ---------
73 A selector is a means of filtering the data in the store.
75 There are two types of selectors:
76 - concatenative, returning tagsets, and
77 - non-concatenative returning direct data.
80 Selector operations:
81 tagset.all(*tags) -> tagset
82 select items where each is tagged with ALL of the given tags.
84 tagset.any(*tags) -> tagset
85 select items where each item is tagged with ANY (at least one)
86 of the given tags.
88 tagset.none(*tags) -> tagset
89 select items where each item is NOT tagged with any of the given
90 tags.
92 tagset.tags([object]) -> set
93 select the tags tagging the object.
95 if no object is given then return all the tags.
97 tagset.objects() -> set
98 select all the object in the current tagset.
100 tagset.relatedtags(*tags) -> set
101 select the tags related to the given.
103 related tags are those that also tag the selected objects.
105 i.e. tags suitable for further specialization via .all(...)
108 tagset.chains(*tags) -> set
111 tagset.chainrelated(*tags) -> set
114 tagset.chain2tags(chain) -> list
117 tagset.tags2chain(*tags) -> str
120 NOTE: concatenative selectors also filter tags (XXX need to describe this
121 in more detail!)
125 General notes
126 -------------
128 It is expected that the number of tags will grow far slower than the
129 number of objects (after stabilizing the objects in a live system
130 will exhibit linear growth, while tags will almost plateau at some point).
132 In this approach, the number of objects will be extremely large.
134 The two sub-groups (tags and objects) have slight differences. Tags
135 tend to be highly interlinked while objects rarely exhibit linkage
136 with each other.
138 Most searches will be tag oriented, possibly with a final filtration
139 by tag or object.
141 The current architecture favors concatenative selectors, thus each selector
142 will construct a new store of the same type as the parent.
147 #-----------------------------------------------------------------------
149 import copy
151 import pli.objutils as objutils
154 #-----------------------------------------------------------------------
156 # XXX need tagchain support!!! (used by tree)
159 #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
161 # TODO add an iterative select that is easy on memory (different
162 # algorithm)...
163 # TODO make the TAG_TAG and OBJECT_TAG tags optional and/or
164 # configurable...
165 # the problem here is the lack of ability to control these tags;
166 # in cases it might be useful to be able to add a third system
167 # tag or remove one...
168 # ....this may be done as a separate layer
169 # TODO use None instead of an empty set for keys that still have no
170 # relations...
173 #-----------------------------------------------------------------------
174 #------------------------------------------------------------TagError---
class TagError(Exception):
    '''
    tag related error.

    NOTE: the only use of this in the code here is a commented out
          raise in .all(...).
    '''
180 #-----------------------------------------------------------------------
181 #-----------------------------------------------------AbstractLinkSet---
class AbstractLinkSet(object):
    '''
    abstract base declaring the attributes a link set works with.

    both attributes default to None here and are expected to be
    supplied by a subclass or a mixin.
    '''
    # callable used to construct the sets stored as values in the store
    # (the concrete tagsets in this module use the builtin set).
    __stored_set_constructor__ = None
    # the dict-compatible store holding the key -> set relations.
    __tagset__ = None
189 #------------------------------------------------------AbstractTagSet---
class AbstractTagSet(AbstractLinkSet):
    '''
    abstract base for tagsets: names the two system tags and declares
    the reverse link index.
    '''
    # the system tag used to tag all the tags...
    __tag_tag__ = 'TAG'
    # the system tag used to tag all the objects...
    __object_tag__ = 'OBJECT'

    # the reverse link index (object -> set of tags tagging it).
    # NOTE(review): presumably objutils.createonaccess(..) creates the
    #       dict on first access -- confirm against pli.objutils.
    ## __reverse_links__ = None
    objutils.createonaccess('__reverse_links__', dict)
201 #-----------------------------------------------------------------------
202 #--------------------------------------------------------LinkSetMixin---
class LinkSetMixin(object):
    '''
    mixin implementing raw linking between the keys of a store.

    the host is expected to provide:
        .__tagset__                 - the dict-compatible store.
        .__stored_set_constructor__ - constructor for the value sets.
    '''
    # XXX shows signs of exponential time increase on very large sets of
    #     data... need to revise.
    # XXX this may go bad with a very large number of args...
    def link(self, obj, *objs):
        '''
        link the given objects.

        each distinct object becomes a key in the store and gets related
        to all the other objects given.

        NOTE: only one occurrence of the object itself is dropped, so an
              object given more than once is also linked to itself.

        returns self.
        '''
        store = self.__tagset__
        items = [obj]
        items.extend(objs)
        make_set = self.__stored_set_constructor__

        for item in set(items):
            # everything given minus one occurrence of the item itself...
            others = list(items)
            others.remove(item)

            if item not in store:
                store[item] = make_set(others)
                continue
            store[item].update(others)
        return self
    def unlink(self, obj, *objs):
        '''
        remove the links between objects.

        objects that are not in the store are silently ignored.

        NOTE: if an element is present more than once then remove the self link too.
        NOTE: a key left with no relations at all is removed from the store.

        returns self.
        '''
        store = self.__tagset__
        items = [obj]
        items.extend(objs)

        for item in items:
            # ignore invalid tags... (XXX should we complain here?)
            if item in store:
                # everything given minus one occurrence of the item itself...
                others = list(items)
                others.remove(item)
                # remove the required links...
                store[item].difference_update(others)
                # remove the key if it has no relations left... (XXX)
                if len(store[item]) == 0:
                    del store[item]
        return self
    def links(self, obj):
        '''
        return all the links to the object.

        NOTE: this is the same as tagdb[obj], thus it will raise a
              KeyError if the object is not in the store.
        '''
        return self.__tagset__[obj]
263 #---------------------------------------------------------BasicTagSetMixin---
264 ##!!! revise...
class BasicTagSetMixin(AbstractTagSet):
    '''
    defines basic tagging operations.

    store layout (.__tagset__):
        <tag>    -> set of the objects tagged with it.
        <object> -> set (created empty when the object is first tagged).

    the reverse index (.__reverse_links__) maps a tagged object to the
    set of tags tagging it.
    '''
    def addtags(self, *tags):
        '''
        add empty tags to tag store.

        each tag gets a key in the store and is tagged with the tag tag
        (self.__tag_tag__).

        returns self.
        '''
        tag_tag = self.__tag_tag__
        tagdb = self.__tagset__
        tdbset = self.__stored_set_constructor__
        for tag in tags:
            if tag not in tagdb:
                tagdb[tag] = tdbset()
            # XXX should this tag the tag with __tag_tag__???
            self._tag(tag, tag_tag)
        return self
    def _tag(self, obj, *tags):
        '''
        raw version of the tag(...). this does not enforce the use of system tags.

        this will:
            - add the object to the set of each of the tags (creating
              the tag keys as needed).
            - make sure the object itself is a key in the store.
            - record the tags in the reverse index of the object.

        WARNING: not recommended for direct use.

        returns self.
        '''
        tagdb = self.__tagset__
        revlinks = self.__reverse_links__
        tdbset = self.__stored_set_constructor__
        # add tags...
        for t in tags:
            if t not in tagdb:
                tagdb[t] = tdbset()
            tagdb[t].add(obj)
        # add object to db...
        if obj not in tagdb:
            tagdb[obj] = tdbset()
        # add reverse links...
        if obj not in revlinks:
            revlinks[obj] = tdbset()
        revlinks[obj].update(tags)
        return self
    def tag(self, obj, *tags):
        '''
        tag an object...

        this maintains two special tags:
            tag_tag     : tags all the tags (self.__tag_tag__).
            object_tag  : tags all the objects (self.__object_tag__).

        NOTE: neither the "object" nor the "tag" tags are user modifiable,
              using either as the object or as one of the tags will
              raise a TypeError.

        returns self.
        '''
        tag_tag = self.__tag_tag__
        obj_tag = self.__object_tag__
        tagdb = self.__tagset__
        # do special tags...
        # can't manually use the tag and object tags...
        if tag_tag in tags or obj_tag in tags or obj in (tag_tag, obj_tag):
            # NOTE: the call form of raise works on both Python 2 and 3...
            raise TypeError('can\'t use either "object" or "tag" tags manually.')
        # the tag tag...
        # NOTE(review): the checks below look at the set of the tagged
        #       item (tagdb[t], tagdb[obj]) and not at the set of the
        #       system tag, so they look to pass nearly always; kept as-is
        #       since ._tag(..) is safe to repeat.
        for t in tags:
            if tag_tag not in tagdb.get(t, ()):
                self._tag(t, tag_tag)
        # XXX two loops is not good... may be a good idea to make _tag an
        #     iterator/generator....
        self._tag(obj, *tags)
        # the object tag...
        if obj_tag not in tagdb.get(obj, ()):
            self._tag(obj, obj_tag)
        if tag_tag not in tagdb.get(obj_tag, ()):
            ## self.link(tag_tag, tag_tag)
            self._tag(tag_tag, tag_tag)
        return self
    def untag(self, obj, *tags):
        '''
        remove the tag relation.

        raises TypeError if a system tag is used and KeyError if the
        object is not tagged with one of the tags (or is not known at
        all). in both cases nothing is modified.

        returns self.
        '''
        tag_tag = self.__tag_tag__
        obj_tag = self.__object_tag__
        tagdb = self.__tagset__
        revlinks = self.__reverse_links__
        # do special tags...
        # can't manually use the tag and object tags...
        if tag_tag in tags or obj_tag in tags or obj in (tag_tag, obj_tag):
            raise TypeError('can\'t use either "object" or "tag" tags manually.')
        # validate everything before touching the data, so that a failure
        # can not leave the store and the reverse index out of sync...
        links = revlinks[obj]
        # NOTE: a tag given more than once is only processed once...
        unique = set(tags)
        for tag in unique:
            if obj not in tagdb[tag]:
                raise KeyError(obj)
        # now remove the links...
        links.difference_update(unique)
        for tag in unique:
            tagdb[tag].remove(obj)
        return self
353 #--------------------------------------------TagSetBasicSelectorMixin---
354 # XXX possible hack: the obj_tag is not tagged as a tag....
class TagSetBasicSelectorMixin(AbstractTagSet):
    '''
    the non-concatenative selectors, these return plain sets rather
    than tagsets.
    '''
    def tags(self, obj=None):
        '''
        return the tags tagging the object.

        NOTE: this removes all the relations that are not tags.
        NOTE: without arguments this will return all available tags.
        NOTE: if the store has no tag tag key, an empty set is returned.
        NOTE: this will raise a KeyError if the object has no reverse links.
        '''
        tag_tag = self.__tag_tag__
        obj_tag = self.__object_tag__
        store = self.__tagset__
        if tag_tag not in store:
            return set()
        # XXX possible hack: the obj_tag is not tagged as a tag....
        known = store[tag_tag].union((obj_tag,))
        if obj is not None:
            return self.__reverse_links__[obj].intersection(known)
        return known
    ##!!! revise...
    def relatedtags(self, *tags):
        '''
        return the related tags to the given.

        two tags are related if they both tag the same object. thus this will
        return the tags suitable for further specialization.

        NOTE: to get all the objects use "select(tagdb, tag, tags, __object_tag__)"
              with the same tags...
        '''
        revlinks = self.__reverse_links__
        # get all the valid data...
        objs = self.all(*tags).objects()
        # gather all the related tags...
        related = set( t for o in objs for t in revlinks[o] )
        # remove the objects and input tags...
        excluded = (self.__object_tag__,) + tags + tuple(objs)
        related.difference_update(excluded)
        return related
    ## # XXX should this be a prop?
    ## @property
    def objects(self):
        '''
        return all the objects in the current tagset.

        this is a copy of the set stored under the object tag, or an
        empty set if the store has no object tag.
        '''
        object_tag = self.__object_tag__
        store = self.__tagset__
        if object_tag not in store:
            return set()
        return store[object_tag].copy()
411 #----------------------------------------------------TagSetUtilsMixin---
412 ##!!! make this reverse-link-aware...
413 # XXX add rebuilding of reverse-links
class TagSetUtilsMixin(TagSetBasicSelectorMixin):
    '''
    store maintenance utilities: rebuilding of the system tags and the
    reverse links, consistency checks and garbage collection.

    NOTE: this needs to be mixed in with BasicTagSetMixin.
    '''
    def _rebuild_system_tags(self, other):
        '''
        rebuild the system tag entries of self from those of other.

        of the elements under each system tag in other, only those that
        are keys in self (plus the system tag itself) are kept.

        NOTE: a system tag that is not present in other (e.g. an empty
              store) is skipped rather than raising a KeyError.

        returns self.
        '''
        other_tagdb = other.__tagset__
        tagdb = self.__tagset__
        # rebuild system tags...
        # NOTE: the tag tag is done first, thus its key is already in
        #       the store when the object tag is processed.
        pairs = (
            (self.__tag_tag__, other.__tag_tag__),
            (self.__object_tag__, other.__object_tag__))
        for own, theirs in pairs:
            if theirs not in other_tagdb:
                continue
            tagdb[own] = set( t for t in other_tagdb[theirs]
                                if t in tagdb
                                    or t == theirs )
        return self
    def _rebuild_reverse_links(self, other):
        '''
        rebuild the reverse links of self from those of other.

        only the objects and tags that are keys in the store of self are
        kept.

        NOTE: this goes through .__tagset__ rather than through self, so
              that it does not depend on the tagset being its own store.

        returns self.
        '''
        tagdb = self.__tagset__
        self.__reverse_links__ = self.__reverse_links__.__class__(
                (k, v.intersection(tagdb))
                    for k, v in other.__reverse_links__.items()
                    if k in tagdb )
        return self

    def istagsconsistent(self):
        '''
        a store is consistent if:
            - all tags in relations are present in store keys.
            - if no orphan tags are allowed (???) each tag in keys MUST also be
              present in relations (related to).
            - if all relations are symmetrical.

        NOTE: what is actually checked is that .itertaggaps() yields
              nothing.
        '''
        for _ in self.itertaggaps():
            # if we get in here, it means that we have a problem...
            return False
        return True
    ##!!! make this reverse-link-aware...
    def itertaggaps(self):
        '''
        find store inconsistencies and return the conflicting keys and relations.

        yields (key, missing) pairs, where missing is the set of
        elements related to by key that are not keys in the store.

        this can not detect the following:
            - missing orphaned keys (no data).
            - interlinking between missing keys (no data).
            - inconsistencies in relations (no way to distinguish this from
              good data).
        '''
        tagdb = self.__tagset__
        keys = set(tagdb.keys())
        for tag, rel in tagdb.items():
            # XXX ignore orphans... (to check for them use strict equality)
            # check for gaps (missing tagdb keys)...
            if not keys.issuperset(rel):
                yield tag, rel.difference(keys)
            # XXX the check for missing symmetric relations is disabled,
            #     it may return duplicates of the above data...
    # XXX should this restore the tag to self??
    # TODO make this an interactive generator so as to have more control
    #      over what is fixed and how...
    ##!!! make this reverse-link-aware...
    def filltaggaps(self):
        '''
        fix inconsistencies using the data returned by itertaggaps.

        returns the diff applied to the store: a dict mapping each added
        key to its relations (the key itself included).

        NOTE: this will restore the data that can be detected and restored
              only (no domain semantic checks are made at this level).
        NOTE: this is maximalistic. will fill the holes rather than cut off
              the excess.
        '''
        # NOTE: this is split in two so as to not iterate and modify the
        #       store at the same time...
        tagdb = self.__tagset__
        tdb_diff = {}
        tdbset = self.__stored_set_constructor__
        # build the diff correcting the errors...
        for key, dif in self.itertaggaps():
            for k in dif:
                if k in tdb_diff:
                    tdb_diff[k].update((key,))
                else:
                    tdb_diff[k] = tdbset((key,))
        # apply the diff created above...
        for k, rel in tdb_diff.items():
            # add a link to self (XXX this should be in _iter_store_gaps)
            rel.update((k,))
            if k in tagdb:
                tagdb[k].update(rel)
            else:
                tagdb[k] = rel
        # return the diff...
        return tdb_diff
    def removetaggaps(self):
        '''
        cut off the relations to elements that are not keys in the store.

        NOTE: this is the opposite of .filltaggaps(), it removes the
              excess instead of filling the holes.
        '''
        ts = self.__tagset__
        for key, dif in self.itertaggaps():
            ts[key].difference_update(dif)
    def iterorphans(self):
        '''
        iterate orphaned tags.

        a key is yielded if, the system tags aside, it neither relates
        to anything nor is tagged by anything.
        '''
        obj_tag = self.__object_tag__
        tag_tag = self.__tag_tag__
        tagdb = self.__tagset__
        system = (tag_tag, obj_tag)
        # NOTE: we iterate a snapshot, so that the consumer (e.g. .gc())
        #       can remove keys from the store while we are iterating
        #       (dict.items() is a live view on Python 3).
        for k, v in list(tagdb.items()):
            if v is None or len(v.difference(system)) == 0:
                # XXX do we need this check???
                ##!!! is .tags needed here???
                if len(self.tags(k).difference(system)) == 0:
                    yield k
    def gc(self):
        '''
        interactive garbage collector.

        this will iterate through the orphans and remove them.

        to skip the removal send the string 'skip' to the generator instance.

        Example:

            g = gc(tagdb)

            for tag in g:
                if 'a' in tag:
                    g.send('skip')

        WARNING: this will remove orphaned tags and objects. this list will
                 include tags added by addtags(..) but not yet used.
        '''
        tag_tag = self.__tag_tag__
        tagdb = self.__tagset__
        for tag in self.iterorphans():
            if (yield tag) != 'skip':
                del tagdb[tag]
                # cleanup the tags...
                # NOTE: the tag tag entry may be absent (or just removed
                #       as an orphan itself), hence the .get(..)
                if tag in tagdb.get(tag_tag, ()):
                    tagdb[tag_tag].remove(tag)
560 #-----------------------------------------------------TagSetDictMixin---
class TagSetDictMixin(AbstractTagSet):
    '''
    makes the tagset its own store.

    NOTE: this is intended to be mixed with a dict-compatible class
          (see DictTagSet).
    '''
    # proxy all data access to self.
    @property
    def __tagset__(self):
        return self

    # the reverse link index...
    objutils.createonaccess('__reverse_links__', dict)
570 #-----------------------------------------------------TagSetInitMixin---
class TagSetInitMixin(AbstractTagSet):
    '''
    provides the tagset with a store of its own in .__tagset__
    '''
    # the store is set up via objutils.createonaccess with dict as the
    # constructor.
    # NOTE(review): presumably the dict is created on first access --
    #       confirm against pli.objutils.
    objutils.createonaccess('__tagset__', dict)
577 #-----------------------------------------------------------------------
578 # all basic tag selectors should return tagsets...
579 # XXX this is really ugly, need to revise/rewrite...
580 ##!!! this needs to be a mapping -- constructor interface...
class TagSetSelectorMixin(TagSetUtilsMixin):
    '''
    the concatenative selectors: .all(..), .any(..) and .none(..)

    each returns a new tagset of the same class as self, built by
    passing a list of (key, set) pairs to the class constructor. the
    system tags and the reverse links of the result are rebuilt from
    self and the relations to the keys that did not make it into the
    result are cut off.
    '''
    ##!!! should these in case of tag conflicts return empty tagsets or just err?
    def _all(self, *tags):
        '''
        selects all objects tagged with all the tags.

        returns a plain set, the given tags excluded.

        NOTE: this will raise a KeyError if a tag is not in the store.
        '''
        tagdb = self.__tagset__
        # NOTE: no tags select nothing...
        if len(tags) == 0:
            return set()
        # a small optimisation: order the tags to intersect out as much as
        # possible as early as possible... (XXX check for better strategies)
        # NOTE: a key function is used as cmp-style sorting is not
        #       available on Python 3.
        ordered = sorted(tags, key=lambda t: len(tagdb[t]))
        # now do the real work...
        # this does the following:
        #   - start with the objects of the smallest tag.
        #   - intersect the set with the tagged objects of each of the next tags.
        #   - remove all the tags of the path (XXX not sure if this should be
        #     done at this stage...)
        visited = set(ordered)
        res = set(tagdb[ordered[0]])
        for t in ordered[1:]:
            res.intersection_update(tagdb[t])
        return res.difference(visited)
    ##!!! on conflict this produces a result not containing system tags (tagset.__init__ problem)...
    def all(self, *tags):
        '''
        all that are tagged with all of the tags.

        NOTE: if any of the tags is not present in the store an empty
              tagset is returned.
        '''
        tags = set(tags)
        ts = self.__tagset__
        try:
            intersection = self._all(*tags)
        except KeyError:
            ## raise TagError, 'tag "%s" not present in current tagset.' % t
            return self.__class__()
        # build a result tagset...
        res = self.__class__([ (k, ts[k].copy())
                                for k in ts.keys()
                                if k in intersection
                                    or len(ts[k].intersection(intersection)) > 0 ])
        res._rebuild_system_tags(self)
        res._rebuild_reverse_links(self)
        res.removetaggaps()

        return res
    ##!!! this loses all the tags but the ones given as args...
    def any(self, *tags):
        '''
        all that are tagged with any of the tags.
        '''
        ts = self.__tagset__
        # take only the tags present in self and ignore the rest...
        # XXX should we err if a tag is not present??
        tags = set(tags).intersection(ts)

        # everything tagged by any of the tags...
        objects = set()
        for t in tags:
            objects.update(ts[t])

        res = self.__class__([ (k, ts[k].copy())
                                for k in ts.keys()
                                if k in tags
                                    or len(tags.intersection(ts[k])) > 0
                                    or k in objects ])
        ##                          or ts[k].issubset(objects) ])

        res._rebuild_system_tags(self)
        res._rebuild_reverse_links(self)
        res.removetaggaps()

        return res
    ##!!! still needs fixing: produces orphans...
    def none(self, *tags):
        '''
        all that are tagged with none of the tags.
        '''
        tags = set(tags)
        ts = self.__tagset__

        # everything tagged by any of the tags...
        objects = set()
        # skip tags not present in self...
        for t in tags.intersection(ts):
            objects.update(ts[t])

        # NOTE: if a tagset is inconsistent, i.e. some tags are not
        #       present in keys but still tag an object that object
        #       will get cut out...
        # XXX this might be a good place to err...
        res = self.__class__([ (k, ts[k].copy())
                                for k in ts.keys()
                                if (k not in tags
                                        and len(tags.intersection(ts[k])) == 0
                                        and k not in objects)
                                    or not ts[k].issubset(objects) ])

        res._rebuild_system_tags(self)
        res._rebuild_reverse_links(self)
        res.removetaggaps()

        return res
690 #---------------------------------------------------------TagSetMixin---
class TagSetMixin(BasicTagSetMixin, TagSetSelectorMixin, TagSetUtilsMixin):
    '''
    the complete tagset interface: the tagging operations, the selectors
    and the store utilities combined.

    NOTE: this adds nothing of its own.
    '''
698 #-----------------------------------------------------------------------
699 # tagchian mechanics...
700 # XXX move this to a different module...
701 #--------------------------------------------TagSetWithTagChainsMixin---
702 # XXX do we need chain-specific select???
703 ##!!! revise !!!##
704 ##!!! TODO select objects via sub-chains...
705 ##!!! TODO select objects via chain patterns.
706 ##!!! ex: A:B:*, A:B:*:C, ...etc.
707 ##class TagSetTagChainMixin(LinkSetMixin):
class TagSetTagChainMixin(object):
    '''
    a chain is a tuple of tags.

    the tag chain structure is as follows:

        title <-------> Terminator

              (title, Terminator)

    - all the chain elements tag the chain (all-one).
    - all chain elements are linked (all-all).

    NOTE: this must be mixed with a valid tagset.
    NOTE: by default the chains are represented as tuples.
    NOTE: the chain elements do not tag the object.
    '''
    # the tag tagging the tagchains...
    # NOTE: if this is None, do not tag chains
    __chain_tag__ = 'TAGCHAIN'

    # tag interface...
    def addtags(self, *tags):
        '''
        add tags to the store, setting up the chains among them.

        returns whatever the next .addtags(..) in the MRO returns.
        '''
        tags, chains = self._splitchains(tags)
        # process chains...
        self._addchains(*chains)
        return super(TagSetTagChainMixin, self).addtags(*tags)
    def tag(self, obj, *tags):
        '''
        tag an object, setting up the chains among the tags first.

        the object is tagged both with the plain tags and with the
        chains themselves.
        '''
        tags, chains = self._splitchains(tags)
        self._addchains(*chains)
        return super(TagSetTagChainMixin, self).tag(obj, *(tags+chains))
    def _tag(self, obj, *tags):
        '''
        raw version of .tag(..), setting up the chains among the tags first.
        '''
        tags, chains = self._splitchains(tags)
        self._addchains(*chains)
        return super(TagSetTagChainMixin, self)._tag(obj, *(tags+chains))
    # XXX a chain-aware .untag(..) may be useful for garbage collection...
    # chain-specific helpers...
    def _ischain(self, tag):
        '''
        test if a tag is tagchain compatible.

        NOTE: only exact tuples are chains, tuple subclasses are not.
        '''
        return type(tag) is tuple
    def _splitchains(self, tags):
        '''
        split the tags and chains.

        returns: <tags>, <chains> (both tuples, the order is kept)
        '''
        ischain = self._ischain
        plain = []
        chains = []
        for tag in tags:
            if ischain(tag):
                chains.append(tag)
            else:
                plain.append(tag)
        return tuple(plain), tuple(chains)
    ##!!! need to rethink this...
    def _addchains(self, *chains):
        '''
        set up the chains that are not yet in the store.

        a new chain is tagged with its elements, the tag tag and, if it
        is not None, the chain tag.

        NOTE: the check is done on .__tagset__ rather than on self, so
              this does not depend on the tagset being its own store.
        '''
        tagdb = self.__tagset__
        chain_tag = self.__chain_tag__
        for c in chains:
            # check if chain exists...
            if c in tagdb:
                continue
            t = self.chain2tags(c)
            system = (self.__tag_tag__,)
            if chain_tag is not None:
                system += (chain_tag,)
            self._tag(c, *(t + system))
            # links all the tags in a chain...
            ##!!! see if this is correct... (was .link(...))
            self._tag(c, *t)
    # tag-chain specific methods...
    @staticmethod
    def chain2tags(chain):
        '''
        return the tags in chain.
        '''
        # XXX check if chain is a chain????
        return tuple(chain)
    @staticmethod
    def tags2chain(*tags):
        '''
        return the chain made of the tags (a tuple).
        '''
        return tags
    def chains(self, *tags):
        '''
        return all the chains that contain tags.

        NOTE: if chains are given, then all the tags in them will be
              added to the search.
        '''
        tags, chains = self._splitchains(tags)
        tags = set(tags)
        for chain in chains:
            tags.update(self.chain2tags(chain))
        # collect all related chains...
        selected = self.all(self.__chain_tag__, *tags)
        return selected.__tagset__.get(self.__chain_tag__, set())
    ##!!!
    def chainrelated(self, *tags):
        '''
        return all the tags that are related via chains.

        these are the elements of all the chains containing the tags,
        the given tags excluded.
        '''
        res = set()
        for chain in self.chains(*tags):
            res.update(self.chain2tags(chain))
        return res.difference(tags)
841 #-------------------------------------------------StringTagChainMixin---
842 # XXX add consistency checking...
843 ##!!! need to migrate this to the new interface !!!##
844 ##class StringTagChainMixin(object):
class StringTagChainMixin(TagSetTagChainMixin):
    '''
    changes tagchain format to the following string syntax:

        <tag>:<tag>[:...]

    NOTE: this must be mixed with a valid tagset with chain support.
    '''
    # the exact types a chain string can be of...
    # NOTE: unicode only exists on Python 2, on Python 3 str covers it.
    try:
        __chain_types__ = (str, unicode)
    except NameError:
        __chain_types__ = (str,)

    def _ischain(self, tag):
        '''
        test if a tag is tagchain compatible.

        a chain is a string containing at least one ':' with no empty
        elements between the separators.

        NOTE: the type check is exact, string subclasses are not chains.
        '''
        if type(tag) not in self.__chain_types__:
            return False
        return ':' in tag and all(tag.split(':'))
    @staticmethod
    def chain2tags(chain):
        '''
        return the tags in chain.
        '''
        # XXX check if chain is a chain????
        return tuple(chain.split(':'))
    @staticmethod
    def tags2chain(*tags):
        '''
        return the chain string made of the tags.
        '''
        return ':'.join(tags)
877 #-----------------------------------------------------------------------
878 #--------------------------------------------------------------TagSet---
879 ##class TagSet(TagSetInitMixin, TagSetMixin):
class TagSet(StringTagChainMixin, TagSetInitMixin, TagSetMixin):
    '''
    a tagset with string tagchain support that keeps its store in
    .__tagset__ (see TagSetInitMixin).
    '''
    # the relations are stored in plain builtin sets...
    __stored_set_constructor__ = set
886 #----------------------------------------------------------DictTagSet---
887 ##class DictTagSet(TagSetDictMixin, TagSetMixin, dict):
class DictTagSet(StringTagChainMixin, TagSetDictMixin, TagSetMixin, dict):
    '''
    a tagset with string tagchain support that is a dict and acts as
    its own store (see TagSetDictMixin).
    '''
    # the relations are stored in plain builtin sets...
    __stored_set_constructor__ = set
895 #-----------------------------------------------------------------------
# self-test / usage demo, only run when the module is executed as a script.
#
# the sample text (txt) is split into words and each word is tagged with
# its letters; the session text is handed to pli.testlog.logstr(..)
#
# NOTE(review): presumably logstr(..) evaluates the session line by line,
#       with '->' lines giving the expected result of the preceding
#       expression -- confirm against pli.testlog.
# NOTE(review): curry is imported but is not used in the code below.
896 if __name__ == '__main__':
898 from pli.testlog import logstr
899 from pli.functional import curry
901 txt = '''
902 some text that will be tagged.
904 the tagging will be like this:
905 - each word is an object.
906 - each word is tagged with the letters that it contains.
908 just in case, a tag can also be an object and vice versa.
912 logstr('''
914 ts = DictTagSet()
916 ts.tag('X', 'a', 'b', 'c')
918 >>> ts
920 ts = DictTagSet()
922 ! ts.tag('X', 'a')
923 ! ts.tag('X', 'b')
924 ! ts.tag('X', 'c')
926 >>> ts
928 ! ts.tag('Y', 'c', 'd')
930 # test of types that can be used as tags... (sanity check)
931 ! ts.tag('Z', 'string', u'unicode', True, False, 1, 1.1)
932 ! ts.tag('ZZ', ())
933 ### NOTE: tags MUST be hashable, so the following will fail.
934 ##! ts.tag('ZZ', [], {})
936 >>> ts
939 # unite:
940 ##!!! should yield a tagset only containing X
941 ts.any('a', 'b')
943 # _intersect:
944 ts._all('a')
946 # intersect:
947 ts.all('a')
949 # exclude:
950 ts.none('a')
953 ## ts.untag('X', 'a')
958 >>> ts
961 ts.istagsconsistent()
962 -> True
963 tuple(ts.itertaggaps())
964 -> ()
965 tuple(ts.iterorphans())
966 -> ()
968 words = DictTagSet()
969 ! [ words.tag(w, *tuple(w)) for w in txt.split() ]
971 ## >>> words
972 ## words.__tagset__['t']
974 ## words.all('t', 'x')
976 ##!!! Q: sould the resultin tagset be tag complete?
977 ##!!! ...i.e. .tags(obj) should return all the tags in any sub-tagset
978 ##!!! or only the relevant tags for that subset? (see next couple of lines)
979 >>> words.any('a', 'x').tags('that')
980 >>> words.tags('that')
982 >>> words.any('a', 'x').tags('a')
984 >>> words.tags('a')
986 ## >>> words.any('t', 'x').none('c')
988 >>> words.objects()
990 >>> words.any('a').objects()
992 >>> words.tags()
994 >>> words.tags('that')
996 >>> words.tags('t')
998 ##!!! is this correct???
999 >>> words.all()
1001 >>> words.relatedtags('a', 't')
1003 >>> words.relatedtags('a', 't', 'e')
1005 >>> words.relatedtags('a', 't', 'e', 'g')
1007 >>> words.relatedtags('a', 't', 'e', 'g', 'd')
1008 >>> words.all('a', 't', 'e', 'g', 'd').tags()
1009 >>> words.all('a', 't', 'e', 'g', 'd').objects()
1011 >>> words.all('a', 't', 'e', 'g', 'd').none('.').objects()
1012 >>> words.all('a', 't', 'e', 'g', 'd').none('.', 'a').objects()
1015 >>> words.all('t', 'e').tags()
1016 >>> words.all('t', 'e').any('l', 'j').objects()
1018 # errors -- tag conflicts...
1019 # XXX should these err or just return empty tagsets???
1020 >>> words.all('t', 'e').any('l').any('j').objects()
1021 >>> words.all('t', 'e').all('l').all('j').objects()
1022 >>> words.all('t', 'e').all('l').all('j').tags()
1023 >>> words.all('t', 'e').all('l')
1024 >>> words.all('t', 'e').all('l').any('j')
1025 >>> words.all('t', 'e').all('l').any('j').tags()
1026 >>> words.all('t', 'e').all('l').none('j').tags()
1027 >>> words.all('t', 'e').all('l').all('j')
1032 # test tagchain functionality...
1034 words = DictTagSet()
1036 words.tags2chain('a', 'b', 'c')
1037 -> 'a:b:c'
1038 words.chain2tags('a:b:c')
1039 -> ('a', 'b', 'c')
1041 ! words.tag('that', 'T:H:A:T')
1042 ! words.tag('this', 'T:H:I:S')
1043 ! words.tag('that', 't', 'h', 'a', 't')
1044 ! words.tag('that', 't', 'h', 'i', 's')
1046 words.tags('that')
1047 words.all('A').objects()
1049 words.chains()
1050 words.chains('A')
1051 words.chains('T:H')
1052 words.all('T', 'H').chains()
1054 words.all('T:H:A:T').objects()
1055 -> set(['that'])
1057 >>> words.all('t', 'h', 'a').objects()
1059 words.all('T:H:I:S').objects()
1060 -> set(['this'])
1062 # NOTE: this will return a tagset and not a list of objects.
1063 >>> words.all('T:H:I:S', words.__object_tag__)
1065 ''')
1069 #=======================================================================
1070 # vim:set ts=4 sw=4 nowrap :