From 6921d5b4bba45a40623e2952103d77bd27c48050 Mon Sep 17 00:00:00 2001 From: Christopher Lee Date: Tue, 1 Sep 2009 21:44:49 -0700 Subject: [PATCH] revised gff3 code --- doc/rest/tutorials/gene.rst | 48 +++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/doc/rest/tutorials/gene.rst b/doc/rest/tutorials/gene.rst index ad6c06a..d2eb714 100644 --- a/doc/rest/tutorials/gene.rst +++ b/doc/rest/tutorials/gene.rst @@ -127,29 +127,39 @@ info:: else: setattr(self, attr, val) - class GFF3File(object): - def __init__(self, filename, genome): - d = {} - self.filename = filename - self.genome = genome - ifile = file(filename) - for line in ifile: # parse all the GFF3 lines - if line.startswith('#'): # ignore this line - continue - row = GFF3Row(line) - try: - d.setdefault(row.type, {})[row.ID] = row - except AttributeError: - pass - ifile.close() - for atype,sliceDB in d.items(): # create annotation DBs - adb = annotation.AnnotationDB(sliceDB, genome) - setattr(self, atype, adb) - The key fields this must provide to be used as slice info are id, start, stop, and orientation. + +Next, let's write a reader that will read all the annotations in +a GFF3 file:: + + def read_gff3(filename, genome): + d = {} # for different types of sliceDBs + ifile = file(filename) + for line in ifile: # parse all the GFF3 lines + if line.startswith('#'): # ignore this line + continue + row = GFF3Row(line) + try: + d.setdefault(row.type, {})[row.ID] = row + except AttributeError: + pass # no type or ID so ignore... + ifile.close() + annotations = {} + for atype,sliceDB in d.items(): # create annotation DBs + adb = annotation.AnnotationDB(sliceDB, genome) + annotations[atype] = adb + return annotations + +Now we can read a file and turn it into annotation databases as easily +as:: + + annots = read_gff3('eden.gff3', genome) + print 'annotation types:', len(annots) + print 'mRNAs:', len(annots['mRNA']) + TO DO ----- -- 2.11.4.GIT