From 2b51f067b5bda957f530e8bf89baa3371ccf667e Mon Sep 17 00:00:00 2001
From: Michael Coleman <tutufan@gmail.com>
Date: Mon, 23 Apr 2007 23:17:26 -0500
Subject: [PATCH] Cleanups: dead code removal/etc.

---
 TODO             | 28 +++++++++++++++++-----------
 cgreylag.cpp     | 28 ++--------------------------
 cgreylag.hpp     |  5 +----
 cgreylag.i       | 16 ----------------
 greylag_grind.py |  6 ++----
 5 files changed, 22 insertions(+), 61 deletions(-)
diff --git a/TODO b/TODO
index 200c97b..931849b 100644
--- a/TODO
+++ b/TODO
@@ -37,22 +37,22 @@ TASK QUEUE
 * MINI-GOAL: basic greylag-process usable on our cluster (no mods)
 
 
+* Basic optimization
+** look at callgrind output
+
+* update estimate factor
+
+* Update docstrings
+* Update test cases
+
 * Compare greylag/SEQUEST/MM on test-myrimatch example (non-specific)
 ** whole file
 ** Note: SEQUEST parent tolerance differs
 
-* Look at memory usage
-
-* Basic optimization
 * Evaluate performance differences vs SEQUEST/MM/Xtandem?
 
-* Try to cut unneeded SWIG types
-
 * Design and implement greylag master process (work manifests?)
 
-* Update docstrings
-* Update test cases
-
 * Look for more dead code to remove
 
 * Redo cleavage point code (enzyme and non-specific)
@@ -62,13 +62,17 @@ TASK QUEUE
 ** A lines for each M line to document regime (index of R) and residue mods
 ** Try to keep back compatibility by grepping out A/R lines
 
-* greylag-solo
-
-* update estimate factor
+* Look at memory usage
+** maybe avoid spectrum name copies
+** maybe avoid locus name copies
+** instead of copying db sequences, use Python's?
+** kill off cleavage point lists (4M?)
 
 
 = M1 =========================================================================
 
+* greylag-solo
+
 * Implement MM smart +3 model?
 ** Is it better?
 
@@ -110,6 +114,8 @@ TASK QUEUE
 ** This will obviate the need to detect identical best matches at search time?
 ** Fix redundant peptide reporting
 
+* Make sure greylag is 64-bit clean.
+
 
 * Time a real mod search vs SEQUEST (and xtandem?), is time reasonable?
   MAKE SURE PARAMS ARE COMPARABLE!  Ballpark correctness?
diff --git a/cgreylag.cpp b/cgreylag.cpp
index f554e61..12d7f7f 100644
--- a/cgreylag.cpp
+++ b/cgreylag.cpp
@@ -497,30 +497,6 @@ score_spectrum(const spectrum &x, const spectrum &y) NOTHROW {
   const int peak_misses = (valid_theoretical_peaks
 			   - accumulate(peak_hit_histogram.begin(),
 					peak_hit_histogram.end(), 0));
-#if 0
-  for (unsigned int i=0; i<peak_hit_histogram.size(); i++)
-    if (x.intensity_class_counts[i] < peak_hit_histogram[i])
-      std::cerr << i << " C(" << x.intensity_class_counts[i] << ", "
-		<< peak_hit_histogram[i] << ")" << std::endl;
-  if (x.empty_peak_bins < peak_misses)
-    std::cerr << "M C(" << x.empty_peak_bins << ", " << peak_misses << ")"
-	      << std::endl;
-  if (x.total_peak_bins < valid_theoretical_peaks)
-    std::cerr << "T C(" << x.total_peak_bins << ", " << valid_theoretical_peaks
-	      << ")" << std::endl;
-#endif
-
-#if 0
-  for (unsigned int i=0; i<peak_hit_histogram.size(); i++)
-    std::cerr << i << " C(" << x.intensity_class_counts[i] << ", "
-	      << peak_hit_histogram[i] << ")" << std::endl;
-  std::cerr << "C(" << x.empty_peak_bins << ", " << peak_misses << ")"
-	    << std::endl;
-  std::cerr << "C(" << x.total_peak_bins << ", " << valid_theoretical_peaks
-	    << ")" << std::endl;
-  std::cerr << std::endl;
-#endif
-
   assert(peak_misses >= 0);
 
   double score = 0.0;
@@ -770,8 +746,8 @@ search_run(const search_context &context, const sequence_run &sequence_run,
 		  fixed_parent_mass);
     unsigned int end = std::max<unsigned int>(begin + 1, next_end);
     for (; end<cleavage_points.size(); end++) {
-      m.missed_cleavage_count = end - begin - 1;
-      if (m.missed_cleavage_count > context.maximum_missed_cleavage_sites)
+      const int missed_cleavage_count = end - begin - 1;
+      if (missed_cleavage_count > context.maximum_missed_cleavage_sites)
 	break;
 
       const int end_index = cleavage_points[end];
diff --git a/cgreylag.hpp b/cgreylag.hpp
index 4fae055..297f39b 100644
--- a/cgreylag.hpp
+++ b/cgreylag.hpp
@@ -281,7 +281,6 @@ struct mass_trace_item {
 class match {
 public:
   double score;
-  int missed_cleavage_count;	// FIX: unneeded
   int spectrum_index;
   std::string peptide_sequence;
   double predicted_parent_mass;
@@ -292,9 +291,7 @@ public:
   std::vector<int> peptide_begin; // absolute position within locus
   std::vector<std::string> sequence_name;
 
-  match() : score(0), missed_cleavage_count(-1), spectrum_index(-1),
-	    predicted_parent_mass(0) {
-  }
+  match() : score(0), spectrum_index(-1), predicted_parent_mass(0) { }
 };
 
 
diff --git a/cgreylag.i b/cgreylag.i
index 17703b0..d0b8b64 100644
--- a/cgreylag.i
+++ b/cgreylag.i
@@ -20,10 +20,6 @@
 %module cgreylag
 
 
-// Declare this read-only, to suppress a warning about a possible memory leak.
-%immutable mass_trace_item::description;
-
-
 %feature("autodoc");
 
 %{
@@ -72,7 +68,6 @@
 %template(vector_vector_int) std::vector< std::vector<int> >;
 %template(vector_vector_double) std::vector< std::vector<double> >;
 %template(vector_vector_match) std::vector< std::vector<match> >;
-%template(vector_vector_vector_double) std::vector< std::vector< std::vector<double> > >;
 
 
 %include std_pair.i
@@ -90,17 +85,6 @@
 %template(multimap_double_vector_size_type)
     std::multimap<double, std::vector<spectrum>::size_type>;
 
-// currently unused
-//%template(pair_char_int) std::pair<char, int>;
-//%template(pair_char_double) std::pair<char, double>;
-
-//%template(map_char_int) std::map<char, int>;
-//%template(map_char_double) std::map<char, double>;
-
-
-%include "typemaps.i"
-%apply int *OUTPUT { int *peak_count };
-
 
 %include file.i
 
diff --git a/greylag_grind.py b/greylag_grind.py
index 0a1633a..4c44952 100755
--- a/greylag_grind.py
+++ b/greylag_grind.py
@@ -42,15 +42,13 @@ __version__ = "0.0"
 
 
 import ConfigParser
-from collections import defaultdict
 import contextlib
 import cPickle
 import fileinput
 import gzip
-import itertools
 import logging
 from logging import debug, info, warning
-import math                             #??
+import math
 import optparse
 import os
 from pprint import pprint, pformat
@@ -733,7 +731,6 @@ def pythonize_swig_object(o, only_fields=None, skip_fields=[]):
 
     >>> pprint(pythonize_swig_object(cgreylag.score_stats(1, 1)))
     {'best_matches': [[{'mass_trace': [],
-                        'missed_cleavage_count': -1,
                         'peptide_begin': [],
                         'peptide_sequence': '',
                         'predicted_parent_mass': 0.0,
@@ -1176,6 +1173,7 @@ def main(args=sys.argv[1:]):
     # read spectra per work slice
     spectra = read_spectra_slice(spectrum_fns, spectrum_offset_indices,
                                  options.work_slice)
+    del spectrum_offset_indices
     spectra.sort(key=lambda x: x.mass)
 
     def peak_statistics(spectra):
-- 
2.11.4.GIT