3 (lineage LineageClassifier)
4 (java.io InputStreamReader FileInputStream PushbackReader)
5 (javax.vecmath Point3d)
6 (javax.swing JTable JFrame JScrollPane JPanel JLabel BoxLayout JPopupMenu JMenuItem)
7 (javax.swing.table AbstractTableModel DefaultTableCellRenderer)
8 (java.awt.event MouseAdapter ActionListener)
9 (java.awt Color Dimension Component)
10 (mpicbg.models AffineModel3D)
11 (ij.measure Calibration)
12 (ini.trakem2.utils Utils)
13 (ini.trakem2.display Display Display3D LayerSet)
14 (ini.trakem2.analysis Compare)
15 (ini.trakem2.vector VectorString3D Editions))
16 (:use [clojure.set :only (intersection)]))
20 `(ij.IJ/log (str ~@args)))
22 (defn- as-VectorString3D
23 "Convert a map of {\"name\" {:x (...) :y (...) :z (...)}} into a map of {\"name\" (VectorString3D. ...)}"
29 (VectorString3D. (into-array Double/TYPE (data :x))
30 (into-array Double/TYPE (data :y))
31 (into-array Double/TYPE (data :z))
35 (defn- load-SAT-lib-as-vs
36 "Returns the SATs library with VectorString3D instances"
44 (assoc data :SATs (as-VectorString3D (data :SATs))))))
48 (InputStreamReader. (FileInputStream. filepath))
51 (defn- fids-as-Point3d
52 "Convert a map like {\"blva_joint\" [70.62114008906023 140.07819545137772 -78.72010436407604] ...}
53 into a sorted map of {\"blva_joint\" (Point3d ...)}"
58 (assoc m (key e) (Point3d. (double (get xyz 0))
60 (double (get xyz 2))))))
65 "Returns a map of SAT name vs VectorString3D,
66 where the VectorString3D is registered to the target fids."
67 [brain-label brain-data target-fids]
68 (let [source-fids (fids-as-Point3d (brain-data :fiducials))
69 SATs (into (sorted-map) (brain-data :SATs))
70 common-fid-keys (intersection (into #{} (keys source-fids)) (into #{} (keys target-fids)))
71 vs (Compare/transferVectorStrings (vals SATs)
72 (vals (into (sorted-map) (select-keys source-fids common-fid-keys)))
73 (vals (into (sorted-map) (select-keys target-fids common-fid-keys)))
76 (map #(str (.replaceAll % "\\[.*\\] " " [") \space brain-label \]) (keys SATs))
80 [SAT-lib reference-brain]
81 (let [r (re-pattern (str "peduncle . (dorsal|medial) lobe.*" reference-brain ".*"))]
86 (if-let [[k v] (first sl)]
95 "Load the named SAT-lib from filepath into *libs* and returns it.
96 Will setup the reference set of fiducial points and the mushroom body lobes from the entry named by reference-brain,
97 as we as register all SATs to the reference brain."
98 [lib-name filepath reference-brain]
99 (let [SAT-lib (load-SAT-lib-as-vs filepath)
100 target-fids (fids-as-Point3d ((SAT-lib reference-brain) :fiducials))
103 (conj m (register-SATs (key e) (val e) target-fids)))
107 (commute *libs* (fn [libs]
108 (assoc libs lib-name {:filepath filepath
111 :mb (load-mb SATs reference-brain)})))))
117 "Register a singe VectorString3D from source-fids to target-fids."
118 [vs source-fids target-fids]
119 (let [common-fid-keys (intersection (into #{} (keys source-fids)) (into #{} (keys target-fids)))]
120 (if (empty? common-fid-keys)
122 (first (Compare/transferVectorStrings [vs]
123 (vals (into (sorted-map) (select-keys source-fids common-fid-keys)))
124 (vals (into (sorted-map) (select-keys target-fids common-fid-keys)))
128 "Returns a resampled copy of vs."
130 (let [copy (.clone vs)]
131 (.resample copy delta)
135 "Returns a vector of two elements; the first element is the list of matches
136 of the query-vs against all SAT vs in the library sorted by mean euclidean distance,
137 and labeled as correct of incorrect matches according to the Random Forest classifier.
138 The second element is a list of corresponding SAT names for each match.
139 Assumes the query-vs is already registered to the corresponding SAT-lib reference fiducials."
140 [SATs query-vs delta direct substring]
141 (let [vs1 (resample query-vs delta) ; query-vs is already registered into FRT42-fids
143 #(int (- (%1 :med) (%2 :med)))
146 (let [vs2 (let [copy (.clone (val e))]
147 (.resample copy delta)
149 c (Compare/findBestMatch vs1 vs2
150 (double delta) false (int 5) (float 0.5) Compare/AVG_PHYS_DIST
152 (double 1.1) (double 1.1) (double 1))
153 stats (.getStatistics (get c 0) false (int 0) (float 0) false)]
157 :correct (LineageClassifier/classify stats)}))
159 ; Cleanup thread table
160 (LineageClassifier/flush)
168 "Measure the width, in pixels, of the String text, by the Font and FontMetrics of component."
169 [#^String text #^Component component]
170 ; Get the int[] of widths of the first 256 chars
171 (let [#^ints ws (.getWidths (.getFontMetrics component (.getFont component)))]
173 (if (< (int c) (int 256))
174 (+ sum (aget ws (int c)))
180 "Takes a calibrated VectorString3D and a list of fiducial points, and checks against the library for identity.
181 For consistency in the usage of the Random Forest classifier, the registration is done into the FRT42D-BP106 brain."
182 [pipe SAT-lib query-vs fids delta direct substring]
183 (let [SATs (SAT-lib :SATs)
184 vs1 (register-vs query-vs fids (SAT-lib :fids))
185 [matches names] (match-all SATs vs1 delta direct substring)
186 SAT-names (vec names)
187 indexed (vec matches)
188 column-names ["SAT" "Match" "Seq sim %" "Lev Dist" "Med Dist" "Avg Dist" "Cum Dist" "Std Dev" "Prop Mut" "Prop Lengths" "Proximity" "Prox Mut" "Tortuosity"]
189 worker (agent nil) ; for event dispatch
190 table (JTable. (proxy [AbstractTableModel] []
192 (get column-names col))
196 (count column-names))
197 (getValueAt [row col]
198 (let [match (get indexed row)
199 stats (match :stats)]
201 (= col 0) (get SAT-names row)
202 (= col 1) (str (match :correct)) ; Whether the classifier considered it correct or not
203 true (Utils/cutNumber
205 (= col 2) (* 100 (get stats 6)) ; Similarity
206 (= col 3) (get stats 5) ; Levenshtein
207 (= col 4) (get stats 3) ; Median Physical Distance
208 (= col 5) (get stats 0) ; Average Physical Distance
209 (= col 6) (get stats 1) ; Cummulative Physical Distance
210 (= col 7) (get stats 2) ; Std Dev
211 (= col 8) (get stats 4) ; Prop Mut
212 (= col 9) (get stats 9) ; Prop Lengths
213 (= col 10) (get stats 7) ; Proximity
214 (= col 11) (get stats 8) ; Prox Mut
215 (= col 12) (get stats 10)) ; Tortuosity
217 (isCellEditable [row col]
219 (setValueAt [ob row col] nil)))
220 frame (JFrame. (str "Matches for " pipe))
221 dummy-ls (LayerSet. (.. Display getFront getProject) (long -1) "Dummy" (double 0) (double 0) (double 0) (double 0) (double 0) (double 512) (double 512) false (int 0) (java.awt.geom.AffineTransform.))]
222 (.setCellRenderer (.getColumn table "Match")
223 (proxy [DefaultTableCellRenderer] []
224 (getTableCellRendererComponent [t v sel foc row col]
225 (proxy-super setText (str v))
226 (proxy-super setBackground
227 (if (Boolean/parseBoolean v)
234 (.add frame (JScrollPane. table))
235 (.setSize frame (int 950) (int 550))
236 (.addMouseListener table
237 (proxy [MouseAdapter] []
240 (clear-agent-errors worker) ; I don't care what it was
244 (let [match (indexed (.rowAtPoint table (.getPoint ev)))
246 (Display3D/addMesh dummy-ls
247 (resample (.clone vs1) delta)
250 (Display3D/addMesh dummy-ls
251 (resample (SATs (match :SAT-name)) delta)
257 ; On double-click, show 3D view of the match:
258 (= 2 (.getClickCount ev))
260 ; On right-click, show menu
261 (Utils/isPopupTrigger ev)
262 (let [popup (JPopupMenu.)
263 new-command (fn [title action]
264 (let [item (JMenuItem. title)]
265 (.addActionListener item (proxy [ActionListener] []
266 (actionPerformed [evt]
267 (send-off worker (fn [_] (action))))))
270 (.add (new-command "Show match in 3D"
272 (.add (new-command "Show Mushroom body"
273 #(doseq [[k v] (SAT-lib :mb)]
274 (Display3D/addMesh dummy-ls v k Color/gray))))
275 (.add (new-command "Show interpolated"
276 #(Display3D/addMesh dummy-ls
277 (VectorString3D/createInterpolatedPoints
278 (Editions. (SATs (match :SAT-name)) (.clone vs1) delta false (double 1.1) (double 1.1) (double 1)) (float 0.5))
279 (str "Interpolated with " (match :SAT-name)) Color/magenta)))
280 (.add (new-command "Show stdDev plot"
281 #(let [cp (ini.trakem2.analysis.Compare$CATAParameters.)]
282 (if (.setup cp false nil true true)
283 (let [condensed (VectorString3D/createInterpolatedPoints
284 (let [v1 (.clone vs1)
285 v2 (SATs (match :SAT-name))]
286 (.resample v1 delta true)
287 (.resample v2 delta true)
288 (Editions. v1 v2 delta false (double 1.1) (double 1.1) (double 1)))
290 _ (let [c1 (Calibration.); Dummy default calibration but with same units as the pipe.
291 ; VectorString3D instances are already calibrated.
292 c2 (.. pipe getLayerSet getCalibrationCopy)]
293 (.setUnit c1 (.getUnit c2))
294 (.calibrate condensed c1))
295 _ (set! (. cp plot_max_x) (.length condensed))
296 plot (Compare/makePlot cp (str "Query versus " (match :SAT-name)) condensed)]
297 ; There are lazy evaluations ... that need to be promoted: lets print some data:
298 (println "plot is" plot " and condensed has " (.getStdDevAtEachPoint condensed))
301 (report "ERROR: could not make a plot!")))))))
302 (.show table (.getX ev) (.getY ev)))))))))))
304 ; Enlarge the cell width of the first column
305 (.setMinWidth (.. table getColumnModel (getColumn 0)) (int 250))
308 (.setVisible true))))
311 (def known-libs (ref {"Drosophila-3rd-instar" {:filepath "plugins/SAT-lib-Drosophila-3rd-instar.clj"
312 :reference-brain "FRT42 new"}}))
315 "Returns the SAT lib for the given name, loading it if not there yet. Nil otherwise."
317 (if-let [cached (@*libs* lib-name)]
320 (if-let [m (@known-libs lib-name)]
321 (load-SAT-lib lib-name
323 (m :reference-brain))
325 (report "Unknown lib " lib-name)
329 (report "An error ocurred while loading the SAT library: " e "\nCheck the terminal output.")
330 (.printStackTrace e))))))
333 (defn extract-fiducial-points
334 "Find a set of fiducial points in the project.
335 Will find the first node of type 'fiducial_points' and recursively inspect
336 its children nodes for nodes of type 'ball'. To each found 'ball', it will
337 assign the name of the ball node itself or the first superior node that
338 has a name different that this type.
339 Returns nil if none found."
341 (if-let [fp-node (.. project getRootProjectThing (findChildrenOfTypeR "fiducial_points"))]
342 (if-let [ball-nodes (.findChildrenOfTypeR (first fp-node) "ball")]
345 (let [title (.. pt-ball getObject getTitle) ; the title of the Ball ob itself, if any
346 b (.. pt-ball getObject getWorldBalls)]
349 (println "WARNING: empty ball object" (.getObject pt-ball))
353 (println "WARNING: ball object with more than one ball:" (.getObject pt-ball)))
355 (-> (if (not (= title (.getType pt-ball)))
357 (.. pt-ball getParent getTitle))
359 (.replace \space \_))
361 (Point3d. (get b0 0) (get b0 1) (get b0 2))))))))
364 (println "Found" (count fids) "fiducial points:\n" fids)
366 (println "No ball objects found for" fp-node))
367 (println "No fiducial points found")))
370 "Identify a Pipe or Polyline (which implement Line3D) that represent a SAT."
372 (identify p lib-name 1.0 true false))
373 ([p lib-name delta direct substring]
375 (if-let [SAT-lib (fetch-lib lib-name)]
376 (if-let [fids (extract-fiducial-points (.getProject p))]
380 (let [vs (.asVectorString3D p)]
381 (.calibrate vs (.. p getLayerSet getCalibrationCopy))
387 (report "Cannot find fiducial points for project" (.getProject p)))
388 (report "Cannot find a SAT library for" lib-name))
389 (report "Cannot identify a null pipe or polyline!"))))
392 (defn identify-without-gui
393 "Identify a Pipe or Polyline (which implement Line3D) that represent a SAT.
394 No GUI is shown. Returns the vector containing the list of matches and the list of names."
396 (identify-without-gui p lib-name 1.0 true false))
397 ([p lib-name delta direct substring]
399 (if-let [SAT-lib (fetch-lib lib-name)]
400 (let [SATs (SAT-lib :SATs)
401 query-vs (let [vs (.asVectorString3D p)]
402 (.calibrate vs (.. p getLayerSet getCalibrationCopy))
404 fids (extract-fiducial-points (.getProject p)) ; (first (.. p getProject getRootProjectThing (findChildrenOfTypeR "fiducial_points"))))
405 vs1 (register-vs query-vs fids (SAT-lib :fids))]
406 (match-all SATs vs1 delta direct substring)))
407 (report "Cannot identify a null pipe or polyline!"))))
411 ; "Returns a map with the number of SATs in the library, a list of names vs. sorted sequence lengths, the median length, the average length."
413 ; (if-let [lib (fetch-lib lib-name)]
414 ; (let [SATs (SAT-lib :SATs)
422 ; (report (str "Unknown library " lib-name))))
425 "Return a calibrate and registered VectorString3D from a chain."
426 [chain source-fids target-fids]
427 (let [vs (.clone (.vs chain))]
428 (.calibrate vs (.. chain getRoot getLayerSet getCalibrationCopy))
429 (register-vs vs source-fids target-fids)))
432 "Take all pipes in project and score/classify them.
433 Returns a sorted map of name vs. a vector with:
434 - if the top 1,2,3,4,5 have a homonymous
435 - the number of positives: 'true' and homonymous
436 - the number of false positives: 'true' and not homonymous
437 - the number of false negatives: 'false' and homonymous
438 - the number of true negatives: 'false' and not homonymous
439 - the FPR: false positive rate: false positives / ( false positives + true negatives )
440 - the FNR: false negative rate: false negatives / ( false negatives + true positives )
441 - the TPR: true positive rate: 1 - FNR
442 - the length of the sequence queried"
443 [project lib-name regex-exclude delta direct substring]
444 (let [fids (extract-fiducial-points (first (.. project getRootProjectThing (findChildrenOfTypeR "fiducial_points"))))
446 SAT-lib (fetch-lib lib-name)]
450 (let [vs (resample (ready-vs chain fids (SAT-lib :fids)) delta)
451 [matches names] (match-all (SAT-lib :SATs) vs delta direct substring)
452 names (vec (take tops names))
453 #^String SAT-name (let [t (.getCellTitle chain)]
454 (.substring t (int 0) (.indexOf t (int \space))))
455 ;has-top-match (fn [n] (some #(.startsWith % SAT-name) (take names n)))
456 dummy (println "###\nSAT-name: " SAT-name "\ntop 5 names: " names "\ntop 5 meds: " (map #(% :med) (take 5 matches)))
457 top-matches (loop [i (int 0)
461 (if (.startsWith (names i) SAT-name)
462 (into r (repeat (- tops i) true)) ; the rest are all true
463 (recur (inc i) (into [false] r)))))
464 true-positives (filter #(and
466 (.startsWith (% :SAT-name) SAT-name))
468 true-negatives (filter #(and
470 (not (.startsWith (% :SAT-name) SAT-name)))
472 false-positives (filter #(and
474 (not (.startsWith (% :SAT-name) SAT-name)))
476 false-negatives (filter #(and
478 (.startsWith (% :SAT-name) SAT-name))
480 (println "top matches for " SAT-name " : " (count top-matches) top-matches)
488 (count true-positives)
489 (count false-positives)
490 (count true-negatives)
491 (count false-negatives)
492 (/ (count false-positives) (+ (count false-positives) (count true-negatives))) ; False positive rate
493 (let [divisor (+ (count false-negatives) (count true-positives))]
496 (/ (count false-negatives) divisor))) ; False negative rate
497 (let [divisor (+ (count false-negatives) (count true-positives))]
500 (- 1 (/ (count false-negatives) divisor)))) ; True positive rate
503 (Compare/createPipeChains (.getRootProjectThing project) (.getRootLayerSet project) regex-exclude)))))
505 (defn summarize-as-confusion-matrix
506 "Takes the results of quantify-all and returns the confusion matrix as a map."
510 (merge-with + m {:true-positives (results 5)
511 :false-positives (results 6)
512 :true-negatives (results 7)
513 :false-negatives (results 8)}))
520 (defn summarize-as-distributions
521 "Takes the results of quantify-all and returns four vectors representing four histograms,
522 one for each distribution of true-positives, false-positives, true-negatives and false-negatives."
526 (merge-with (fn [m1 m2]
527 (merge-with + m1 m2))
529 {:true-positives {(results 5) 1}
530 :false-positives {(results 6) 1}
531 :true-negatives {(results 7) 1}
532 :false-negatives {(results 8) 1}}))
533 {:true-positives (sorted-map)
534 :false-positives (sorted-map)
535 :true-negatives (sorted-map)
536 :false-negatives (sorted-map)}
539 (defn summarize-quantify-all
540 "Takes the results of quantify-all and returns a map of two maps,
541 one with the confusion matrix and one with the distribution of true/false-positive/negative matches."
542 [project lib-name regex-exclude delta direct substring]
543 (let [qa (quantify-all project lib-name regex-exclude delta direct substring)]
544 {:confusion-matrix (summarize-as-confusion-matrix qa)
545 :distributions (summarize-as-distributions)}))
548 (defn print-quantify-all [t]
551 (doseq [x (take 5 v)] (print x \tab))
552 (doseq [x (nthnext v 5)] (print (float x) \tab))
557 "Take a set of non-homonymous VectorString3D.
558 Do pairwise comparisons, in this fashion:
560 2. substring length SL = 0.5 * max(len(vs1), len(vs2))
561 3. for all substrings of len SL of vs1, match against all of vs2.
562 4. Record best result in a set sorted by mean euclidean distance of the best match.
563 Assumes chains are registered and resampled.
564 Will ignore cases where the shorter chain is shorter than half the longer chain."
568 (let [rvs1 (let [v (.clone (.vs chain1))]
574 ; Find best reverse submatch for chain1 in chains
576 (println "Processing " (.getShortCellTitle chain1) " versus " (.getShortCellTitle chain2))
577 (let [vs2 (.vs chain2)
579 SL (int (/ (max len1 len2) 2))]
580 (if (or (= chain1 chain2) (< len1 SL) (< len2 SL))
581 best ; A chain has to be longer than half the length of the other to be worth looking at
583 ; Find best reverse submatch between chain1 and chain2
584 (fn [best-rsub start]
585 (let [bm (Compare/findBestMatch (.substring rvs1 start (+ start SL))
589 Compare/AVG_PHYS_DIST ; Also known as mean euclidean distance
594 (< (best-rsub :med) (get bm 1)))
596 {:chain1 chain1 :chain2 chain2 :med (get bm 1)})))
597 {:med Double/MAX_VALUE}
598 (range (- len1 SL)))]
601 (< (best :med) (b :med)))
604 {:med Double/MAX_VALUE} ; bogus initial element
607 #(int (- (%1 :med) (%2 :med)))
608 {:med Double/MAX_VALUE}) ; adding a bogus initial element
612 (defn find-reverse-match
613 "Takes all the chains of a project and compares them all-to-all pairwise with one of the two reversed.
614 Will calibrate the chains and resample them to delta."
615 [project delta regex-exclude]
616 (let [cal (.. project getRootLayerSet getCalibrationCopy)]
619 (.calibrate (.vs chain) cal)
620 (.resample (.vs chain) delta)
622 (Compare/createPipeChains (.getRootProjectThing project) (.getRootLayerSet project) regex-exclude))
625 (defn print-reversed-match
626 "Prints the sorted set returned by find-reversed-similar"
632 (println (.getShortCellTitle c1) (.getShortCellTitle c2) (m :med))))))
636 "Prints the number of unique SATs and the number of unique lineages in a library of SATs.
637 Returns the two sets in a map."
639 (let [lib (fetch-lib lib-name)
642 (if (> (.indexOf clone (int \space)) -1)
643 (conj unique (.substring clone 0 (.indexOf clone (int \space))))
647 unique-lineages (reduce
649 (if (> (.indexOf clone (int \:)) -1)
650 (conj unique (.substring clone 0 (.indexOf clone (int \:))))
654 (println "Unique SATs: " (count unique-sats) \newline "Unique lineages: " (count unique-lineages))
655 {:unique-sats unique-sats
656 :unique-lineages unique-lineages}))