stop obsoleting events once the query is done
[debiancodesearch.git] / ranking / post-ranking.go
blobcb02d4a9eef1c8d4b97458a36530eae7229fab07
1 // vim:ts=4:sw=4:noexpandtab
3 // Post-ranking happens on the source backend (because it has the source files
4 // in the kernel’s page cache). In the post-ranking phase we can do (limited)
5 // source file level analysis, such as in which scope the query string was
6 // matched (comment, top-level, sub-level).
7 package ranking
9 import (
10 "github.com/Debian/dcs/regexp"
11 "unicode"
14 //var packageLocation *regexp.Regexp = regexp.MustCompile(`debian-source-mirror/unpacked/([^/]+)_`)
// countSpaces reports how many whitespace runes (as classified by
// unicode.IsSpace) appear at the very beginning of line. Counting stops at the
// first rune that is not whitespace; a line made up solely of whitespace
// yields its full rune count.
func countSpaces(line string) int32 {
	var leading int32
	for _, c := range line {
		if unicode.IsSpace(c) {
			leading++
			continue
		}
		// First non-whitespace rune: the indentation ends here.
		return leading
	}
	return leading
}
27 func PostRank(opts RankingOpts, match *regexp.Match, querystr *QueryStr) float32 {
28 totalRanking := float32(1)
30 line := match.Context
32 if opts.Scope || opts.Weighted {
33 // Ranking: In which scope is the match? The higher the scope, the more
34 // important it is.
35 scopeRanking := 1.0 - (float32(countSpaces(line)) / 100.0)
36 totalRanking *= scopeRanking
39 if opts.Linematch || opts.Weighted {
40 // Ranking: Does the search query (with enforced word boundaries) match the
41 // line? If yes, earlier matches are better (such as function names versus
42 // parameter types).
43 index := querystr.boundaryRegexp.FindStringIndex(line)
44 if index != nil {
45 matchRanking := 0.75 + (0.25 * (1.0 - float32(index[0]) / float32(len(line))))
46 totalRanking *= matchRanking
47 } else {
48 // Punish the lines in which there was no word boundary match.
49 totalRanking *= 0.5
53 return totalRanking