Pass query to /events via URL parameter
[debiancodesearch.git] / cmd / dcs-compute-ranking / compute-ranking.go
blob e290b0404875d31e8706b05862826a70512f65ee
1 // vim:ts=4:sw=4:noexpandtab
2 package main
4 import (
5 "compress/gzip"
6 "encoding/json"
7 "flag"
8 "fmt"
9 "io/ioutil"
10 "log"
11 "net/http"
12 "os"
13 "path/filepath"
14 "strings"
16 "github.com/stapelberg/godebiancontrol"
// Command-line flags.
var (
	// mirrorUrl is the base URL of the Debian mirror; control files are
	// fetched from <mirrorUrl>/dists/sid/main/.
	mirrorUrl = flag.String("mirror_url",
		"http://httpredir.debian.org/debian",
		"URL to the debian mirror to use")

	// verbose makes main print one line per source package with both ranking
	// values.
	verbose = flag.Bool("verbose",
		false,
		"Print ranking information about every package")

	// outputPath is the final location of the ranking JSON. main writes to a
	// temporary file in the same directory first and then renames it into
	// place, so the TMPDIR environment variable plays no role.
	outputPath = flag.String("output_path",
		"/var/dcs/ranking.json",
		"Path to store the resulting ranking JSON data at. Will be overwritten atomically using rename(2); the temporary file is created in the same directory as -output_path, so that directory must be writable.")
)
33 func mustLoadMirroredControlFile(name string) []godebiancontrol.Paragraph {
34 url := fmt.Sprintf("%s/dists/sid/main/%s", *mirrorUrl, name)
35 resp, err := http.Get(url)
36 if err != nil {
37 log.Fatal(err)
39 if resp.StatusCode != 200 {
40 log.Fatalf("URL %q resulted in %v\n", url, resp.Status)
42 defer resp.Body.Close()
44 reader, err := gzip.NewReader(resp.Body)
45 if err != nil {
46 log.Fatal(err)
48 contents, err := godebiancontrol.Parse(reader)
49 if err != nil {
50 log.Fatal(err)
53 return contents
56 func main() {
57 flag.Parse()
59 sourcePackages := mustLoadMirroredControlFile("source/Sources.gz")
60 binaryPackages := mustLoadMirroredControlFile("binary-amd64/Packages.gz")
62 popconInstSrc, err := popconInstallations(binaryPackages)
63 if err != nil {
64 log.Fatal(err)
66 // Normalize the installation count.
67 var totalInstallations float32
68 for _, insts := range popconInstSrc {
69 totalInstallations += insts
71 for srcpkg, insts := range popconInstSrc {
72 // We multiply 1000 here because all values are < 0.0009.
73 popconInstSrc[srcpkg] = (insts / totalInstallations) * 1000
76 reverseDeps := make(map[string]uint)
77 for _, pkg := range binaryPackages {
78 // We need to filter duplicates, because consider this:
79 // agda-bin Recommends: libghc-agda-dev (>= 2.3.2), libghc-agda-dev (<< 2.3.2)
80 dependsOn := make(map[string]bool)
81 // NB: This differs from what apt-cache rdepends spit out. apt-cache
82 // also considers the Replaces field.
83 allDeps := pkg["Depends"] + "," + pkg["Suggests"] + "," + pkg["Recommends"] + "," + pkg["Enhances"]
84 for _, dep := range strings.FieldsFunc(allDeps, func(r rune) bool {
85 return r == ',' || r == '|'
86 }) {
87 trimmed := strings.TrimSpace(dep)
88 spaceIdx := strings.Index(trimmed, " ")
89 if spaceIdx == -1 {
90 spaceIdx = len(trimmed)
92 dependsOn[trimmed[:spaceIdx]] = true
94 for name, _ := range dependsOn {
95 reverseDeps[name] += 1
99 type storedRanking struct {
100 Inst float32
101 Rdep float32
103 rankings := make(map[string]storedRanking)
105 for _, pkg := range sourcePackages {
106 rdepcount := float32(0)
107 for _, packageName := range strings.Split(pkg["Binary"], ",") {
108 packageName = strings.TrimSpace(packageName)
109 if packageName == "" {
110 continue
112 rdepcount += float32(reverseDeps[packageName])
114 srcpkg := pkg["Package"]
115 packageRank := popconInstSrc[srcpkg]
116 rdepcount = 1.0 - (1.0 / float32(rdepcount+1))
117 if *verbose {
118 fmt.Printf("%f %f %s\n", packageRank, rdepcount, srcpkg)
120 rankings[srcpkg] = storedRanking{packageRank, rdepcount}
123 f, err := ioutil.TempFile(filepath.Dir(*outputPath), "dcs-compute-ranking")
124 if err != nil {
125 log.Fatal(err)
128 if err := json.NewEncoder(f).Encode(rankings); err != nil {
129 log.Fatal(err)
132 if err := f.Close(); err != nil {
133 log.Fatal(err)
136 if err := os.Rename(f.Name(), *outputPath); err != nil {
137 log.Fatal(err)