dcs-compute-ranking: properly overwrite old entries
[debiancodesearch.git] / cmd / dcs-compute-ranking / compute-ranking.go
blob 92589355ca5acc0aba6aed692042a9d445eef389
1 // vim:ts=4:sw=4:noexpandtab
2 package main
4 import (
5 "compress/gzip"
6 "database/sql"
7 "flag"
8 "fmt"
9 _ "github.com/lib/pq"
10 "github.com/mstap/godebiancontrol"
11 "log"
12 "net/http"
13 "strings"
// Command-line flags and package-level state shared by the functions below.
var (
	// mirrorUrl is the base URL of the Debian mirror that the control files
	// are downloaded from.
	mirrorUrl = flag.String(
		"mirror_url",
		"http://ftp.ch.debian.org/debian/",
		"URL to the debian mirror to use",
	)

	// dryRun makes the program skip all writes to the database.
	dryRun = flag.Bool(
		"dry_run",
		false,
		"Don’t actually write anything to the database.",
	)

	// verbose enables printing of ranking information for every package.
	verbose = flag.Bool(
		"verbose",
		false,
		"Print ranking information about every package",
	)

	// popconInstSrc maps a source package name to its normalized popcon
	// installation count. It is populated by fillPopconInst.
	popconInstSrc = make(map[string]float32)
)
23 // Fills popconInstSrc from the Ultimate Debian Database (udd). The popcon
24 // installation count is stored normalized by dividing through the total amount
25 // of popcon installations.
26 func fillPopconInst() {
27 db, err := sql.Open("postgres", "dbname=udd host=/var/run/postgresql/ sslmode=disable")
28 if err != nil {
29 log.Fatal(err)
32 var totalInstallations int
33 var packageName string
34 var installations int
35 err = db.QueryRow("SELECT SUM(insts) FROM popcon").Scan(&totalInstallations)
36 if err != nil {
37 log.Fatalf("Could not get SUM(insts) FROM popcon: %v", err)
40 if *verbose {
41 log.Printf("total %d installations", totalInstallations)
44 rows, err := db.Query("SELECT source, insts FROM popcon_src")
45 if err != nil {
46 log.Fatal(err)
48 for rows.Next() {
49 if err = rows.Scan(&packageName, &installations); err != nil {
50 log.Fatal(err)
53 // XXX: We multiply 1000 here because all values are < 0.0009.
54 popconInstSrc[packageName] = (float32(installations) / float32(totalInstallations)) * 1000
59 func mustLoadMirroredControlFile(name string) []godebiancontrol.Paragraph {
60 url := fmt.Sprintf("%s/dists/sid/main/%s", *mirrorUrl, name)
61 resp, err := http.Get(url)
62 if err != nil {
63 log.Fatal(err)
65 if resp.StatusCode != 200 {
66 log.Fatalf("URL %q resulted in %v\n", url, resp.Status)
68 defer resp.Body.Close()
70 reader, err := gzip.NewReader(resp.Body)
71 if err != nil {
72 log.Fatal(err)
74 contents, err := godebiancontrol.Parse(reader)
75 if err != nil {
76 log.Fatal(err)
79 return contents
82 func main() {
83 flag.Parse()
85 fillPopconInst()
87 db, err := sql.Open("postgres", "dbname=dcs host=/var/run/postgresql/ sslmode=disable")
88 if err != nil {
89 log.Fatal(err)
92 insert, err := db.Prepare("INSERT INTO pkg_ranking (package, popcon, rdepends) VALUES ($1, $2, $3)")
93 if err != nil {
94 log.Fatal(err)
96 defer insert.Close()
98 update, err := db.Prepare("UPDATE pkg_ranking SET popcon = $2, rdepends = $3 WHERE package = $1")
99 if err != nil {
100 log.Fatal(err)
102 defer update.Close()
104 sourcePackages := mustLoadMirroredControlFile("source/Sources.gz")
105 binaryPackages := mustLoadMirroredControlFile("binary-amd64/Packages.gz")
107 reverseDeps := make(map[string]uint)
108 for _, pkg := range binaryPackages {
109 // We need to filter duplicates, because consider this:
110 // agda-bin Recommends: libghc-agda-dev (>= 2.3.2), libghc-agda-dev (<< 2.3.2)
111 dependsOn := make(map[string]bool)
112 // NB: This differs from what apt-cache rdepends spit out. apt-cache
113 // also considers the Replaces field.
114 allDeps := pkg["Depends"] + "," + pkg["Suggests"] + "," + pkg["Recommends"] + "," + pkg["Enhances"]
115 for _, dep := range strings.FieldsFunc(allDeps, func(r rune) bool {
116 return r == ',' || r == '|'
117 }) {
118 trimmed := strings.TrimSpace(dep)
119 spaceIdx := strings.Index(trimmed, " ")
120 if spaceIdx == -1 {
121 spaceIdx = len(trimmed)
123 dependsOn[trimmed[:spaceIdx]] = true
125 for name, _ := range dependsOn {
126 reverseDeps[name] += 1
130 for _, pkg := range sourcePackages {
131 rdepcount := float32(0)
132 for _, packageName := range strings.Split(pkg["Binary"], ",") {
133 packageName = strings.TrimSpace(packageName)
134 if packageName == "" {
135 continue
137 rdepcount += float32(reverseDeps[packageName])
139 srcpkg := pkg["Package"]
140 packageRank := popconInstSrc[srcpkg]
141 rdepcount = 1.0 - (1.0 / float32(rdepcount+1))
142 if *verbose {
143 fmt.Printf("%f %f %s\n", packageRank, rdepcount, srcpkg)
145 if *dryRun {
146 continue
148 result, err := update.Exec(srcpkg, packageRank, rdepcount)
149 affected := int64(0)
150 // The UPDATE succeeded, but let’s see whether it affected a row.
151 if err == nil {
152 affected, err = result.RowsAffected()
154 if err != nil || affected == 0 {
155 if _, err = insert.Exec(srcpkg, packageRank, rdepcount); err != nil {
156 log.Fatal(err)