show: make highlight legible
[debiancodesearch.git] / cmd / dcs-reshard / reshard.go
blob23dbe41390adec4e16150ba1d7111bb0e14d531c
1 // Generates shell scripts to be run on each old shard. These scripts copy data
2 // around to the new shards. Use this whenever your -shards= configuration on
3 // dcs-feeder changes and you don’t want to start over with downloading data.
4 package main
6 import (
7 "compress/gzip"
8 "crypto/md5"
9 "flag"
10 "fmt"
11 "github.com/stapelberg/godebiancontrol"
12 "io"
13 "log"
14 "net/http"
15 "os"
16 "strconv"
17 "strings"
// Command-line flags and the shard lists derived from them.
var (
	// mirrorUrl is the base URL of the Debian mirror. main appends the path
	// of the sid Sources.gz index to it.
	mirrorUrl = flag.String("mirror_url",
		"http://deb.debian.org/debian",
		"Debian mirror URL")

	// oldShardsStr lists the shards of the previous configuration. One
	// script is generated per entry, in the order given here.
	oldShardsStr = flag.String("old_shards",
		"10.209.68.76:21010,10.209.68.12:21010,10.209.68.22:21010,10.209.68.74:21010,10.209.66.198:21010",
		"comma-separated list of old shards (where the data currently lives)")

	// newShardsStr lists the shards of the new configuration, i.e. the
	// copy targets used in the generated scripts.
	newShardsStr = flag.String("new_shards",
		"10.209.68.76:21010,10.209.68.12:21010,10.209.68.22:21010,10.209.68.74:21010,10.209.66.198:21010,10.209.102.194:21010",
		"comma-separated list of new shards (where the data should end up)")

	// oldShards and newShards hold the split values of the two flags above.
	// They are filled in by main after flag.Parse.
	oldShards []string
	newShards []string
)
// taskIdxForPackage maps pkg to a task index: the first 6 bytes (48 bits) of
// the MD5 digest of pkg are read as one hexadecimal number, which is then
// reduced modulo tasks.
//
// tasks must be positive; a value of 0 panics with a division by zero.
func taskIdxForPackage(pkg string, tasks int) int {
	h := md5.New()
	// Writing to a hash.Hash never returns an error, so the result is ignored.
	io.WriteString(h, pkg)
	i, err := strconv.ParseInt(fmt.Sprintf("%x", h.Sum(nil)[:6]), 16, 64)
	if err != nil {
		log.Fatal(err)
	}
	// Reduce in int64 before narrowing to int: the 48-bit value does not fit
	// into a 32-bit int, and truncating it first could produce a different
	// (even negative) index on such platforms.
	return int(i % int64(tasks))
}
45 func main() {
46 flag.Parse()
48 oldShards = strings.Split(*oldShardsStr, ",")
49 newShards = strings.Split(*newShardsStr, ",")
51 sourcesSuffix := "/dists/sid/main/source/Sources.gz"
52 resp, err := http.Get(*mirrorUrl + sourcesSuffix)
53 if err != nil {
54 log.Printf("Could not get Sources.gz: %v\n", err)
55 return
57 defer resp.Body.Close()
58 reader, err := gzip.NewReader(resp.Body)
59 if err != nil {
60 log.Printf("Could not initialize gzip reader: %v\n", err)
61 return
63 defer reader.Close()
65 sourcePackages, err := godebiancontrol.Parse(reader)
66 if err != nil {
67 log.Printf("Could not parse Sources.gz: %v\n", err)
68 return
71 scripts := make([]*os.File, len(oldShards))
72 for idx, _ := range oldShards {
73 scripts[idx], err = os.Create(fmt.Sprintf("/tmp/dcs-instant-%d.rackspace.zekjur.net", idx) + ".sh")
74 if err != nil {
75 log.Fatal(err)
77 defer scripts[idx].Close()
80 // for every package, calculate who’d be responsible and see if it’s present on that shard.
81 for _, pkg := range sourcePackages {
82 p := pkg["Package"] + "_" + pkg["Version"]
83 oldIdx := taskIdxForPackage(p, len(oldShards))
84 newIdx := taskIdxForPackage(p, len(newShards))
85 log.Printf("oldidx = %d, newidx = %d\n", oldIdx, newIdx)
86 if oldIdx == newIdx {
87 continue
89 fmt.Fprintf(scripts[oldIdx], "scp -o StrictHostKeyChecking=no -i ~/.ssh/dcs-auto-rs -r /dcs-ssd/unpacked/%s /dcs-ssd/unpacked/%s.idx root@%s:/dcs-ssd/unpacked/ && rm -rf /dcs-ssd/unpacked/%s /dcs-ssd/unpacked/%s.idx\n",
90 p, p, strings.TrimSuffix(newShards[newIdx], ":21010"), p, p)