fix server-rendered results by generating result pages into a buffer and reading...
[debiancodesearch.git] / index / concatn.go
bloba82efba1ead13d2616eb93e63a276364b2c6e767
1 package index
3 import (
4 "container/heap"
5 // TODO: use container/vector as base for concatHeap
6 // TODO: or maybe we can use an in-place heap? in pprof top10, one can see memmove and garbage collection from push/pull to be major factors
7 //"container/vector"
8 "os"
11 type concatHeap []postMapReader
13 func (h *concatHeap) Less(i, j int) bool {
14 if (*h)[i].trigram == (*h)[j].trigram {
15 return (*h)[i].idmap[0].new < (*h)[j].idmap[0].new
17 return (*h)[i].trigram < (*h)[j].trigram
20 func (h *concatHeap) Swap(i, j int) {
21 (*h)[i], (*h)[j] = (*h)[j], (*h)[i]
24 func (h *concatHeap) Len() int {
25 return len(*h)
28 func (h *concatHeap) Pop() (v interface{}) {
29 *h, v = (*h)[:h.Len()-1], (*h)[h.Len()-1]
30 return
33 func (h *concatHeap) Push(v interface{}) {
34 *h = append(*h, v.(postMapReader))
37 //type concatHeap struct {
38 // vector.Vector
39 //}
41 //func (h *concatHeap) Less(i, j int) bool {
42 // return h.At(i).(postMapReader).trigram < h.At(j).(postMapReader).trigram
43 //}
45 func ConcatN(dst string, sources ...string) {
46 //offsets := make([]uint32, len(sources))
47 ixes := make([]*Index, len(sources))
48 readers := make([]postMapReader, len(sources))
49 for i, source := range sources {
50 ixes[i] = Open(source)
53 out := bufCreate(dst)
54 out.writeString(magic)
56 // Merged list of paths.
57 pathData := out.offset()
58 out.writeString("\x00")
60 // Merged list of names.
61 nameData := out.offset()
62 nameIndexFile := bufCreate("")
63 var offset uint32
64 for i, _ := range sources {
65 readers[i].init(ixes[i], []idrange{{
66 lo: 0,
67 hi: uint32(ixes[i].numName),
68 new: offset}})
69 offset += uint32(ixes[i].numName)
70 // TODO: we can just memcpy the blocks of names, but we still need to
71 // fix up all the nameIndexFile numbers (i.e. write out.offset() + num
72 // instead of num). That could be faster than the following code, though:
73 for j := 0; j < ixes[i].numName; j++ {
74 nameIndexFile.writeUint32(out.offset() - nameData)
75 out.writeString(ixes[i].Name(uint32(j)))
76 out.writeString("\x00")
80 nameIndexFile.writeUint32(out.offset())
82 // Merged list of posting lists.
83 postData := out.offset()
84 var w postDataWriter
86 w.init(out)
88 h := new(concatHeap)
89 lastTrigram := ^uint32(0)
90 for i, _ := range sources {
91 heap.Push(h, readers[i])
93 for {
94 reader := heap.Pop(h).(postMapReader)
95 nextTrigram := reader.trigram
97 if nextTrigram == ^uint32(0) {
98 break
101 if lastTrigram != nextTrigram && lastTrigram != ^uint32(0) {
102 w.endTrigram()
104 if lastTrigram != nextTrigram {
105 w.trigram(nextTrigram)
108 reader.writePostingList(&w)
109 reader.nextTrigram()
110 heap.Push(h, reader)
112 lastTrigram = nextTrigram
115 // Name index
116 nameIndex := out.offset()
117 copyFile(out, nameIndexFile)
119 // Posting list index
120 postIndex := out.offset()
121 copyFile(out, w.postIndexFile)
123 out.writeUint32(pathData)
124 out.writeUint32(nameData)
125 out.writeUint32(postData)
126 out.writeUint32(nameIndex)
127 out.writeUint32(postIndex)
128 out.writeString(trailerMagic)
129 out.flush()
131 os.Remove(nameIndexFile.name)
132 os.Remove(w.postIndexFile.name)
134 for i, _ := range sources {
135 ixes[i].Close()