// TODO: use container/vector as base for concatHeap
// TODO: or maybe we can use an in-place heap? In pprof top10, memmove and garbage collection caused by heap Push/Pop show up as major factors.
11 type concatHeap
[]postMapReader
13 func (h
*concatHeap
) Less(i
, j
int) bool {
14 if (*h
)[i
].trigram
== (*h
)[j
].trigram
{
15 return (*h
)[i
].idmap
[0].new < (*h
)[j
].idmap
[0].new
17 return (*h
)[i
].trigram
< (*h
)[j
].trigram
20 func (h
*concatHeap
) Swap(i
, j
int) {
21 (*h
)[i
], (*h
)[j
] = (*h
)[j
], (*h
)[i
]
24 func (h
*concatHeap
) Len() int {
28 func (h
*concatHeap
) Pop() (v
interface{}) {
29 *h
, v
= (*h
)[:h
.Len()-1], (*h
)[h
.Len()-1]
33 func (h
*concatHeap
) Push(v
interface{}) {
34 *h
= append(*h
, v
.(postMapReader
))
37 //type concatHeap struct {
41 //func (h *concatHeap) Less(i, j int) bool {
42 // return h.At(i).(postMapReader).trigram < h.At(j).(postMapReader).trigram
45 func ConcatN(dst
string, sources
...string) {
46 //offsets := make([]uint32, len(sources))
47 ixes
:= make([]*Index
, len(sources
))
48 readers
:= make([]postMapReader
, len(sources
))
49 for i
, source
:= range sources
{
50 ixes
[i
] = Open(source
)
54 out
.writeString(magic
)
56 // Merged list of paths.
57 pathData
:= out
.offset()
58 out
.writeString("\x00")
60 // Merged list of names.
61 nameData
:= out
.offset()
62 nameIndexFile
:= bufCreate("")
64 for i
, _
:= range sources
{
65 readers
[i
].init(ixes
[i
], []idrange
{{
67 hi
: uint32(ixes
[i
].numName
),
69 offset
+= uint32(ixes
[i
].numName
)
70 // TODO: we can just memcpy the blocks of names, but we still need to
71 // fix up all the nameIndexFile numbers (i.e. write out.offset() + num
72 // instead of num). That could be faster than the following code, though:
73 for j
:= 0; j
< ixes
[i
].numName
; j
++ {
74 nameIndexFile
.writeUint32(out
.offset() - nameData
)
75 out
.writeString(ixes
[i
].Name(uint32(j
)))
76 out
.writeString("\x00")
80 nameIndexFile
.writeUint32(out
.offset())
82 // Merged list of posting lists.
83 postData
:= out
.offset()
89 lastTrigram
:= ^uint32(0)
90 for i
, _
:= range sources
{
91 heap
.Push(h
, readers
[i
])
94 reader
:= heap
.Pop(h
).(postMapReader
)
95 nextTrigram
:= reader
.trigram
97 if nextTrigram
== ^uint32(0) {
101 if lastTrigram
!= nextTrigram
&& lastTrigram
!= ^uint32(0) {
104 if lastTrigram
!= nextTrigram
{
105 w
.trigram(nextTrigram
)
108 reader
.writePostingList(&w
)
112 lastTrigram
= nextTrigram
116 nameIndex
:= out
.offset()
117 copyFile(out
, nameIndexFile
)
119 // Posting list index
120 postIndex
:= out
.offset()
121 copyFile(out
, w
.postIndexFile
)
123 out
.writeUint32(pathData
)
124 out
.writeUint32(nameData
)
125 out
.writeUint32(postData
)
126 out
.writeUint32(nameIndex
)
127 out
.writeUint32(postIndex
)
128 out
.writeString(trailerMagic
)
131 os
.Remove(nameIndexFile
.name
)
132 os
.Remove(w
.postIndexFile
.name
)
134 for i
, _
:= range sources
{