11 std::map
<uint32_t, uint32_t> glyph_index
;
12 std::map
<std::string
, uint32_t> glyph_offsets
;
13 std::map
<uint32_t, std::string
> glyphs
;
14 uint32_t next_offset
= 1;
16 void add_glyph(uint32_t codepoint
, const std::string
& appearence
)
18 if(((codepoint
>> 16) > 10) || ((codepoint
& 0x1FF800) == 0xD800)) {
19 std::cerr
<< "Illegal codepoint " << std::hex
<< codepoint
<< "." << std::endl
;
22 if(!glyph_offsets
.count(appearence
)) {
23 if(appearence
.find_first_not_of("0123456789ABCDEFabcdef") < appearence
.length()) {
24 std::cerr
<< "Invalid font representation (invalid hex char)." << std::endl
;
25 std::cerr
<< "Faulty representation: '" << appearence
<< "'." << std::endl
;
28 glyph_offsets
[appearence
] = next_offset
;
29 glyph_index
[next_offset
] = codepoint
;
30 if(appearence
.length() == 32)
32 else if(appearence
.length() == 64)
35 std::cerr
<< "Invalid font representation (length " << appearence
.length() << ")."
37 std::cerr
<< "Faulty representation: '" << appearence
<< "'." << std::endl
;
41 glyphs
[codepoint
] = appearence
;
44 uint32_t bucket_size(uint32_t items
)
48 return static_cast<uint32_t>(pow(items
, 1.5));
51 //This is Jenkin's MIX function.
52 uint32_t keyhash(uint32_t key
, uint32_t item
, uint32_t mod
)
57 a
=a
-b
; a
=a
-c
; a
=a
^(c
>> 13);
58 b
=b
-c
; b
=b
-a
; b
=b
^(a
<< 8);
59 c
=c
-a
; c
=c
-b
; c
=c
^(b
>> 13);
60 a
=a
-b
; a
=a
-c
; a
=a
^(c
>> 12);
61 b
=b
-c
; b
=b
-a
; b
=b
^(a
<< 16);
62 c
=c
-a
; c
=c
-b
; c
=c
^(b
>> 5);
63 a
=a
-b
; a
=a
-c
; a
=a
^(c
>> 3);
64 b
=b
-c
; b
=b
-a
; b
=b
^(a
<< 10);
65 c
=c
-a
; c
=c
-b
; c
=c
^(b
>> 15);
69 uint32_t wrong_key(uint32_t key
, uint32_t hash
, uint32_t mod
)
74 while(keyhash(key
, i
, mod
) == hash
)
79 std::pair
<uint32_t, uint32_t> make_subdirectory(std::vector
<uint32_t>& items
)
82 return std::make_pair(0, items
.size());
83 std::vector
<uint32_t> memory
;
84 memory
.resize(bucket_size(items
.size()));
88 //Safety hatch: If unsuccessful too many times, increase the hash size.
90 memory
.resize(memory
.size() + 1);
95 for(uint32_t i
= 0; i
< memory
.size(); i
++)
96 memory
[i
] = 0xFFFFFFFFUL
;
97 for(uint32_t i
= 0; i
< items
.size(); i
++) {
98 uint32_t j
= keyhash(seed
, items
[i
], memory
.size());
99 if(memory
[j
] != 0xFFFFFFFFUL
) {
103 memory
[j
] = items
[i
];
109 return std::make_pair(seed
, memory
.size());
112 void write_subdirectory(std::vector
<uint32_t>& items
, std::pair
<uint32_t, uint32_t> p
,
117 std::vector
<uint32_t> memory
;
118 std::cout
<< "," << p
.first
<< "UL," << p
.second
<< "UL";
119 memory
.resize(p
.second
);
120 for(uint32_t i
= 0; i
< memory
.size(); i
++)
122 for(uint32_t i
= 0; i
< items
.size(); i
++)
123 memory
[keyhash(p
.first
, items
[i
], memory
.size())] = items
[i
];
124 for(uint32_t i
= 0; i
< memory
.size(); i
++) {
125 if(memory
[i
] != badkey
)
126 std::cout
<< "," << memory
[i
] << "UL," << glyph_offsets
[glyphs
[memory
[i
]]] << "UL";
128 std::cout
<< "," << badkey
<< "UL,0UL";
130 std::cout
<< std::endl
;
133 uint32_t glyph_part_string(const std::string
& str
)
135 return strtoul(str
.c_str(), NULL
, 16);
140 for(std::map
<uint32_t, uint32_t>::iterator i
= glyph_index
.begin(); i
!= glyph_index
.end(); ++i
) {
141 std::string glyph
= glyphs
[i
->second
];
142 if(glyph
.length() == 32) {
144 for(unsigned i
= 0; i
< 32; i
+= 8)
145 std::cout
<< "," << glyph_part_string(glyph
.substr(i
, 8)) << "UL";
146 } else if(glyph
.length() == 64) {
148 for(unsigned i
= 0; i
< 64; i
+= 8)
149 std::cout
<< "," << glyph_part_string(glyph
.substr(i
, 8)) << "UL";
151 std::cerr
<< "INTERNAL ERROR: Invalid glyph data" << std::endl
;
155 std::cout
<< std::endl
;
158 void write_main_directory(uint32_t seed
, std::vector
<std::vector
<uint32_t> >& main_directory
,
159 std::vector
<std::pair
<uint32_t, uint32_t> >& subdirectories
)
161 std::cout
<< "," << seed
<< "UL";
162 std::cout
<< "," << main_directory
.size() << "UL";
163 uint32_t subdir_offset
= next_offset
+ 4 + main_directory
.size();
164 for(size_t i
= 0; i
< main_directory
.size(); i
++) {
165 if(subdirectories
[i
].second
) {
166 std::cout
<< "," << subdir_offset
<< "UL";
167 subdir_offset
= subdir_offset
+ 2 + 2 * subdirectories
[i
].second
;
169 std::cout
<< "," << next_offset
<< "UL";
171 std::cout
<< std::endl
;
179 while(std::getline(std::cin
, line
)) {
180 if(line
[line
.length() - 1] == '\r')
181 line
= line
.substr(0, line
.length() - 1);
182 size_t split
= line
.find_first_of("#:");
183 if(split
> line
.length() || line
[split
] == '#')
185 std::string codepoint
= line
.substr(0, split
);
186 std::string appearence
= line
.substr(split
+ 1);
189 cp
= strtoul(codepoint
.c_str(), &end
, 16);
191 std::cerr
<< "Invalid codepoint number '" << codepoint
<< "." << std::endl
;
194 add_glyph(cp
, appearence
);
196 std::cerr
<< "Loaded " << glyphs
.size() << " glyphs (" << glyph_offsets
.size() << " distinct)." << std::endl
;
198 std::cerr
<< "Need at least 1 glyph." << std::endl
;
201 uint32_t best_seed
= 0;
202 uint32_t best_score
= 0xFFFFFFFFUL
;
203 uint32_t main_directory_size
= glyphs
.size();
204 for(unsigned i
= 0; i
< 1000; i
++) {
206 std::cerr
<< "." << std::flush
;
207 uint32_t seed
= rand();
208 std::vector
<uint32_t> bucket_items
;
209 bucket_items
.resize(main_directory_size
);
210 for(uint32_t i
= 0; i
< main_directory_size
; i
++)
212 for(std::map
<uint32_t, std::string
>::iterator i
= glyphs
.begin(); i
!= glyphs
.end(); ++i
)
213 bucket_items
[keyhash(seed
, i
->first
, main_directory_size
)]++;
215 for(uint32_t i
= 0; i
< main_directory_size
; i
++)
217 score
= score
+ 2 * bucket_size(bucket_items
[i
]) + 3;
220 if(score
< best_score
) {
225 std::cerr
<< std::endl
;
226 std::cerr
<< "Best estimated score: " << best_score
<< " for seed " << best_seed
<< "." << std::endl
;
227 std::vector
<std::vector
<uint32_t> > main_directory
;
228 std::vector
<std::pair
<uint32_t, uint32_t> > subdirectories
;
229 main_directory
.resize(main_directory_size
);
230 subdirectories
.resize(main_directory_size
);
231 for(std::map
<uint32_t, std::string
>::iterator i
= glyphs
.begin(); i
!= glyphs
.end(); ++i
)
232 main_directory
[keyhash(best_seed
, i
->first
, main_directory_size
)].push_back(i
->first
);
233 uint32_t update_interval
= main_directory_size
/ 10;
234 unsigned real_score
= 0;
235 for(size_t i
= 0; i
< main_directory
.size(); i
++) {
236 if((i
% update_interval
) == 0)
237 std::cerr
<< "." << std::flush
;
238 subdirectories
[i
] = make_subdirectory(main_directory
[i
]);
239 if(subdirectories
[i
].second
)
240 real_score
= real_score
+ 2 * subdirectories
[i
].second
+ 3;
242 real_score
= real_score
+ 1;
244 std::cerr
<< std::endl
;
245 std::cerr
<< "Final score: " << real_score
<< " (" << next_offset
+ real_score
+ 4 << " elements)."
247 std::cout
<< "#include <cstdint>" << std::endl
;
248 std::cout
<< "uint32_t fontdata_size = " << next_offset
+ real_score
+ 4 << ";" << std::endl
;
249 std::cout
<< "uint32_t fontdata[] = {";
250 std::cout
<< next_offset
+ 2 << "UL";
252 std::cout
<< ",0,0" << std::endl
;
253 write_main_directory(best_seed
, main_directory
, subdirectories
);
254 for(size_t i
= 0; i
< main_directory
.size(); i
++)
255 write_subdirectory(main_directory
[i
], subdirectories
[i
], wrong_key(best_seed
, i
, main_directory_size
));
256 std::cout
<< "};" << std::endl
;