// Source: [sm64pc.git] / tools / gen_asset_list.cpp
// blob c1d0e901ae6e0845ca456e22ff72105d27cc6643
// (repository page header at time of capture: "Update README.md")
// WARNING: THIS SCRIPT IS CURRENTLY BROKEN.
// It doesn't handle skyboxes/cake images correctly.

// Usage:
// g++-8 -std=c++17 ./tools/gen_asset_list.cpp -lstdc++fs -O1 -Wall -o tools/gen_asset_list
// ./tools/gen_asset_list
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace std;
// Byte-order helpers for 16/32-bit values that are stored big-endian in the
// input files. On a little-endian host they reverse the byte order; on any
// other host they expand to the argument unchanged.
// NOTE: the argument is evaluated several times, so pass side-effect-free
// expressions only.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define BSWAP32(x) ((((x) >> 24) & 0xff) | (((x) >> 8) & 0xff00) | (((x) << 8) & 0xff0000) | (((x) << 24) & 0xff000000U))
# define BSWAP16(x) ((((x) >> 8) & 0xff) | (((x) << 8) & 0xff00))
#else
# define BSWAP32(x) (x)
# define BSWAP16(x) (x)
#endif
// Name of the JSON file written by main().
const char* OUTPUT_FILE = "assets.json";
// Number of bytes of each asset that are hashed and searched for in the ROMs.
const size_t CHUNK_SIZE = 16;
// Version identifiers; each one is used to build "build/<lang>/..." paths and
// the "baserom.<lang>.z64" file name.
const std::vector<std::string> LANGS = {"jp", "us", "eu", "sh"};

// Short fixed-width aliases used by the hashing code.
using u8 = uint8_t;
using u64 = uint64_t;
// Location of an asset as recorded by main().
struct Pos {
    // Byte offset of the asset inside the buffer that was searched (either the
    // raw ROM or the decompressed contents of one MIO0 block).
    size_t pos = 0;
    // ROM offset of the MIO0 block the asset was found in; 0 when the asset was
    // found by searching the ROM directly.
    size_t mio0 = 0;
};
41 const u64 C = 12318461241ULL;
43 size_t findCutPos(const string& s) {
44 size_t ind = s.find_first_not_of(s[0], 1);
45 if (ind == string::npos) ind = 0;
46 else ind--;
47 if (ind + CHUNK_SIZE <= s.size())
48 return ind;
49 return s.size() - CHUNK_SIZE;
52 pair<size_t, u64> hashString(const string& inp) {
53 size_t cutPos = findCutPos(inp);
54 string s = inp.substr(cutPos, CHUNK_SIZE);
55 u64 ret = 0;
56 for (u8 c : s) {
57 ret *= C;
58 ret += c;
60 return {cutPos, ret};
63 template<class F>
64 void rollingHashes(const string& str, size_t chunkSize, F&& f) {
65 if (str.size() < chunkSize) return;
66 u64 h = 0, pw = 1;
67 for (size_t i = 0; i < chunkSize; i++)
68 h = h * C + (u8)str[i], pw = pw * C;
69 f(0, h);
70 for (size_t i = chunkSize; i < str.size(); i++) {
71 h = h * C + (u8)str[i] - pw * (u8)str[i-chunkSize];
72 f(i - chunkSize + 1, h);
// Returns true when `target` occurs in `base` starting exactly at offset `pos`.
//
// The range check is written without `pos + target.size()` so that a very
// large `pos` (for example one produced by an unsigned subtraction that
// wrapped) cannot overflow and slip past it. An empty target matches at any
// in-range position; previously it caused a modulo-by-zero.
bool stringMatches(const std::string& base, size_t pos, const std::string& target) {
    const size_t len = target.size();
    if (pos > base.size() || len > base.size() - pos) return false;
    if (len == 0) return true;

    // Probe a few random offsets first: this rejects most false candidates
    // without walking a long common prefix.
    for (int it = 0; it < 10; it++) {
        const size_t i = std::rand() % len;
        if (base[pos + i] != target[i]) return false;
    }

    // Full comparison to confirm the match.
    return base.compare(pos, len, target) == 0;
}
// Decompresses one MIO0 block that starts at `src` and returns the decoded bytes.
//
// Layout as read by this function (all header fields big-endian):
//   bytes  4..7   size of the decompressed data
//   bytes  8..11  offset (from `src`) of the 16-bit back-reference stream
//   bytes 12..15  offset (from `src`) of the raw literal byte stream
//   bytes 16..    control bits, consumed 32 at a time, most significant first
// A set control bit copies one literal byte. A clear bit reads a 16-bit
// parameter: the top 4 bits + 3 give the copy length, the low 12 bits + 1 give
// how far back in the already-produced output the copy starts.
//
// The input is read byte-wise, so `src` does not need to be 4-byte aligned and
// the result does not depend on host endianness. Writes are bounded by the
// declared output size: a copy that would run past the end is truncated, and
// a back-reference that points before the start of the output stops decoding
// (the remaining bytes stay zero).
// NOTE: the function is not told how long the input is, so reads from `src`
// cannot be bounds-checked here; the caller must pass a complete block.
std::string mio0_decompress(uint32_t *src) {
    const unsigned char *base = reinterpret_cast<const unsigned char *>(src);
    const auto be16 = [](const unsigned char *p) {
        return (static_cast<uint32_t>(p[0]) << 8) | static_cast<uint32_t>(p[1]);
    };
    const auto be32 = [](const unsigned char *p) {
        return (static_cast<uint32_t>(p[0]) << 24) | (static_cast<uint32_t>(p[1]) << 16) |
               (static_cast<uint32_t>(p[2]) << 8) | static_cast<uint32_t>(p[3]);
    };

    const size_t size = be32(base + 4);
    std::string output(size, '\0');
    const unsigned char *cmp = base + be32(base + 8);
    const unsigned char *raw = base + be32(base + 12);
    const unsigned char *ctrl = base + 16;

    uint32_t controlBits = 0;
    int counter = 0;
    size_t written = 0;

    while (written < size) {
        if (counter == 0) {
            controlBits = be32(ctrl);
            ctrl += 4;
            counter = 32;
        }

        if (controlBits & 0x80000000u) {
            output[written++] = static_cast<char>(*raw++);
        }
        else {
            const uint32_t param = be16(cmp);
            cmp += 2;
            const size_t distance = (param & 0x0FFF) + 1;
            size_t count = (param >> 12) + 3;

            if (distance > written) break; // malformed: refers to data before the output start
            count = std::min(count, size - written);

            // Byte-by-byte on purpose: source and destination may overlap.
            for (; count != 0; --count, ++written)
                output[written] = output[written - distance];
        }

        counter--;
        controlBits <<= 1;
    }
    return output;
}
// Reads the whole file at path `p` in binary mode and returns its contents.
//
// If the file cannot be opened: returns "" when `allowMissing` is true,
// otherwise prints "missing file <p>" to stderr and exits with status 1.
// If the size cannot be determined or the read fails, an error is printed and
// the program exits with status 1. These are explicit checks rather than
// assert(), so they remain active when NDEBUG is defined.
std::string readFile(const std::string& p, bool allowMissing = false) {
    std::ifstream fin(p, std::ios::binary);
    if (!fin) {
        if (allowMissing) return "";
        std::cerr << "missing file " << p << std::endl;
        std::exit(1);
    }

    fin.seekg(0, fin.end);
    const std::streamoff length = fin.tellg();
    fin.seekg(0, fin.beg);
    if (!fin || length < 0) {
        // tellg() reports failure as -1, which would otherwise become a huge allocation.
        std::cerr << "failed to determine size of file " << p << std::endl;
        std::exit(1);
    }

    std::string data(static_cast<size_t>(length), '\0');
    fin.read(data.data(), length);
    if (!fin) {
        std::cerr << "failed to read file " << p << std::endl;
        std::exit(1);
    }
    return data;
}
// Returns (width, height) read from the header of the PNG file `fname`.
//
// Reads the first 24 bytes of the file, checks that bytes 0..3 are "\x89PNG"
// and bytes 12..15 are "IHDR", and decodes the two big-endian 32-bit integers
// that follow as width and height.
// If the file cannot be read or the header does not match, an error is printed
// to stderr and the program exits with status 1. These are explicit checks
// rather than assert(), so they remain active when NDEBUG is defined.
std::pair<int, int> getPngSize(const std::string& fname) {
    unsigned char header[24] = {};
    std::ifstream fin(fname, std::ios::binary);
    fin.read(reinterpret_cast<char*>(header), sizeof(header));
    if (!fin) {
        std::cerr << "failed to read PNG header of " << fname << std::endl;
        std::exit(1);
    }

    const bool hasSignature =
        header[0] == 0x89 && header[1] == 'P' && header[2] == 'N' && header[3] == 'G';
    const bool hasIhdr =
        header[12] == 'I' && header[13] == 'H' && header[14] == 'D' && header[15] == 'R';
    if (!hasSignature || !hasIhdr) {
        std::cerr << "not a PNG file: " << fname << std::endl;
        std::exit(1);
    }

    const auto be32 = [](const unsigned char* p) {
        return (static_cast<uint32_t>(p[0]) << 24) | (static_cast<uint32_t>(p[1]) << 16) |
               (static_cast<uint32_t>(p[2]) << 8) | static_cast<uint32_t>(p[3]);
    };
    const uint32_t w = be32(header + 16);
    const uint32_t h = be32(header + 20);
    return {static_cast<int>(w), static_cast<int>(h)};
}
// Runs `cmd` through popen() and returns everything it writes to stdout.
//
// If the command cannot be started or reading its output fails, an error is
// printed to stderr and the program exits with status 1. A non-zero status
// from pclose() is reported as a warning on stderr and the captured output is
// still returned; previously the status was discarded, so a failed command
// looked the same as one that printed nothing.
std::string exec(const std::string& cmd) {
    FILE* pipe = popen(cmd.c_str(), "r");
    if (!pipe) {
        std::cerr << "failed to run command: " << cmd << std::endl;
        std::exit(1);
    }

    std::string result;
    char buffer[128];
    size_t got;
    while ((got = std::fread(buffer, 1, sizeof(buffer), pipe)) != 0) {
        result.append(buffer, got);
    }

    // Query the error flag before pclose(); the stream is gone afterwards.
    const bool readFailed = std::ferror(pipe) != 0;
    const int status = pclose(pipe);
    if (readFailed) {
        std::cerr << "failed to read output of command: " << cmd << std::endl;
        std::exit(1);
    }
    if (status != 0) {
        std::cerr << "warning: command did not exit successfully: " << cmd << std::endl;
    }
    return result;
}
174 string compileAsset(const string& fname) {
175 auto ind = fname.rfind('.');
176 if (ind == string::npos) return "";
177 string q = fname.substr(ind + 1);
178 if (q == "png") {
179 string prev = fname.substr(0, ind);
181 for (const string& lang : LANGS) {
182 string ret = readFile("build/" + lang + "/" + prev, true);
183 if (!ret.empty()) return ret;
186 ind = prev.rfind('.');
187 if (ind == string::npos) return "";
188 q = prev.substr(ind + 1);
189 if (q == "rgba16" || q == "ia16" || q == "ia8" || q == "ia4" || q == "ia1") {
190 return exec("./tools/n64graphics -i /dev/stdout -g " + fname + " -f " + q);
193 if (q == "m64")
194 return readFile(fname);
195 if (q == "bin" && fname.find("assets") != string::npos)
196 return readFile(fname);
197 return "";
200 tuple<string, string, vector<string>> compileSoundData(const string& lang) {
201 string upper_lang = lang;
202 for (char& ch : upper_lang) ch = (char)(ch + 'A' - 'a');
203 string build_dir = "build/" + lang;
204 string dir = build_dir + "/sound";
205 string ctl = dir + "/sound_data.ctl";
206 string tbl = dir + "/sound_data.tbl";
207 exec("make " + tbl + " VERSION=" + lang + " NOEXTRACT=1");
208 string sampleFilesStr =
209 exec("python3 tools/assemble_sound.py " +
210 dir + "/samples/ "
211 "sound/sound_banks/ " +
212 dir + "/sound_data.ctl " +
213 dir + "/sound_data.tbl " +
214 "-DVERSION_" + upper_lang +
215 " --print-samples");
216 vector<string> sampleFiles;
217 istringstream iss(sampleFilesStr);
218 string line;
219 while (getline(iss, line)) {
220 line = line.substr(build_dir.size() + 1);
221 line[line.size() - 1] = 'f';
222 sampleFiles.push_back(line);
224 string ctlData = readFile(ctl);
225 string tblData = readFile(tbl);
226 return {ctlData, tblData, sampleFiles};
229 int main() {
230 //intentional syntax error; // (see comment at top of file)
231 map<string, string> assets;
232 map<string, vector<pair<string, int>>> soundAssets;
234 cout << "compiling assets..." << endl;
235 int totalAssets = 0;
236 for (string base_dir : {"assets", "sound/sequences", "textures", "levels", "actors"}) {
237 for (auto& ent: filesystem::recursive_directory_iterator(base_dir)) {
238 string p = ent.path().string();
239 string bin = compileAsset(p);
240 if (bin.empty()) continue;
241 if (bin.size() < CHUNK_SIZE) {
242 cerr << "asset " << p << " is too small (" << bin.size() << " bytes), expected at least " << CHUNK_SIZE << " bytes" << endl;
243 continue;
245 assets[p] = bin;
246 totalAssets++;
249 for (const string& lang : LANGS) {
250 string ctl, tbl;
251 vector<string> sampleFiles;
252 tie(ctl, tbl, sampleFiles) = compileSoundData(lang);
253 assets["@sound ctl " + lang] = ctl;
254 assets["@sound tbl " + lang] = tbl;
255 totalAssets += 2;
256 for (size_t i = 0; i < sampleFiles.size(); i++) {
257 soundAssets[sampleFiles[i]].emplace_back(lang, i);
260 cout << "compiled " << totalAssets << " assets" << endl;
262 unordered_map<u64, vector<pair<size_t, pair<string, string>>>> hashes;
263 for (const auto& asset : assets) {
264 size_t cutPos;
265 u64 hash;
266 tie(cutPos, hash) = hashString(asset.second);
267 hashes[hash].push_back(make_pair(cutPos, asset));
270 map<pair<string, string>, Pos> assetPositions;
271 for (const string& lang : LANGS) {
272 cout << "searching " << lang << "..." << endl;
273 auto remHashes = hashes;
274 auto search = [&](string& str, string lang, size_t mio0) {
275 rollingHashes(str, CHUNK_SIZE, [&](size_t hashPos, u64 hash) {
276 if (!remHashes.count(hash)) return;
277 vector<pair<size_t, pair<string, string>>>& conts = remHashes.at(hash);
278 auto it = remove_if(conts.begin(), conts.end(),
279 [&](const pair<size_t, pair<string, string>>& pa) {
280 size_t cutPos = pa.first;
281 const string& name = pa.second.first;
282 const string& data = pa.second.second;
283 size_t assetPos = hashPos - cutPos;
284 if (stringMatches(str, assetPos, data)) {
285 assetPositions[make_pair(lang, name)] = {assetPos, mio0};
286 return true;
288 return false;
290 conts.erase(it, conts.end());
291 if (conts.empty()) remHashes.erase(hash);
295 string rom = readFile("baserom." + lang + ".z64");
297 for (size_t i = 0; i < rom.size(); i += 4) {
298 if (rom[i] == 'M' && rom[i+1] == 'I' && rom[i+2] == 'O' && rom[i+3] == '0') {
299 string data = mio0_decompress((uint32_t*)&rom[i]);
300 search(data, lang, i);
304 search(rom, lang, 0);
307 cout << "generating " << OUTPUT_FILE << "..." << endl;
308 ofstream fout(OUTPUT_FILE);
309 assert(fout);
310 fout <<
311 "{\n"
312 "\"@comment\": \"This file was generated by tools/gen_asset_list.cpp. "
313 "When renaming a file, either change its name in this file directly, "
314 "or regenerate this file using that script.\"";
316 bool first1 = true;
317 vector<string> notFound;
318 for (const auto& asset : assets) {
319 const string& name = asset.first;
320 const string& data = asset.second;
321 vector<pair<string, Pos>> positions;
322 for (const string& lang : LANGS) {
323 auto it = assetPositions.find(make_pair(lang, name));
324 if (it != assetPositions.end()) {
325 positions.push_back(make_pair(lang, it->second));
329 if (positions.empty()) {
330 notFound.push_back(name);
332 else {
333 fout << ",\n";
334 if (first1) fout << "\n";
335 first1 = false;
336 fout << "\"" << name << "\": [";
337 if (name.substr(name.size() - 4) == ".png") {
338 int w, h;
339 tie(w, h) = getPngSize(name);
340 fout << w << "," << h << ",";
342 fout << data.size() << ",{";
343 bool first2 = true;
344 for (auto& pa : positions) {
345 auto p = pa.second;
346 if (!first2) fout << ",";
347 first2 = false;
348 fout << "\"" << pa.first << "\":[";
349 if (p.mio0)
350 fout << p.mio0 << ",";
351 fout << p.pos << ']';
353 fout << "}]";
356 for (const auto& asset : soundAssets) {
357 const string& name = asset.first;
358 const vector<pair<string, int>>& locs = asset.second;
359 fout << ",\n";
360 fout << "\"" << name << "\": [0,{";
361 bool first2 = true;
362 for (auto& pa : locs) {
363 if (!first2) fout << ",";
364 first2 = false;
365 fout << "\"" << pa.first << "\":[\"@sound\"," << pa.second << ']';
367 fout << "}]";
369 fout << "\n}" << endl;
370 assert(fout);
371 fout.close();
373 if (!notFound.empty()) {
374 cout << endl;
375 cout << "Missing " << notFound.size() << " assets." << endl;
376 if (notFound.size() <= 10) {
377 for (auto& s : notFound) {
378 cout << s << endl;
381 return 1;
384 cout << "done!" << endl;
386 return 0;