split up constants.h some
[trinity.git] / unicode.c
blobf32eb1860c279de1cd21b2a81a71f30722c0a3dd
1 /*
2 * Routines for generating a page of mangled unicode.
4 * Inspiration:
5 * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/quickbrown.txt
6 * http://www.columbia.edu/~fdc/utf8/
7 * http://www.cl.cam.ac.uk/~mgk25/unicode.html
8 * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
9 * http://stackoverflow.com/questions/1319022/really-good-bad-utf-8-example-test-data
10 * http://www.twitter.com/glitchr
12 * Lots more to do here, but this is a start.
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include "arch.h"
19 #include "sanitise.h"
21 void gen_unicode_page(char *page)
23 unsigned int i = 0, l;
24 unsigned int unilen;
26 const char unicode1[4] = { 0xb8, 0xe0, 0xe0, 0xaa };
27 const char unicode2[6] = { 0x89, 0xb9, 0xb9, 0xe0, 0xe0, 0x89 };
28 const char unicode3[2] = { 0x89, 0xb9 };
29 const char unicode4[18] = { 0xbb, 0xef, 0xd2, 0xa9, 0xd2, 0x88, 0x20, 0x88, 0x88, 0xd2, 0x88, 0xd2, 0xd2, 0x20, 0xd2, 0x88, 0x0a, 0x88 };
30 const char unicode5[4] = { 0xd9, 0x20, 0xd2, 0x87 };
31 const char unicode6[4] = { 0xcc, 0x88, 0xd2, 0xbf };
32 const char unicode7[2] = { 0x0a, 0xbf };
33 const char *zalgo[] = { "T̫̺̳o̬̜ ì̬͎̲̟nv̖̗̻̣̹̕o͖̗̠̜̤k͍͚̹͖̼e̦̗̪͍̪͍ ̬ͅt̕h̠͙̮͕͓e̱̜̗͙̭ ̥͔̫͙̪͍̣͝ḥi̼̦͈̼v҉̩̟͚̞͎e͈̟̻͙̦̤-m̷̘̝̱í͚̞̦̳n̝̲̯̙̮͞d̴̺̦͕̫ ̗̭̘͎͖r̞͎̜̜͖͎̫͢ep͇r̝̯̝͖͉͎̺e̴s̥e̵̖̳͉͍̩̗n̢͓̪͕̜̰̠̦t̺̞̰i͟n҉̮̦̖̟g̮͍̱̻͍̜̳ ̳c̖̮̙̣̰̠̩h̷̗͍̖͙̭͇͈a̧͎̯̹̲̺̫ó̭̞̜̣̯͕s̶̤̮̩̘.̨̻̪̖͔ ̳̭̦̭̭̦̞́I̠͍̮n͇̹̪̬v̴͖̭̗̖o̸k҉̬̤͓͚̠͍i͜n̛̩̹͉̘̹g͙ ̠̥ͅt̰͖͞h̫̼̪e̟̩̝ ̭̠̲̫͔fe̤͇̝̱e͖̮̠̹̭͖͕l͖̲̘͖̠̪i̢̖͎̮̗̯͓̩n̸̰g̙̱̘̗͚̬ͅ ͍o͍͍̩̮͢f̖͓̦̥ ̘͘c̵̫̱̗͚͓̦h͝a̝͍͍̳̣͖͉o͙̟s̤̞.̙̝̭̣̳̼͟ ̢̻͖͓̬̞̰̦W̮̲̝̼̩̝͖i͖͖͡ͅt̘̯͘h̷̬̖̞̙̰̭̳ ̭̪̕o̥̤̺̝̼̰̯͟ṳ̞̭̤t̨͚̥̗ ̟̺̫̩̤̳̩o̟̰̩̖ͅr̞̘̫̩̼d̡͍̬͎̪̺͚͔e͓͖̝̙r̰͖̲̲̻̠.̺̝̺̟͈ ̣̭T̪̩̼h̥̫̪͔̀e̫̯͜ ̨N̟e҉͔̤zp̮̭͈̟é͉͈ṛ̹̜̺̭͕d̺̪̜͇͓i̞á͕̹̣̻n͉͘ ̗͔̭͡h̲͖̣̺̺i͔̣̖̤͎̯v̠̯̘͖̭̱̯e̡̥͕-m͖̭̣̬̦͈i͖n̞̩͕̟̼̺͜d̘͉ ̯o̷͇̹͕̦f̰̱ ̝͓͉̱̪̪c͈̲̜̺h̘͚a̞͔̭̰̯̗̝o̙͍s͍͇̱͓.̵͕̰͙͈ͅ ̯̞͈̞̱̖Z̯̮̺̤̥̪̕a͏̺̗̼̬̗ḻg͢o̥̱̼.̺̜͇͡ͅ ̴͓͖̭̩͎̗ ̧̪͈̱̹̳͖͙H̵̰̤̰͕̖e̛ ͚͉̗̼̞w̶̩̥͉̮h̩̺̪̩͘ͅọ͎͉̟ ̜̩͔̦̘ͅW̪̫̩̣̲͔̳a͏͔̳͖i͖͜t͓̤̠͓͙s̘̰̩̥̙̝ͅ ̲̠̬̥Be̡̙̫̦h̰̩i̛̫͙͔̭̤̗̲n̳͞d̸ ͎̻͘T̛͇̝̲̹̠̗ͅh̫̦̝ͅe̩̫͟ ͓͖̼W͕̳͎͚̙̥ą̙l̘͚̺͔͞ͅl̳͍̙̤̤̮̳.̢ ̟̺̜̙͉Z̤̲̙̙͎̥̝A͎̣͔̙͘L̥̻̗̳̻̳̳͢G͉̖̯͓̞̩̦O̹̹̺!̙͈͎̞̬ T̷̗͑̃ͦ̊͊̓͡͞h̛̥͚̖͇̝̬̹̟̃̋̽ͥ͆̂ͦ͗ȅ̱͗͘ ̱͈͋͛͡͝e͙͓̼̰̹̲̒ͤ́ͫ̓́͊͆͢n͂͗͊͌ͦ̐̊҉̧̟̺t̐ͧ̍̀ͤ̈́̔̒͏̻̜͙̝͕͍ͅï̛̠̩̦̿̉ͪ͌ͩ̚r̶̳̺͈̺͈͚͍ͯ̄ͤ͌̋͂͝e͖̟͓̝̩̺̬̭͙̐̉ ̵̴̴̗̤̺̥̰͚̐ͧͥͦ̿͛ͤr͙̣͔̺͆͞o̶̠̯͇͈ͧͭ̄ͩ̔̍͞ơ̖͙͔̥͋ͣ̈m͙͉̱̱̞̦̘̰͑ͭ͒̂̀͘ ̛̔ͭ̈ͬͮ҉̞͢ͅiͮ̒̔͘͏͉̯͎̦s̶̡͈͎̱̖͎̫̙̊ͫ̿̋ͣ͡ͅ ̙̣̬̟͈͋̓f͍͈̰̘̻̃͋͂̐i̸̺͙̪͔̞͙̣ͧͨ͝lͬ͂҉̸̻͔̙l͖̼ͮ͢͝e̷̟̙͔̠̯̓̓ͧ́ͬͪ̇̃d̶͕͇͎̦̐̓ ̮̯̥́͋̚̕w͑̍̔̔̀ͪͯ͘͢͏͉̰i̛̟̰̣ͭ̌̊͑̒ͫ̉͠t͇̖̬̠̗̲̄̓̈ͣͮ̂͂̊͗ḩ̲̖̊ͪ̓̄ ̝̺̟͕͖̈͋̎ͩͩ͆̈́̿͡Z̵̴̖͖͕̔ͬͮ͒̏̅̍̎a̤͖ͬ͑̎͐͑̔ͭ͞l̝̼̩͋̂g̨ͦ̓̓̏̈́̉ͯ͏͎͔̟̮̠̬͙ỏ͍̝̺͕͈ͭ̓̏̽̓ͭ̾́͢͞.͓̹͇̬̔̓̏ͦ̚͜͠" };
34 const char *thai[] = { "ก็็็็็็็็็็กิิิิิิิิิิก้้้้้้้้้้ก็็็็็็็็็็ก็็็็็็็็็็กิิิิิิิิิิก้้้้้้้้้้" };
36 unsigned int zalgolen = strlen(*zalgo);
37 unsigned int thailen = strlen(*thai);
39 char *ptr = page;
41 while (i < (page_size - zalgolen)) {
42 unsigned int j;
44 j = rand() % 9;
46 switch (j) {
48 case 0:
49 strncpy(ptr, unicode1, 4);
50 ptr += 4;
51 i += 4;
52 break;
54 case 1: unilen = rand() % 10;
55 for (l = 0; l < unilen; l++) {
56 strncpy(ptr, unicode2, 6);
57 ptr += 6;
58 i += 6;
59 if ((i + 6) > page_size)
60 break;
62 break;
64 case 2: strncpy(ptr, unicode3, 2);
65 i += 2;
66 ptr += 2;
67 break;
68 case 3: strncpy(ptr, unicode4, 18);
69 i += 18;
70 ptr += 18;
71 break;
73 case 4: strncpy(ptr, unicode5, 4);
74 i += 4;
75 ptr += 4;
76 break;
78 case 5: unilen = rand() % 10;
79 for (l = 0; l < unilen; l++) {
80 strncpy(ptr, unicode6, 4);
81 ptr += 4;
82 i += 4;
83 if ((i + 4) > page_size)
84 break;
86 break;
88 case 6: strncpy(ptr, unicode7, 4);
89 i += 4;
90 ptr += 4;
91 break;
93 /* HE COMES. */
94 case 7: strncpy(ptr, *zalgo, zalgolen);
95 i += zalgolen;
96 ptr += zalgolen;
97 break;
99 case 8: strncpy(ptr, *thai, thailen);
100 i += thailen;
101 ptr += thailen;
102 break;
106 page[rand() % page_size] = 0;
#ifdef STANDALONE
/*
 * Standalone harness: generates one page and dumps it to stdout.
 *
 * gcc -I include -g -DSTANDALONE unicode.c -o unicode
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>	/* gettimeofday(), struct timeval */

unsigned int page_size = 4096;

/*
 * Seed rand() from the current time and pid, fill one page_size buffer
 * via gen_unicode_page() and write the raw bytes to stdout.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the buffer cannot be allocated.
 */
int main(int argc, char* argv[])
{
	char *page;
	struct timeval t;

	(void) argc;
	(void) argv;

	gettimeofday(&t, 0);
	srand((unsigned int) ((t.tv_sec * getpid()) ^ t.tv_usec));

	/* calloc so the part of the page the generator leaves alone is zeroed. */
	page = calloc(1, page_size);
	if (page == NULL) {
		perror("calloc");
		return EXIT_FAILURE;
	}

	gen_unicode_page(page);

	/* Emit every byte of the page, including embedded NULs. */
	fwrite(page, 1, page_size, stdout);

	free(page);
	return EXIT_SUCCESS;
}
#endif