encoder/strings: adds simple processor+tests - with example use case of encoding...
[luajson.git] / tests / lunit-strings.lua
blob bedf4aa510a96839d8c7ce86714c3ee683251e31
1 local json = require("json")
2 local lunit = require("lunit")
3 local testutil = require("testutil")
4 local string= require("string")
6 local encode = json.encode
7 -- DECODE NOT 'local' due to requirement for testutil to access it
8 decode = json.decode.getDecoder(false)
10 local error = error
12 module("lunit-strings", lunit.testcase, package.seeall)
--- Recursively assert that two values are structurally equal.
-- Scalars fall through to lunit's assert_equal; tables are checked
-- key-by-key in both directions so that both missing and extra keys
-- are reported as test failures.
local function assert_table_equal(expect, t)
	if type(expect) ~= 'table' then
		return assert_equal(expect, t)
	end
	-- Every expected key must be present in t and match recursively
	for key, expected_value in pairs(expect) do
		local key_type = type(key)
		if key_type ~= 'string' and key_type ~= 'number' and key_type ~= 'boolean' then
			error("INVALID expected table key")
		end
		if t[key] == nil then
			fail(tostring(key) .. " not found but expected")
		end
		assert_table_equal(expected_value, t[key])
	end
	-- No key of t may be absent from the expected table
	for key in pairs(t) do
		if expect[key] == nil then
			fail(tostring(key) .. " found but not expected")
		end
	end
end
--- lunit per-test setup hook.
-- Resets the shared global decoder so a decoder customized by one
-- test cannot leak into the next (testutil reads this global).
function setup()
	_G.decode = json.decode.getDecoder(false)
end
--- strict_quotes option: single-quoted strings must be rejected when
-- the option is on and accepted when it is off.
-- NOTE(review): the extraction dropped the two closing braces of the
-- opts table (original lines 44-45), leaving a syntax error; restored.
function test_strict_quotes()
	local opts = {
		strings = {
			strict_quotes = true
		}
	}
	-- With strict quotes enabled a single-quoted string must fail to parse
	assert_error(function()
		local decoder = json.decode.getDecoder(opts)
		decoder("'hello'")
	end)
	-- With the option disabled, single quotes and escaped quotes parse
	opts.strings.strict_quotes = false
	assert_equal("hello", json.decode.getDecoder(opts)("'hello'"))
	-- Quote test
	assert_equal("he'\"llo'", json.decode.getDecoder(opts)("'he\\'\"llo\\''"))
end
-- \uXXXX escape sequences paired with the UTF-8 byte sequence the
-- decoder is expected to produce for each.
-- NOTE(review): the extraction dropped the table's closing brace
-- (original line ~73); restored.
local utf16_matches = {
	-- 1-byte
	{ '"\\u0000"', string.char(0x00) },
	{ '"\\u007F"', string.char(0x7F) },
	-- 2-byte
	{ '"\\u0080"', string.char(0xC2, 0x80) },
	{ '"\\u00A2"', string.char(0xC2, 0xA2) },
	{ '"\\u07FF"', string.char(0xDF, 0xBF) },
	-- 3-byte
	{ '"\\u0800"', string.char(0xE0, 0xA0, 0x80) },
	{ '"\\u20AC"', string.char(0xE2, 0x82, 0xAC) },
	{ '"\\uFEFF"', string.char(0xEF, 0xBB, 0xBF) },
	{ '"\\uFFFF"', string.char(0xEF, 0xBF, 0xBF) },
	-- 4-byte - currently not handled
	--{ '"\\uD800\\uDC00"', string.char(0xF0, 0x90, 0x80, 0x80) },
	--{ '"\\uDBFF\\uDFFF"', string.char(0xF4, 0x8F, 0xBF, 0xBF) }
}
--- The default \u escape decoder must output UTF-8 for every case
-- listed in utf16_matches.
function test_utf16_decode()
	for index, pair in ipairs(utf16_matches) do
		-- Prefix both sides with the case number so a failure
		-- message identifies which entry mismatched.
		local label = tostring(index) .. ' '
		assert_equal(label .. pair[2], label .. json.decode(pair[1]))
	end
end
-- UTF-8 byte order mark
local BOM = string.char(0xEF, 0xBB, 0xBF)
-- BOM skipping tests - here due to relation to UTF8/16
-- NOTE(review): the extraction dropped the table's closing brace
-- (original line ~90); restored.
local BOM_skip_tests = {
	{ BOM .. '"x"', "x" },
	{ BOM .. '["\\uFFFF",true]', { string.char(0xEF, 0xBF, 0xBF), true } },
	-- Other uses of unicode spaces
}
--- A document prefixed with a UTF-8 BOM must decode exactly as the
-- same document without the BOM.
function test_bom_skip()
	for _, case in ipairs(BOM_skip_tests) do
		assert_table_equal(case[2], json.decode(case[1]))
	end
end
-- Unicode whitespace codepoints gleaned from unicode.org
-- Each entry is the \u escape (decoded later into the actual UTF-8
-- whitespace string before injection).
-- NOTE(review): the extraction dropped the table's closing brace
-- (original line ~128); restored.
local WHITESPACES = {
	"\\u0009", -- \t
	"\\u000A", -- \n
	"\\u000B", -- \v
	"\\u000C", -- \f
	"\\u000D", -- \r
	"\\u0020", -- space
	"\\u0085",
	"\\u00A0",
	"\\u1680",
	"\\u180E",
	"\\u2000",
	"\\u2001",
	"\\u2002",
	"\\u2003",
	"\\u2004",
	"\\u2005",
	"\\u2006",
	"\\u2007",
	"\\u2008",
	"\\u2009",
	"\\u200A",
	"\\u200B", -- addition, zero-width space
	"\\u2028",
	"\\u2029",
	"\\u202F",
	"\\u205F",
	"\\u3000",
	"\\uFEFF" -- Zero-width non-breaking space (BOM)
}
-- JSON document templates; every %WS% placeholder is replaced with a
-- candidate whitespace string by test_whitespace_ignore.
-- NOTE(review): the extraction dropped the table's closing brace
-- (original line ~135); restored.
local inject_ws_values = {
	"%WS%true",
	" %WS%'the%WS blob' %WS%",
	"%WS%{ key: %WS%\"valueMan\",%WS% key2:%WS%4.4}",
	"%WS%false%WS%"
}
--- Every Unicode whitespace codepoint above must be accepted wherever
-- whitespace may appear in a document.
-- NOTE(review): the extraction dropped the three closing 'end's
-- (original lines ~142-145); restored.
function test_whitespace_ignore()
	for _, ws in ipairs(WHITESPACES) do
		-- Decode the \u escape into the actual UTF-8 whitespace string
		ws = json.decode('"' .. ws .. '"')
		for _, v in ipairs(inject_ws_values) do
			v = v:gsub("%%WS%%", ws)
			assert_true(nil ~= json.decode(v))
		end
	end
end
--- Round-trip: every single byte 0-255 must survive a default
-- encode/decode cycle unchanged.
-- NOTE(review): the extraction dropped the closing 'end's (original
-- lines ~152-153); restored.
function test_u_encoding()
	local encoder = json.encode.getEncoder()
	local decoder = json.decode.getDecoder()
	for i = 0, 255 do
		local char = string.char(i)
		assert_equal(char, decoder(encoder(char)))
	end
end
--- Round-trip with the xEncode string option enabled: every byte
-- 0-255 must still decode back to itself.
-- NOTE(review): the extraction dropped the closing 'end's (original
-- lines ~161-162); restored.
function test_x_encoding()
	local encoder = json.encode.getEncoder({ strings = { xEncode = true } })
	local decoder = json.decode.getDecoder()
	for i = 0, 255 do
		local char = string.char(i)
		assert_equal(char, decoder(encoder(char)))
	end
end
-- UTF-8 byte sequences paired with the \u escape form a custom
-- string processor is expected to emit for them.
-- NOTE(review): the extraction dropped the table's closing brace
-- (original line ~177); restored.
local multibyte_encoding_values = {
	-- 2-byte
	{ '"\\u0080"', string.char(0xC2, 0x80) },
	{ '"\\u00A2"', string.char(0xC2, 0xA2) },
	{ '"\\u07FF"', string.char(0xDF, 0xBF) },
	-- 3-byte
	{ '"\\u0800"', string.char(0xE0, 0xA0, 0x80) },
	{ '"\\u20AC"', string.char(0xE2, 0x82, 0xAC) },
	{ '"\\uFEFF"', string.char(0xEF, 0xBB, 0xBF) },
	{ '"\\uFFFF"', string.char(0xEF, 0xBF, 0xBF) },
	-- 4-byte (surrogate pairs)
	{ '"\\uD800\\uDC00"', string.char(0xF0, 0x90, 0x80, 0x80) },
	{ '"\\uDBFF\\uDFFF"', string.char(0xF4, 0x8F, 0xBF, 0xBF) }
}
--- A user-supplied string processor (here the utf8_processor module)
-- must be used by the encoder to emit \u escapes for multibyte input.
-- NOTE(review): the extraction dropped the closers of the processor
-- function, the options table, and the loops (original lines ~182,
-- 186-187, 191-192); restored.
function test_custom_encoding()
	local function processor(s)
		return require("utf8_processor").process(s)
	end
	local encoder = json.encode.getEncoder({
		strings = {
			processor = processor
		}
	})
	for i, v in ipairs(multibyte_encoding_values) do
		local encoded = encoder(v[2])
		assert_equal(v[1], encoded, "Failed to encode value using custom encoder")
	end
end
194 function test_strict_decoding()
195 local encoder = json.encode.getEncoder(json.encode.strict)
196 local decoder = json.decode.getDecoder(json.decode.strict)
197 for i = 0, 255 do
198 local char = string.char(i)
199 -- Must wrap character in array due to decoder strict-ness
200 assert_equal(char, decoder(encoder({char}))[1])