Merge pull request #49 from jokajak/bugfix/lpeg_version_check
[luajson.git] / tests / lunit-strings.lua
blob2e94767b859cc1ced1c6acde90c27a7047a7b569
1 local json = require("json")
2 local lunit = require("lunit")
3 local testutil = require("testutil")
4 local string= require("string")
local encode = json.encode
-- `decode` is intentionally a global rather than a local: testutil has to be
-- able to reach it.
decode = json.decode.getDecoder(false)

-- Keep a local handle on error() (presumably so it stays reachable once the
-- module environment below replaces the chunk's globals -- confirm).
local error = error

-- Register this file as the "lunit-strings" test case. When the global
-- module() function exists it is used; otherwise the environment returned by
-- lunit.module() is installed through _ENV.
if module then
	module("lunit-strings", lunit.testcase, package.seeall)
else
	_ENV = lunit.module("lunit-strings", 'seeall')
end
--- Recursively assert that `t` matches `expect`.
-- A non-table `expect` is delegated to assert_equal. For a table `expect`,
-- every key it holds must exist in `t` with a recursively matching value, and
-- `t` must not hold any key that `expect` lacks.
-- @param expect expected value; table keys must be strings, numbers or booleans
-- @param t actual value under test
-- @return the result of assert_equal for non-table expectations, else nothing
local function assert_table_equal(expect, t)
	if type(expect) ~= 'table' then
		return assert_equal(expect, t)
	end
	-- Report a non-table actual value as a test failure instead of letting the
	-- indexing and pairs() calls below raise an unrelated runtime error.
	if type(t) ~= 'table' then
		return fail("expected a table but got " .. type(t))
	end
	for k, v in pairs(expect) do
		local key_type = type(k)
		if key_type ~= 'string' and key_type ~= 'number' and key_type ~= 'boolean' then
			error("INVALID expected table key")
		end
		local found = t[k]
		if found == nil then
			fail(tostring(k) .. " not found but expected")
		end
		assert_table_equal(v, found)
	end
	-- Second pass: anything present in `t` but absent from `expect` is an error.
	for k in pairs(t) do
		if expect[k] == nil then
			fail(tostring(k) .. " found but not expected")
		end
	end
end
--- Reset the global `decode` to a decoder built with getDecoder(false).
function setup()
	-- Assigned through _G so the global (not a module-local name) is replaced.
	_G.decode = json.decode.getDecoder(false)
end
--- Check the `strings.strict_quotes` decoder option.
-- With the option enabled, decoding a single-quoted string must raise an
-- error; with it disabled, single-quoted strings (including escaped quotes)
-- decode to their contents.
function test_strict_quotes()
	local opts = {
		strings = {
			strict_quotes = true
		}
	}
	assert_error(function()
		local decoder = json.decode.getDecoder(opts)
		decoder("'hello'")
	end)
	-- Same options table, now with strict quoting switched off.
	opts.strings.strict_quotes = false
	assert_equal("hello", json.decode.getDecoder(opts)("'hello'"))
	-- Quote test
	assert_equal("he'\"llo'", json.decode.getDecoder(opts)("'he\\'\"llo\\''"))
end
-- Pairs of { JSON text containing a \u escape, expected decoded byte string }.
local utf16_matches = {
	-- 1-byte
	{ '"\\u0000"', string.char(0x00) },
	{ '"\\u007F"', string.char(0x7F) },
	-- 2-byte
	{ '"\\u0080"', string.char(0xC2, 0x80) },
	{ '"\\u00A2"', string.char(0xC2, 0xA2) },
	{ '"\\u07FF"', string.char(0xDF, 0xBF) },
	-- 3-byte
	{ '"\\u0800"', string.char(0xE0, 0xA0, 0x80) },
	{ '"\\u20AC"', string.char(0xE2, 0x82, 0xAC) },
	{ '"\\uFEFF"', string.char(0xEF, 0xBB, 0xBF) },
	{ '"\\uFFFF"', string.char(0xEF, 0xBF, 0xBF) },
	-- 4-byte - currently not handled
	--{ '"\\uD800\\uDC00"', string.char(0xF0, 0x90, 0x80, 0x80) },
	--{ '"\\uDBFF\\uDFFF"', string.char(0xF4, 0x8F, 0xBF, 0xBF) }
}
--- Check that the default decoder turns each \u escape listed in
-- utf16_matches into the expected UTF-8 byte string.
function test_utf16_decode()
	for index = 1, #utf16_matches do
		local entry = utf16_matches[index]
		-- Both sides get the entry number as a prefix, so a failing comparison
		-- shows which entry it came from.
		local label = tostring(index) .. ' '
		assert_equal(label .. entry[2], label .. json.decode(entry[1]))
	end
end
-- The byte sequence EF BB BF (the UTF-8 byte order mark).
local BOM = string.char(0xEF, 0xBB, 0xBF)
-- BOM skipping tests - here due to relation to UTF8/16
-- Each entry is { JSON text prefixed with the BOM, expected decoded value }.
local BOM_skip_tests = {
	{ BOM .. '"x"', "x" },
	{ BOM .. '["\\uFFFF",true]', { string.char(0xEF, 0xBF, 0xBF), true } },
	-- Other uses of unicode spaces
}
--- Decode every BOM-prefixed input in BOM_skip_tests and compare the result
-- with the expected value using assert_table_equal.
function test_bom_skip()
	for _, case in ipairs(BOM_skip_tests) do
		assert_table_equal(case[2], json.decode(case[1]))
	end
end
-- Unicode whitespace codepoints gleaned from unicode.org
-- Stored as JSON \u escapes (not raw characters); test code decodes them.
local WHITESPACES = {
	"\\u0009", -- \t
	"\\u000A", -- \n
	"\\u000B", -- \v
	"\\u000C", -- \f
	"\\u000D", -- \r
	"\\u0020", -- space
	"\\u0085",
	"\\u00A0",
	"\\u1680",
	"\\u180E",
	"\\u2000",
	"\\u2001",
	"\\u2002",
	"\\u2003",
	"\\u2004",
	"\\u2005",
	"\\u2006",
	"\\u2007",
	"\\u2008",
	"\\u2009",
	"\\u200A",
	"\\u200B", -- addition, zero-width space
	"\\u2028",
	"\\u2029",
	"\\u202F",
	"\\u205F",
	"\\u3000",
	"\\uFEFF" -- Zero-width non-breaking space (BOM)
}
-- JSON templates; each literal "%WS%" marker is replaced with a whitespace
-- character before the text is decoded.
-- NOTE(review): the second template contains "%WS " (no closing '%'), which
-- the "%%WS%%" substitution pattern never matches -- confirm this is intended.
local inject_ws_values = {
	"%WS%true",
	" %WS%'the%WS blob' %WS%",
	"%WS%{ key: %WS%\"valueMan\",%WS% key2:%WS%4.4}",
	"%WS%false%WS%"
}
--- For every whitespace codepoint in WHITESPACES, substitute it for each
-- "%WS%" marker in the inject_ws_values templates and check that the default
-- decoder still returns a non-nil value.
function test_whitespace_ignore()
	for _, escaped in ipairs(WHITESPACES) do
		-- Decode the \u escape (wrapped in quotes as a JSON string) to obtain
		-- the actual whitespace character.
		local ws = json.decode('"' .. escaped .. '"')
		for _, template in ipairs(inject_ws_values) do
			-- gsub also returns a match count; only the string is kept.
			local text = template:gsub("%%WS%%", ws)
			assert_true(nil ~= json.decode(text))
		end
	end
end
--- Round-trip every single-byte string (byte values 0..255) through the
-- default encoder and the default decoder and expect the original back.
function test_u_encoding()
	local encoder = json.encode.getEncoder()
	local decoder = json.decode.getDecoder()
	for i = 0, 255 do
		local char = string.char(i)
		assert_equal(char, decoder(encoder(char)))
	end
end
--- Round-trip every single-byte string (byte values 0..255) through an
-- encoder created with `strings.xEncode = true` and the default decoder.
function test_x_encoding()
	local encoder = json.encode.getEncoder({ strings = { xEncode = true } })
	local decoder = json.decode.getDecoder()
	for i = 0, 255 do
		local char = string.char(i)
		assert_equal(char, decoder(encoder(char)))
	end
end
-- Pairs of { expected JSON text using \u escapes, multi-byte input string }.
local multibyte_encoding_values = {
	-- 2-byte
	{ '"\\u0080"', string.char(0xC2, 0x80) },
	{ '"\\u00A2"', string.char(0xC2, 0xA2) },
	{ '"\\u07FF"', string.char(0xDF, 0xBF) },
	-- 3-byte
	{ '"\\u0800"', string.char(0xE0, 0xA0, 0x80) },
	{ '"\\u20AC"', string.char(0xE2, 0x82, 0xAC) },
	{ '"\\uFEFF"', string.char(0xEF, 0xBB, 0xBF) },
	{ '"\\uFFFF"', string.char(0xEF, 0xBF, 0xBF) },
	-- 4-byte (surrogate pairs)
	{ '"\\uD800\\uDC00"', string.char(0xF0, 0x90, 0x80, 0x80) },
	{ '"\\uDBFF\\uDFFF"', string.char(0xF4, 0x8F, 0xBF, 0xBF) }
}
--- Encode each multi-byte string in multibyte_encoding_values with an encoder
-- whose `strings.processor` hook is the utf8_processor module's `process`
-- function, and compare against the expected \u-escaped JSON text.
function test_custom_encoding()
	-- The module is required inside the hook, i.e. only when the hook runs.
	local function processor(s)
		return require("utf8_processor").process(s)
	end
	local encoder = json.encode.getEncoder({
		strings = {
			processor = processor
		}
	})
	for _, case in ipairs(multibyte_encoding_values) do
		local encoded = encoder(case[2])
		assert_equal(case[1], encoded, "Failed to encode value using custom encoder")
	end
end
--- Round-trip every single-byte string (byte values 0..255) through the
-- encoder and decoder built from the `strict` option sets.
function test_strict_decoding()
	local encoder = json.encode.getEncoder(json.encode.strict)
	local decoder = json.decode.getDecoder(json.decode.strict)
	for i = 0, 255 do
		local char = string.char(i)
		-- Must wrap character in array due to decoder strict-ness
		assert_equal(char, decoder(encoder({char}))[1])
	end
end