tests/lunit-strings.lua

   1 local json = require("json")
   2 local lunit = require("lunit")
   3 local testutil = require("testutil")
   4 local string= require("string")
   5
   -- Local alias for json.encode.
   local encode = json.encode
   -- `decode` is deliberately NOT 'local': testutil is required to access it.
   decode = json.decode.getDecoder(false)

   -- Keep a local handle on the builtin `error`; the statements below replace
   -- this chunk's environment.
   local error = error

   -- Register this file as the "lunit-strings" test case. When a global
   -- `module` function exists it is called directly; otherwise the environment
   -- returned by lunit.module is installed through _ENV.
   if module then
           module("lunit-strings", lunit.testcase, package.seeall)
   else
           _ENV = lunit.module("lunit-strings", 'seeall')
   end
  17
  -- Recursively assert that `t` matches `expect`.
  --
  -- A non-table `expect` is compared with assert_equal. When `expect` is a
  -- table, `t` must also be a table, every key of `expect` (string, number or
  -- boolean keys only) must be present in `t` with a recursively matching
  -- value, and `t` must not hold any key that `expect` lacks.
  --
  -- @param expect expected value or table
  -- @param t      actual value under test
  -- Raises a plain error when `expect` contains a key of an unsupported type.
  local function assert_table_equal(expect, t)
          if type(expect) ~= 'table' then
                  return assert_equal(expect, t)
          end
          -- Report a non-table actual value as a test failure rather than
          -- letting the indexing / pairs() calls below raise a raw Lua error.
          if type(t) ~= 'table' then
                  return fail("expected a table but found " .. type(t))
          end
          for k, v in pairs(expect) do
                  local key_type = type(k)
                  if key_type ~= 'string' and key_type ~= 'number' and key_type ~= 'boolean' then
                          error("INVALID expected table key")
                  end
                  local found = t[k]
                  if found == nil then
                          fail(tostring(k) .. " not found but expected")
                  end
                  assert_table_equal(v, found)
          end
          -- Second pass: reject keys present in `t` that were not expected.
          for k in pairs(t) do
                  if expect[k] == nil then
                          fail(tostring(k) .. " found but not expected")
                  end
          end
  end
  38
  -- Reset hook (presumably run by lunit before each test -- confirm): stores a
  -- freshly built decoder, json.decode.getDecoder(false), in the global `decode`.
  function setup()
          local fresh_decoder = json.decode.getDecoder(false)
          _G.decode = fresh_decoder
  end
  43
  -- With strings.strict_quotes enabled, decoding a single-quoted string must
  -- raise; with it disabled, single-quoted strings decode, including ones that
  -- contain escaped single quotes and a bare double quote.
  function test_strict_quotes()
          local opts = { strings = { strict_quotes = true } }
          -- Builds a decoder from the current `opts` on every call.
          local function decode_with_opts(text)
                  return json.decode.getDecoder(opts)(text)
          end

          assert_error(function()
                  decode_with_opts("'hello'")
          end)

          opts.strings.strict_quotes = false
          assert_equal("hello", decode_with_opts("'hello'"))
          -- Quote test
          assert_equal("he'\"llo'", decode_with_opts("'he\\'\"llo\\''"))
  end
  60
  -- Fixture pairs: { JSON string literal holding one \u escape, expected bytes }.
  local utf16_matches
  do
          local bytes = string.char
          utf16_matches = {
                  -- 1-byte
                  { '"\\u0000"', bytes(0x00) },
                  { '"\\u007F"', bytes(0x7F) },
                  -- 2-byte
                  { '"\\u0080"', bytes(0xC2, 0x80) },
                  { '"\\u00A2"', bytes(0xC2, 0xA2) },
                  { '"\\u07FF"', bytes(0xDF, 0xBF) },
                  -- 3-byte
                  { '"\\u0800"', bytes(0xE0, 0xA0, 0x80) },
                  { '"\\u20AC"', bytes(0xE2, 0x82, 0xAC) },
                  { '"\\uFEFF"', bytes(0xEF, 0xBB, 0xBF) },
                  { '"\\uFFFF"', bytes(0xEF, 0xBF, 0xBF) },
                  -- 4-byte - currently not handled
                  --{ '"\\uD800\\uDC00"', bytes(0xF0, 0x90, 0x80, 0x80) },
                  --{ '"\\uDBFF\\uDFFF"', bytes(0xF4, 0x8F, 0xBF, 0xBF) }
          }
  end
  79
  -- Test that the default \u decoder outputs UTF8: every fixture's JSON literal
  -- is decoded with json.decode and compared to its expected byte string. The
  -- fixture index is prefixed to both sides (presumably so a failure report
  -- shows which entry mismatched).
  function test_utf16_decode()
          for index = 1, #utf16_matches do
                  local case = utf16_matches[index]
                  local label = tostring(index) .. ' '
                  local want = label .. case[2]
                  local got = label .. json.decode(case[1])
                  assert_equal(want, got)
          end
  end
  87
  -- The three bytes EF BB BF (UTF-8 byte order mark).
  local BOM = string.char(0xEF, 0xBB, 0xBF)
  -- BOM skipping tests - here due to relation to UTF8/16.
  -- Each entry is { BOM-prefixed input text, expected decode result }.
  local BOM_skip_tests = {}
  BOM_skip_tests[1] = { BOM .. '"x"', "x" }
  BOM_skip_tests[2] = {
          BOM .. '["\\uFFFF",true]',
          { string.char(0xEF, 0xBF, 0xBF), true },
  }
  -- Other uses of unicode spaces: no entries yet.
  95
  -- Decode every BOM-prefixed fixture with json.decode and compare the result
  -- against the fixture's expected value.
  function test_bom_skip()
          for _, case in ipairs(BOM_skip_tests) do
                  local input, expected = case[1], case[2]
                  assert_table_equal(expected, json.decode(input))
          end
  end
 101
 -- Unicode whitespace codepoints gleaned from unicode.org, stored as JSON
 -- "\uXXXX" escape text (backslash, 'u', four upper-case hex digits).
 local WHITESPACES = {}
 for _, codepoint in ipairs({
         0x0009, -- \t
         0x000A, -- \n
         0x000B, -- \v
         0x000C, -- \f
         0x000D, -- \r
         0x0020, -- space
         0x0085,
         0x00A0,
         0x1680,
         0x180E,
         0x2000,
         0x2001,
         0x2002,
         0x2003,
         0x2004,
         0x2005,
         0x2006,
         0x2007,
         0x2008,
         0x2009,
         0x200A,
         0x200B, -- addition, zero-width space
         0x2028,
         0x2029,
         0x202F,
         0x205F,
         0x3000,
         0xFEFF  -- Zero-width non-breaking space (BOM)
 }) do
         WHITESPACES[#WHITESPACES + 1] = string.format("\\u%04X", codepoint)
 end
 133
 -- JSON templates; each "%WS%" marker is replaced by the whitespace under test.
 local inject_ws_values = {
         "%WS%true",
         " %WS%'the%WS blob'  %WS%",
         "%WS%{ key: %WS%\"valueMan\",%WS% key2:%WS%4.4}",
         "%WS%false%WS%"
 }

 -- For every entry of WHITESPACES: decode its \u escape into the raw
 -- character, inject that character into each template, and check that
 -- json.decode still yields a non-nil value for the resulting text.
 function test_whitespace_ignore()
         for _, escape in ipairs(WHITESPACES) do
                 local ws = json.decode('"' .. escape .. '"')
                 -- A function replacement makes gsub insert `ws` verbatim; a
                 -- string replacement would have any '%' in it interpreted.
                 local function inject()
                         return ws
                 end
                 for _, template in ipairs(inject_ws_values) do
                         local text = template:gsub("%%WS%%", inject)
                         assert_true(nil ~= json.decode(text),
                                 "failed to decode with " .. escape .. " injected into: " .. template)
                 end
         end
 end
 149
 -- Round-trip each single-byte string (byte values 0..255) through an encoder
 -- and a decoder built without options; the decoded value must equal the input.
 function test_u_encoding()
         local to_json = json.encode.getEncoder()
         local from_json = json.decode.getDecoder()
         for code = 0, 255 do
                 local original = string.char(code)
                 assert_equal(original, from_json(to_json(original)))
         end
 end
 158
 -- Same round trip as for the default encoder, but the encoder is built with
 -- strings.xEncode = true; the decoder is still built without options.
 function test_x_encoding()
         local encoder_options = { strings = { xEncode = true } }
         local to_json = json.encode.getEncoder(encoder_options)
         local from_json = json.decode.getDecoder()
         for code = 0, 255 do
                 local original = string.char(code)
                 assert_equal(original, from_json(to_json(original)))
         end
 end
 167
 -- Fixture pairs: { expected JSON text using \u escapes, input byte string }.
 local multibyte_encoding_values
 do
         local bytes = string.char
         multibyte_encoding_values = {
                 -- 2-byte
                 { '"\\u0080"', bytes(0xC2, 0x80) },
                 { '"\\u00A2"', bytes(0xC2, 0xA2) },
                 { '"\\u07FF"', bytes(0xDF, 0xBF) },
                 -- 3-byte
                 { '"\\u0800"', bytes(0xE0, 0xA0, 0x80) },
                 { '"\\u20AC"', bytes(0xE2, 0x82, 0xAC) },
                 { '"\\uFEFF"', bytes(0xEF, 0xBB, 0xBF) },
                 { '"\\uFFFF"', bytes(0xEF, 0xBF, 0xBF) },
                 -- 4-byte (surrogate pairs)
                 { '"\\uD800\\uDC00"', bytes(0xF0, 0x90, 0x80, 0x80) },
                 { '"\\uDBFF\\uDFFF"', bytes(0xF4, 0x8F, 0xBF, 0xBF) }
         }
 end
 182
 -- Build an encoder whose strings.processor hands each string to
 -- utf8_processor.process, then check that every fixture byte string encodes
 -- to the expected \u-escaped JSON text.
 function test_custom_encoding()
         local encoder = json.encode.getEncoder({
                 strings = {
                         processor = function(s)
                                 return require("utf8_processor").process(s)
                         end
                 }
         })
         for index = 1, #multibyte_encoding_values do
                 local case = multibyte_encoding_values[index]
                 local expected_json, raw_bytes = case[1], case[2]
                 local encoded = encoder(raw_bytes)
                 assert_equal(expected_json, encoded, "Failed to encode value using custom encoder")
         end
 end
 197
 -- Round-trip each single-byte string (byte values 0..255) through an encoder
 -- built from json.encode.strict and a decoder built from json.decode.strict.
 function test_strict_decoding()
         local strict_encode = json.encode.getEncoder(json.encode.strict)
         local strict_decode = json.decode.getDecoder(json.decode.strict)
         for code = 0, 255 do
                 local char = string.char(code)
                 -- Must wrap character in array due to decoder strict-ness
                 local decoded = strict_decode(strict_encode({ char }))
                 assert_equal(char, decoded[1])
         end
 end