encoder/strings: adds simple processor+tests - with example use case of encoding...
[luajson.git] / tests / lunit-strings.lua
blob bedf4aa510a96839d8c7ce86714c3ee683251e31
1 local json = require("json")
2 local lunit = require("lunit")
3 local testutil = require("testutil")
4 local string= require("string")
6 local encode = json.encode
7 -- DECODE NOT 'local' due to requirement for testutil to access it
8 decode = json.decode.getDecoder(false)
10 local error = error
12 module("lunit-strings", lunit.testcase, package.seeall)
--- Recursively assert that two values are structurally equal.
-- Scalars fall through to lunit's assert_equal; tables are checked
-- key-by-key in both directions so that both missing and extra keys
-- are reported as test failures.
local function assert_table_equal(expect, t)
	if type(expect) ~= 'table' then
		return assert_equal(expect, t)
	end
	-- Every expected key must be present in t and match recursively
	for key, expected_value in pairs(expect) do
		local key_type = type(key)
		if key_type ~= 'string' and key_type ~= 'number' and key_type ~= 'boolean' then
			error("INVALID expected table key")
		end
		if t[key] == nil then
			fail(tostring(key) .. " not found but expected")
		end
		assert_table_equal(expected_value, t[key])
	end
	-- No key of t may be absent from the expected table
	for key in pairs(t) do
		if expect[key] == nil then
			fail(tostring(key) .. " found but not expected")
		end
	end
end
--- lunit per-test setup hook.
-- Resets the shared global decoder so a decoder customized by one
-- test cannot leak into the next (testutil reads this global).
function setup()
	_G.decode = json.decode.getDecoder(false)
end
--- strict_quotes option: single-quoted strings must be rejected when
-- the option is on and accepted when it is off.
-- NOTE(review): the extraction dropped the two closing braces of the
-- opts table (original lines 44-45), leaving a syntax error; restored.
function test_strict_quotes()
	local opts = {
		strings = {
			strict_quotes = true
		}
	}
	-- With strict quotes enabled a single-quoted string must fail to parse
	assert_error(function()
		local decoder = json.decode.getDecoder(opts)
		decoder("'hello'")
	end)
	-- With the option disabled, single quotes and escaped quotes parse
	opts.strings.strict_quotes = false
	assert_equal("hello", json.decode.getDecoder(opts)("'hello'"))
	-- Quote test
	assert_equal("he'\"llo'", json.decode.getDecoder(opts)("'he\\'\"llo\\''"))
end
-- \uXXXX escape sequences paired with the UTF-8 byte sequence the
-- decoder is expected to produce for each.
-- NOTE(review): the extraction dropped the table's closing brace
-- (original line ~73); restored.
local utf16_matches = {
	-- 1-byte
	{ '"\\u0000"', string.char(0x00) },
	{ '"\\u007F"', string.char(0x7F) },
	-- 2-byte
	{ '"\\u0080"', string.char(0xC2, 0x80) },
	{ '"\\u00A2"', string.char(0xC2, 0xA2) },
	{ '"\\u07FF"', string.char(0xDF, 0xBF) },
	-- 3-byte
	{ '"\\u0800"', string.char(0xE0, 0xA0, 0x80) },
	{ '"\\u20AC"', string.char(0xE2, 0x82, 0xAC) },
	{ '"\\uFEFF"', string.char(0xEF, 0xBB, 0xBF) },
	{ '"\\uFFFF"', string.char(0xEF, 0xBF, 0xBF) },
	-- 4-byte - currently not handled
	--{ '"\\uD800\\uDC00"', string.char(0xF0, 0x90, 0x80, 0x80) },
	--{ '"\\uDBFF\\uDFFF"', string.char(0xF4, 0x8F, 0xBF, 0xBF) }
}
--- The default \u escape decoder must output UTF-8 for every case
-- listed in utf16_matches.
function test_utf16_decode()
	for index, pair in ipairs(utf16_matches) do
		-- Prefix both sides with the case number so a failure
		-- message identifies which entry mismatched.
		local label = tostring(index) .. ' '
		assert_equal(label .. pair[2], label .. json.decode(pair[1]))
	end
end
-- UTF-8 byte order mark
local BOM = string.char(0xEF, 0xBB, 0xBF)
-- BOM skipping tests - here due to relation to UTF8/16
-- NOTE(review): the extraction dropped the table's closing brace
-- (original line ~90); restored.
local BOM_skip_tests = {
	{ BOM .. '"x"', "x" },
	{ BOM .. '["\\uFFFF",true]', { string.char(0xEF, 0xBF, 0xBF), true } },
	-- Other uses of unicode spaces
}
--- A document prefixed with a UTF-8 BOM must decode exactly as the
-- same document without the BOM.
function test_bom_skip()
	for _, case in ipairs(BOM_skip_tests) do
		assert_table_equal(case[2], json.decode(case[1]))
	end
end
-- Unicode whitespace codepoints gleaned from unicode.org
-- Each entry is the \u escape (decoded later into the actual UTF-8
-- whitespace string before injection).
-- NOTE(review): the extraction dropped the table's closing brace
-- (original line ~128); restored.
local WHITESPACES = {
	"\\u0009", -- \t
	"\\u000A", -- \n
	"\\u000B", -- \v
	"\\u000C", -- \f
	"\\u000D", -- \r
	"\\u0020", -- space
	"\\u0085",
	"\\u00A0",
	"\\u1680",
	"\\u180E",
	"\\u2000",
	"\\u2001",
	"\\u2002",
	"\\u2003",
	"\\u2004",
	"\\u2005",
	"\\u2006",
	"\\u2007",
	"\\u2008",
	"\\u2009",
	"\\u200A",
	"\\u200B", -- addition, zero-width space
	"\\u2028",
	"\\u2029",
	"\\u202F",
	"\\u205F",
	"\\u3000",
	"\\uFEFF" -- Zero-width non-breaking space (BOM)
}
-- JSON document templates; every %WS% placeholder is replaced with a
-- candidate whitespace string by test_whitespace_ignore.
-- NOTE(review): the extraction dropped the table's closing brace
-- (original line ~135); restored.
local inject_ws_values = {
	"%WS%true",
	" %WS%'the%WS blob' %WS%",
	"%WS%{ key: %WS%\"valueMan\",%WS% key2:%WS%4.4}",
	"%WS%false%WS%"
}
--- Every Unicode whitespace codepoint above must be accepted wherever
-- whitespace may appear in a document.
-- NOTE(review): the extraction dropped the three closing 'end's
-- (original lines ~142-145); restored.
function test_whitespace_ignore()
	for _, ws in ipairs(WHITESPACES) do
		-- Decode the \u escape into the actual UTF-8 whitespace string
		ws = json.decode('"' .. ws .. '"')
		for _, v in ipairs(inject_ws_values) do
			v = v:gsub("%%WS%%", ws)
			assert_true(nil ~= json.decode(v))
		end
	end
end
--- Round-trip: every single byte 0-255 must survive a default
-- encode/decode cycle unchanged.
-- NOTE(review): the extraction dropped the closing 'end's (original
-- lines ~152-153); restored.
function test_u_encoding()
	local encoder = json.encode.getEncoder()
	local decoder = json.decode.getDecoder()
	for i = 0, 255 do
		local char = string.char(i)
		assert_equal(char, decoder(encoder(char)))
	end
end
--- Round-trip with the xEncode string option enabled: every byte
-- 0-255 must still decode back to itself.
-- NOTE(review): the extraction dropped the closing 'end's (original
-- lines ~161-162); restored.
function test_x_encoding()
	local encoder = json.encode.getEncoder({ strings = { xEncode = true } })
	local decoder = json.decode.getDecoder()
	for i = 0, 255 do
		local char = string.char(i)
		assert_equal(char, decoder(encoder(char)))
	end
end
-- UTF-8 byte sequences paired with the \u escape form a custom
-- string processor is expected to emit for them.
-- NOTE(review): the extraction dropped the table's closing brace
-- (original line ~177); restored.
local multibyte_encoding_values = {
	-- 2-byte
	{ '"\\u0080"', string.char(0xC2, 0x80) },
	{ '"\\u00A2"', string.char(0xC2, 0xA2) },
	{ '"\\u07FF"', string.char(0xDF, 0xBF) },
	-- 3-byte
	{ '"\\u0800"', string.char(0xE0, 0xA0, 0x80) },
	{ '"\\u20AC"', string.char(0xE2, 0x82, 0xAC) },
	{ '"\\uFEFF"', string.char(0xEF, 0xBB, 0xBF) },
	{ '"\\uFFFF"', string.char(0xEF, 0xBF, 0xBF) },
	-- 4-byte (surrogate pairs)
	{ '"\\uD800\\uDC00"', string.char(0xF0, 0x90, 0x80, 0x80) },
	{ '"\\uDBFF\\uDFFF"', string.char(0xF4, 0x8F, 0xBF, 0xBF) }
}
--- A user-supplied string processor (here the utf8_processor module)
-- must be used by the encoder to emit \u escapes for multibyte input.
-- NOTE(review): the extraction dropped the closers of the processor
-- function, the options table, and the loops (original lines ~182,
-- 186-187, 191-192); restored.
function test_custom_encoding()
	local function processor(s)
		return require("utf8_processor").process(s)
	end
	local encoder = json.encode.getEncoder({
		strings = {
			processor = processor
		}
	})
	for i, v in ipairs(multibyte_encoding_values) do
		local encoded = encoder(v[2])
		assert_equal(v[1], encoded, "Failed to encode value using custom encoder")
	end
end
194 function test_strict_decoding()
195 local encoder = json.encode.getEncoder(json.encode.strict)
196 local decoder = json.decode.getDecoder(json.decode.strict)
197 for i = 0, 255 do
198 local char = string.char(i)
199 -- Must wrap character in array due to decoder strict-ness
200 assert_equal(char, decoder(encoder({char}))[1])