all: 5.2 compatibility
[luajson.git] / lua / json / decode / strings.lua
blob14c1a42dde89a0fc61012c68a7edc7076b4348bb
1 --[[
2 Licensed according to the included 'LICENSE' document
3 Author: Thomas Harning Jr <harningt@gmail.com>
4 ]]
5 local lpeg = require("lpeg")
6 local util = require("json.decode.util")
7 local merge = require("json.util").merge
9 local tonumber = tonumber
10 local string_char = require("string").char
11 local floor = require("math").floor
12 local table_concat = require("table").concat
14 local error = error
-- Keep a handle on the global table before the environment is dropped.
local _G = _G
local is_52 = (_VERSION == "Lua 5.2")

-- On Lua 5.2, clear _ENV so that any later global access raises an error;
-- everything this module needs has already been cached in locals above.
if is_52 then
	_ENV = nil
end
-- Builds an error-raising callback for one kind of malformed input.
-- item: message prefix, e.g. "Illegal escape".
-- Returns a function(data, index) that looks up the offending character and
-- its line / in-line position through util.get_invalid_character_info, then
-- raises "<item> in string [<char>] @ <line>:<line_index>". It never returns.
local function get_error(item)
	local template = item .. " in string [%q] @ %i:%i"
	local function raise(data, index)
		-- any further values returned by the helper are not needed here
		local line, line_index, bad_char = util.get_invalid_character_info(data, index)
		error(template:format(bad_char, line, line_index))
	end
	return raise
end
-- Failure handlers, one per class of malformed string content. Each one is
-- wrapped in lpeg.P(...) further down and raises as soon as it is invoked.
local bad_character = get_error("Illegal character")
local bad_escape = get_error("Illegal escape")
local bad_hex = get_error("Illegal hex escape")
local bad_unicode = get_error("Illegal unicode escape")
-- Single-character escapes: the character that follows the backslash, mapped
-- to the text it decodes to. Used as the replacement table of doSimpleSub.
local knownReplacements = {
	["'"] = "'",
	['"'] = '"',
	['\\'] = '\\',
	['/'] = '/',
	b = '\b',
	f = '\f',
	n = '\n',
	r = '\r',
	t = '\t',
	v = '\v',
	-- Spelled as a plain 'z' on purpose: the literal '\z' evaluates to "z" on
	-- Lua 5.1 but to the empty string on Lua 5.2+, where \z is the
	-- "skip following whitespace" escape. A plain 'z' decodes the same way on
	-- every interpreter version.
	z = 'z'
}
-- Encodes the code point of a \uXXXX escape as UTF-8, following the byte
-- layout table at http://da.wikipedia.org/wiki/UTF-8
-- code1: hex text of the high byte of the code point
-- code2: hex text of the low byte of the code point
-- Returns a string of one, two or three bytes.
local function utf8DecodeUnicode(code1, code2)
	local hi, lo = tonumber(code1, 16), tonumber(code2, 16)

	-- Below 0x80 the value is emitted as a single byte
	if hi == 0 and lo < 0x80 then
		return string_char(lo)
	end

	-- Pieces shared by the two- and three-byte forms
	local lo_top = floor(lo / 64)      -- top two bits of the low byte
	local last = 0x80 + lo % 64        -- continuation byte with the low six bits

	-- Below 0x0800: two bytes
	if hi < 0x08 then
		return string_char(0xC0 + hi * 4 + lo_top, last)
	end

	-- Everything else: three bytes
	return string_char(
		0xE0 + floor(hi / 16),
		0x80 + (hi % 16) * 4 + lo_top,
		last)
end
-- Turns the hex text captured after a \x escape into the single byte with
-- that value.
local function decodeX(code)
	return string_char(tonumber(code, 16))
end
-- Escape bodies: these patterns match what follows the backslash.

-- One of the single-character escapes, replaced through knownReplacements
local doSimpleSub = lpeg.C(lpeg.S("'\"\\/bfnrtvz")) / knownReplacements

-- A captured util.hexpair, shared by the \x and \u forms
local hexPairCapture = lpeg.C(util.hexpair)

-- 'x' plus one captured hex pair; anything else after the 'x' raises via bad_hex
local doXSub = lpeg.P('x') * (hexPairCapture + lpeg.P(bad_hex))

-- 'u' plus two captured hex pairs; anything else after the 'u' raises via bad_unicode
local doUniSub = lpeg.P('u') * (hexPairCapture * hexPairCapture + lpeg.P(bad_unicode))
-- Settings used when the caller supplies none. Caller-supplied options are
-- merged over these in buildCapture.
local defaultOptions = {
	-- characters that may not appear unescaped inside a string (none here)
	badChars = '',
	-- false: both '"' and "'" are accepted as string delimiters
	strict_quotes = false,
	-- lookahead restricting which character may follow a backslash
	escapeCheck = #lpeg.S('bfnrtv/\\"xu\'z'),
	-- no extra escape pattern beyond the built-in ones
	additionalEscapes = false,
	-- converts the two hex pairs of a \u escape into output text
	decodeUnicode = utf8DecodeUnicode
}

-- The exported "default" preset is deliberately nil: with no options table,
-- buildCapture skips the merge and uses defaultOptions directly.
local default = nil
-- Stricter preset, exported as strings.strict. Fields not listed here are
-- filled in from defaultOptions when buildCapture merges the options.
local strict = {
	-- true: only '"' is accepted as a string delimiter
	strict_quotes = true,
	-- these control characters may not appear unescaped inside a string
	badChars = '\b\f\n\r\t\v',
	-- a backslash may only be followed by one of these characters
	-- NOTE(review): 'v' is accepted here although JSON (RFC 8259) defines no
	-- \v escape - confirm this is intended
	escapeCheck = #lpeg.S('bfnrtv/\\"u'),
	-- no extra escape pattern
	additionalEscapes = false
}
-- Builds the pattern for a string delimited by one particular quote character.
-- quote:       the delimiter character
-- badChars:    characters that may not appear unescaped inside the string
-- escapeMatch: pattern matching (and translating) what follows a backslash
-- Returns a pattern whose capture is the string body with escapes substituted.
local function buildCaptureString(quote, badChars, escapeMatch)
	local delimiter = lpeg.P(quote)
	-- any character except a backslash, a forbidden character or the delimiter
	local plain = 1 - lpeg.S("\\" .. badChars .. quote)
	-- the backslash itself is replaced by nothing; escapeMatch supplies the text
	local escaped = lpeg.P("\\") / "" * escapeMatch
	-- anything else that is not the closing delimiter raises via bad_character
	local illegal = -#delimiter * lpeg.P(bad_character)
	local body = (plain + escaped + illegal)^0
	return delimiter * lpeg.Cs(body) * delimiter
end
-- Builds the complete string pattern for a set of options.
-- options: table of overrides merged over defaultOptions, or nil/false to
--          use defaultOptions as they are.
-- Returns a pattern accepting a string in any of the enabled quote styles.
local function buildCapture(options)
	local opts = defaultOptions
	if options then
		opts = merge({}, defaultOptions, options) or defaultOptions
	end

	-- Double quotes always delimit a string; single quotes only when not strict
	local delimiters = { '"' }
	if not opts.strict_quotes then
		delimiters[2] = "'"
	end

	-- Alternatives tried after a backslash, in this order
	local escapes = doSimpleSub
		+ doXSub / decodeX
		+ doUniSub / opts.decodeUnicode
	if opts.additionalEscapes then
		escapes = escapes + opts.additionalEscapes
	end
	-- With a check installed, the escape must pass the lookahead first;
	-- otherwise bad_escape raises
	if opts.escapeCheck then
		escapes = opts.escapeCheck * escapes + lpeg.P(bad_escape)
	end

	-- One sub-pattern per delimiter, combined as ordered alternatives
	local pattern = buildCaptureString(delimiters[1], opts.badChars, escapes)
	for i = 2, #delimiters do
		pattern = pattern + buildCaptureString(delimiters[i], opts.badChars, escapes)
	end
	return pattern
end
-- Registers the "STRING" type name through util.register_type.
-- Takes no arguments and returns nothing.
local function register_types()
	util.register_type("STRING")
end
-- Installs the string pattern into a grammar table.
-- options:        string options, passed to buildCapture (may be nil)
-- global_options: not used by this function
-- grammar:        grammar table to extend; it is modified in place
local function load_types(options, global_options, grammar)
	local pattern = buildCapture(options)
	local id = util.types.STRING
	-- the rule itself, stored under the STRING type id
	grammar[id] = pattern
	-- a reference to that rule is handed to the "VALUE" grammar item
	util.append_grammar_item(grammar, "VALUE", lpeg.V(id))
end
-- Module table: the register_types / load_types entry points plus the two
-- option presets.
local strings = {
	register_types = register_types,
	load_types = load_types,
	default = default,
	strict = strict
}
-- Outside Lua 5.2, also publish the module as the global json.decode.strings,
-- creating the intermediate tables when they do not exist yet.
if not is_52 then
	local json = _G.json or {}
	_G.json = json
	json.decode = json.decode or {}
	json.decode.strings = strings
end

return strings