mirror of
https://github.com/ElementsProject/lightning.git
synced 2025-02-22 22:45:27 +01:00
common/json.c: Check that JSMN result is well-formed.
xref: https://lists.ozlabs.org/pipermail/c-lightning/2020-June/000188.html Changelog-Fixed: Reject some bad JSON at parsing.
This commit is contained in:
parent
4302afd9a5
commit
e8936f9d23
2 changed files with 238 additions and 4 deletions
231
common/json.c
231
common/json.c
|
@ -301,6 +301,235 @@ const jsmntok_t *json_get_arr(const jsmntok_t tok[], size_t index)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
JSMN Result Validation Starts
|
||||
-----------------------------------------------------------------------------*/
|
||||
/*~ LIBJSMN is a fast, small JSON parsing library.
|
||||
*
|
||||
* "Fast, small" means it does not, in fact, do a
|
||||
* lot of checking for invalid JSON.
|
||||
*
|
||||
* For example, by itself it would accept the strings
|
||||
* `{"1" "2" "3" "4"}` and `["key": 1 2 3 4]` as valid.
|
||||
* Obviously those are not in any way valid JSON.
|
||||
*
|
||||
* This part of the code performs some filtering so
|
||||
* that at least some of the invalid JSON that
|
||||
* LIBJSMN accepts, will be rejected by
|
||||
* json_parse_input.
|
||||
*/
|
||||
|
||||
/*~ These functions are used in JSMN validation.
|
||||
*
|
||||
* The calling convention is that the "current" token
|
||||
* is passed in as the first argument, and after the
|
||||
* validator, is returned from the function.
|
||||
*
|
||||
* p = validate_jsmn_datum(p, end, valid);
|
||||
*
|
||||
* The reason has to do with typical C ABIs.
|
||||
* Usually, the first few arguments are passed in via
|
||||
* register, and the return value is also returned
|
||||
* via register.
|
||||
* This calling convention generally ensures that
|
||||
* the current token pointer `p` is always in a
|
||||
* register and is never forced into memory by the
|
||||
* compiler.
|
||||
*
|
||||
* These functions are pre-declared here as they
|
||||
* are interrecursive.
|
||||
* Note that despite the recursion, `p` is only ever
|
||||
* advanced, and there is only ever one `p` value,
|
||||
* thus the overall algorithm is strict O(n)
|
||||
* (*not* amortized) in time.
|
||||
* The recursion does mean the algorithm is O(d)
|
||||
* in memory (specifically stack frames), where d
|
||||
* is the nestedness of objects in the input.
|
||||
* This may become an issue later if we are in a
|
||||
* stack-limited environment, such as if we actually
|
||||
* went and used threads.
|
||||
*/
|
||||
/* Validate a *single* datum. */
|
||||
static const jsmntok_t *
|
||||
validate_jsmn_datum(const jsmntok_t *p,
|
||||
const jsmntok_t *end,
|
||||
bool *valid);
|
||||
/*~ Validate a key-value pair.
|
||||
*
|
||||
* In JSMN, objects are not dictionaries.
|
||||
* Instead, they are a sequence of datums.
|
||||
*
|
||||
* In fact, objects and arrays in JSMN are "the same",
|
||||
* they only differ in delimiter characters.
|
||||
*
|
||||
* Of course, in "real" JSON, an object is a dictionary
|
||||
* of key-value pairs.
|
||||
*
|
||||
* So what JSMN does is that the syntax "key": "value"
|
||||
* is considered a *single* datum, a string "key"
|
||||
* that contains a value "value".
|
||||
*
|
||||
* Indeed, JSMN accepts `["key": "value"]` as well as
|
||||
* `{"item1", "item2"}`.
|
||||
* The entire point of the validate_jsmn_result function
|
||||
* is to reject such improper arrays and objects.
|
||||
*/
|
||||
static const jsmntok_t *
|
||||
validate_jsmn_keyvalue(const jsmntok_t *p,
|
||||
const jsmntok_t *end,
|
||||
bool *valid);
|
||||
|
||||
static const jsmntok_t *
|
||||
validate_jsmn_datum(const jsmntok_t *p,
|
||||
const jsmntok_t *end,
|
||||
bool *valid)
|
||||
{
|
||||
int i;
|
||||
int sz;
|
||||
|
||||
if (p >= end) {
|
||||
*valid = false;
|
||||
return p;
|
||||
}
|
||||
|
||||
switch (p->type) {
|
||||
case JSMN_UNDEFINED:
|
||||
case JSMN_STRING:
|
||||
case JSMN_PRIMITIVE:
|
||||
/* These types should not have sub-datums. */
|
||||
if (p->size != 0)
|
||||
*valid = false;
|
||||
else
|
||||
++p;
|
||||
break;
|
||||
|
||||
case JSMN_ARRAY:
|
||||
/* Save the array size; we will advance p. */
|
||||
sz = p->size;
|
||||
++p;
|
||||
for (i = 0; i < sz; ++i) {
|
||||
/* Arrays should only contain standard JSON datums. */
|
||||
p = validate_jsmn_datum(p, end, valid);
|
||||
if (!*valid)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case JSMN_OBJECT:
|
||||
/* Save the object size; we will advance p. */
|
||||
sz = p->size;
|
||||
++p;
|
||||
for (i = 0; i < sz; ++i) {
|
||||
/* Objects should only contain key-value pairs. */
|
||||
p = validate_jsmn_keyvalue(p, end, valid);
|
||||
if (!*valid)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
*valid = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
/* Key-value pairs *must* be strings with size 1. */
|
||||
static inline const jsmntok_t *
|
||||
validate_jsmn_keyvalue(const jsmntok_t *p,
|
||||
const jsmntok_t *end,
|
||||
bool *valid)
|
||||
{
|
||||
if (p >= end) {
|
||||
*valid = false;
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Check key.
|
||||
*
|
||||
* JSMN parses the syntax `"key": "value"` as a
|
||||
* JSMN_STRING of size 1, containing the value
|
||||
* datum as a sub-datum.
|
||||
*
|
||||
* Thus, keys in JSON objects are really strings
|
||||
* that "contain" the value, thus we check if
|
||||
* the size is 1.
|
||||
*
|
||||
* JSMN supports a non-standard syntax such as
|
||||
* `"key": 1 2 3 4`, which it considers as a
|
||||
* string object that contains a sequence of
|
||||
* sub-datums 1 2 3 4.
|
||||
* The check below that p->size == 1 also
|
||||
* incidentally rejects that non-standard
|
||||
* JSON.
|
||||
*/
|
||||
if (p->type != JSMN_STRING || p->size != 1) {
|
||||
*valid = false;
|
||||
return p;
|
||||
}
|
||||
|
||||
++p;
|
||||
return validate_jsmn_datum(p, end, valid);
|
||||
}
|
||||
|
||||
/** validate_jsmn_parse_output
|
||||
*
|
||||
* @brief Validates the result of jsmn_parse.
|
||||
*
|
||||
* @desc LIBJMSN is a small fast library, not a
|
||||
* comprehensive library.
|
||||
*
|
||||
* This simply means that LIBJSMN will accept a
|
||||
* *lot* of very strange text that is technically
|
||||
* not JSON.
|
||||
*
|
||||
* For example, LIBJSMN would accept the strings
|
||||
* `{"1" "2" "3" "4"}` and `["key": 1 2 3 4]` as valid.
|
||||
*
|
||||
* This can lead to strange sequences of jsmntok_t
|
||||
* objects.
|
||||
* Unfortunately, most of our code assumes that
|
||||
* the data fed into our JSON-RPC interface is
|
||||
* valid JSON, and in particular is not invalid
|
||||
* JSON that tickles LIBJSMN into emitting
|
||||
* strange sequences of `jsmntok_t`.
|
||||
*
|
||||
* This function detects such possible problems
|
||||
* and returns false if such an issue is found.
|
||||
* If so, it is probably unsafe to pass the
|
||||
* `jsmntok_t` generated by LIBJSMN to any other
|
||||
* parts of our code.
|
||||
*
|
||||
* @param p - The first jsmntok_t token to process.
|
||||
* This function does not assume that semantically
|
||||
* only one JSON datum is processed; it does expect
|
||||
* a sequence of complete JSON datums (which is
|
||||
* what LIBJSMN *should* output).
|
||||
* @param end - One past the end of jsmntok_t.
|
||||
* Basically, this function is assured to read tokens
|
||||
* starting at p up to end - 1.
|
||||
* If p >= end, this will not validate anything and
|
||||
* trivially return true.
|
||||
*
|
||||
* @return true if there appears to be no problem
|
||||
* with the jsmntok_t sequence outputted by
|
||||
* `jsmn_parse`, false otherwise.
|
||||
*/
|
||||
static bool
|
||||
validate_jsmn_parse_output(const jsmntok_t *p, const jsmntok_t *end)
|
||||
{
|
||||
bool valid = true;
|
||||
|
||||
while (p < end && valid)
|
||||
p = validate_jsmn_datum(p, end, &valid);
|
||||
|
||||
return valid;
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
JSMN Result Validation Ends
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
jsmntok_t *json_parse_input(const tal_t *ctx,
|
||||
const char *input, int len, bool *valid)
|
||||
{
|
||||
|
@ -338,7 +567,7 @@ again:
|
|||
ret = json_next(toks) - toks;
|
||||
|
||||
/* Cut to length and return. */
|
||||
*valid = true;
|
||||
*valid = validate_jsmn_parse_output(toks, toks + ret);
|
||||
tal_resize(&toks, ret + 1);
|
||||
/* Make sure last one is always referenceable. */
|
||||
toks[ret].type = -1;
|
||||
|
|
|
@ -122,20 +122,25 @@ static void test_json_tok_size(void)
|
|||
assert(toks[0].size == 2);
|
||||
assert(toks[1].size == 2);
|
||||
|
||||
buf = "{\"e1\" : {\"e2\", \"e3\"}}";
|
||||
buf = "{\"e1\" : {\"e2\": 2, \"e3\": 3}}";
|
||||
toks = json_parse_input(tmpctx, buf, strlen(buf), &ok);
|
||||
assert(ok);
|
||||
/* size only counts *direct* children */
|
||||
assert(toks[0].size == 1);
|
||||
assert(toks[2].size == 2);
|
||||
|
||||
buf = "{\"e1\" : {\"e2\", \"e3\"}, \"e4\" : {\"e5\", \"e6\"}}";
|
||||
buf = "{\"e1\" : {\"e2\": 2, \"e3\": 3}, \"e4\" : {\"e5\": 5, \"e6\": 6}}";
|
||||
toks = json_parse_input(tmpctx, buf, strlen(buf), &ok);
|
||||
assert(ok);
|
||||
/* size only counts *direct* children */
|
||||
assert(toks[0].size == 2);
|
||||
assert(toks[2].size == 2);
|
||||
assert(toks[6].size == 2);
|
||||
assert(toks[8].size == 2);
|
||||
|
||||
/* This should *not* parse! (used to give toks[0]->size == 3!) */
|
||||
buf = "{ \"\" \"\" \"\" }";
|
||||
toks = json_parse_input(tmpctx, buf, strlen(buf), &ok);
|
||||
assert(!ok);
|
||||
|
||||
/* This should *not* parse! (used to give toks[0]->size == 2!) */
|
||||
buf = "{ 'satoshi', '546' }";
|
||||
|
|
Loading…
Add table
Reference in a new issue