core-lightning/common/json_parse_simple.c
Rusty Russell 9589ea0240 common: add routine to get double from JSON.
I don't like it, but we do expose some times like this :(

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2023-01-30 15:15:41 -06:00

593 lines
14 KiB
C

/* JSON core and helpers */
#include "config.h"
#include <assert.h>
#include <ccan/mem/mem.h>
#include <ccan/tal/str/str.h>
#include <common/json_parse_simple.h>
#include <common/utils.h>
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <wire/onion_wire.h>
const char *json_tok_full(const char *buffer, const jsmntok_t *t)
{
if (t->type == JSMN_STRING)
return buffer + t->start - 1;
return buffer + t->start;
}
/* Include " if it's a string. */
int json_tok_full_len(const jsmntok_t *t)
{
if (t->type == JSMN_STRING)
return t->end - t->start + 2;
return t->end - t->start;
}
bool json_tok_strneq(const char *buffer, const jsmntok_t *tok,
const char *str, size_t len)
{
if (tok->type != JSMN_STRING)
return false;
return memeq(buffer + tok->start, tok->end - tok->start, str, len);
}
bool json_tok_streq(const char *buffer, const jsmntok_t *tok, const char *str)
{
return json_tok_strneq(buffer, tok, str, strlen(str));
}
bool json_tok_startswith(const char *buffer, const jsmntok_t *tok,
const char *prefix)
{
if (tok->type != JSMN_STRING)
return false;
if (tok->end - tok->start < strlen(prefix))
return false;
return memcmp(buffer + tok->start,
prefix, strlen(prefix)) == 0;
}
bool json_tok_endswith(const char *buffer, const jsmntok_t *tok,
const char *suffix)
{
if (tok->type != JSMN_STRING)
return false;
if (tok->end - tok->start < strlen(suffix))
return false;
return memcmp(buffer + tok->end - strlen(suffix),
suffix, strlen(suffix)) == 0;
}
char *json_strdup(const tal_t *ctx, const char *buffer, const jsmntok_t *tok)
{
return tal_strndup(ctx, buffer + tok->start, tok->end - tok->start);
}
bool json_to_u64(const char *buffer, const jsmntok_t *tok, u64 *num)
{
char *end;
unsigned long long l;
l = strtoull(buffer + tok->start, &end, 0);
if (end != buffer + tok->end)
return false;
BUILD_ASSERT(sizeof(l) >= sizeof(*num));
*num = l;
/* Check for overflow */
if (l == ULLONG_MAX && errno == ERANGE)
return false;
if (*num != l)
return false;
return true;
}
bool json_str_to_u64(const char *buffer, const jsmntok_t *tok, u64 *num)
{
jsmntok_t temp;
if (tok->type != JSMN_STRING)
return false;
temp = *tok;
temp.start += 1;
temp.end -= 1;
return json_to_u64(buffer, &temp, num);
}
bool json_to_double(const char *buffer, const jsmntok_t *tok, double *num)
{
char *end;
errno = 0;
*num = strtod(buffer + tok->start, &end);
if (end != buffer + tok->end)
return false;
/* Check for overflow */
if (errno == ERANGE)
return false;
return true;
}
bool json_to_u32(const char *buffer, const jsmntok_t *tok, u32 *num)
{
uint64_t u64;
if (!json_to_u64(buffer, tok, &u64))
return false;
*num = u64;
/* Just in case it doesn't fit. */
if (*num != u64)
return false;
return true;
}
bool json_to_bool(const char *buffer, const jsmntok_t *tok, bool *b)
{
if (tok->type != JSMN_PRIMITIVE)
return false;
if (memeqstr(buffer + tok->start, tok->end - tok->start, "true")) {
*b = true;
return true;
}
if (memeqstr(buffer + tok->start, tok->end - tok->start, "false")) {
*b = false;
return true;
}
return false;
}
bool json_tok_is_num(const char *buffer, const jsmntok_t *tok)
{
if (tok->type != JSMN_PRIMITIVE)
return false;
for (int i = tok->start; i < tok->end; i++)
if (!cisdigit(buffer[i]))
return false;
return true;
}
bool json_tok_is_null(const char *buffer, const jsmntok_t *tok)
{
if (tok->type != JSMN_PRIMITIVE)
return false;
return buffer[tok->start] == 'n';
}
const jsmntok_t *json_next(const jsmntok_t *tok)
{
const jsmntok_t *t;
size_t i;
for (t = tok + 1, i = 0; i < tok->size; i++)
t = json_next(t);
return t;
}
const jsmntok_t *json_get_membern(const char *buffer,
const jsmntok_t tok[],
const char *label, size_t len)
{
const jsmntok_t *t;
size_t i;
if (tok->type != JSMN_OBJECT)
return NULL;
json_for_each_obj(i, t, tok)
if (json_tok_strneq(buffer, t, label, len))
return t + 1;
return NULL;
}
const jsmntok_t *json_get_member(const char *buffer, const jsmntok_t tok[],
const char *label)
{
return json_get_membern(buffer, tok, label, strlen(label));
}
const jsmntok_t *json_get_arr(const jsmntok_t tok[], size_t index)
{
const jsmntok_t *t;
size_t i;
if (tok->type != JSMN_ARRAY)
return NULL;
json_for_each_arr(i, t, tok) {
if (index == 0)
return t;
index--;
}
return NULL;
}
const char *json_get_id(const tal_t *ctx,
const char *buffer, const jsmntok_t *obj)
{
const jsmntok_t *idtok = json_get_member(buffer, obj, "id");
if (!idtok)
return NULL;
return tal_strndup(ctx,
json_tok_full(buffer, idtok),
json_tok_full_len(idtok));
}
/*-----------------------------------------------------------------------------
JSMN Result Validation Starts
-----------------------------------------------------------------------------*/
/*~ LIBJSMN is a fast, small JSON parsing library.
*
* "Fast, small" means it does not, in fact, do a
* lot of checking for invalid JSON.
*
* For example, by itself it would accept the strings
* `{"1" "2" "3" "4"}` and `["key": 1 2 3 4]` as valid.
* Obviously those are not in any way valid JSON.
*
* This part of the code performs some filtering so
* that at least some of the invalid JSON that
* LIBJSMN accepts, will be rejected by
* json_parse_input. It also checks that strings are valid UTF-8.
*/
/*~ These functions are used in JSMN validation.
*
* The calling convention is that the "current" token
* is passed in as the first argument, and after the
* validator, is returned from the function.
*
* p = validate_jsmn_datum(p, end, valid);
*
* The reason has to do with typical C ABIs.
* Usually, the first few arguments are passed in via
* register, and the return value is also returned
* via register.
* This calling convention generally ensures that
* the current token pointer `p` is always in a
* register and is never forced into memory by the
* compiler.
*
* These functions are pre-declared here as they
* are interrecursive.
* Note that despite the recursion, `p` is only ever
* advanced, and there is only ever one `p` value,
* thus the overall algorithm is strict O(n)
* (*not* amortized) in time.
* The recursion does mean the algorithm is O(d)
* in memory (specifically stack frames), where d
* is the nestedness of objects in the input.
* This may become an issue later if we are in a
* stack-limited environment, such as if we actually
* went and used threads.
*/
/* Validate a *single* datum. */
static const jsmntok_t *
validate_jsmn_datum(const char *buf,
const jsmntok_t *p,
const jsmntok_t *end,
bool *valid);
/*~ Validate a key-value pair.
*
* In JSMN, objects are not dictionaries.
* Instead, they are a sequence of datums.
*
* In fact, objects and arrays in JSMN are "the same",
* they only differ in delimiter characters.
*
* Of course, in "real" JSON, an object is a dictionary
* of key-value pairs.
*
* So what JSMN does is that the syntax "key": "value"
* is considered a *single* datum, a string "key"
* that contains a value "value".
*
* Indeed, JSMN accepts `["key": "value"]` as well as
* `{"item1", "item2"}`.
* The entire point of the validate_jsmn_result function
* is to reject such improper arrays and objects.
*/
static const jsmntok_t *
validate_jsmn_keyvalue(const char *buf,
const jsmntok_t *p,
const jsmntok_t *end,
bool *valid);
static const jsmntok_t *
validate_jsmn_datum(const char *buf,
const jsmntok_t *p,
const jsmntok_t *end,
bool *valid)
{
int i;
int sz;
if (p >= end) {
*valid = false;
return p;
}
switch (p->type) {
case JSMN_STRING:
if (!utf8_check(buf + p->start, p->end - p->start))
*valid = false;
/* Fall thru */
case JSMN_UNDEFINED:
case JSMN_PRIMITIVE:
/* These types should not have sub-datums. */
if (p->size != 0)
*valid = false;
else
++p;
break;
case JSMN_ARRAY:
/* Save the array size; we will advance p. */
sz = p->size;
++p;
for (i = 0; i < sz; ++i) {
/* Arrays should only contain standard JSON datums. */
p = validate_jsmn_datum(buf, p, end, valid);
if (!*valid)
break;
}
break;
case JSMN_OBJECT:
/* Save the object size; we will advance p. */
sz = p->size;
++p;
for (i = 0; i < sz; ++i) {
/* Objects should only contain key-value pairs. */
p = validate_jsmn_keyvalue(buf, p, end, valid);
if (!*valid)
break;
}
break;
default:
*valid = false;
break;
}
return p;
}
/* Key-value pairs *must* be strings with size 1. */
static inline const jsmntok_t *
validate_jsmn_keyvalue(const char *buf,
const jsmntok_t *p,
const jsmntok_t *end,
bool *valid)
{
if (p >= end) {
*valid = false;
return p;
}
/* Check key.
*
* JSMN parses the syntax `"key": "value"` as a
* JSMN_STRING of size 1, containing the value
* datum as a sub-datum.
*
* Thus, keys in JSON objects are really strings
* that "contain" the value, thus we check if
* the size is 1.
*
* JSMN supports a non-standard syntax such as
* `"key": 1 2 3 4`, which it considers as a
* string object that contains a sequence of
* sub-datums 1 2 3 4.
* The check below that p->size == 1 also
* incidentally rejects that non-standard
* JSON.
*/
if (p->type != JSMN_STRING || p->size != 1
|| !utf8_check(buf + p->start, p->end - p->start)) {
*valid = false;
return p;
}
++p;
return validate_jsmn_datum(buf, p, end, valid);
}
/** validate_jsmn_parse_output
*
* @brief Validates the result of jsmn_parse.
*
* @desc LIBJMSN is a small fast library, not a
* comprehensive library.
*
* This simply means that LIBJSMN will accept a
* *lot* of very strange text that is technically
* not JSON.
*
* For example, LIBJSMN would accept the strings
* `{"1" "2" "3" "4"}` and `["key": 1 2 3 4]` as valid.
*
* This can lead to strange sequences of jsmntok_t
* objects.
* Unfortunately, most of our code assumes that
* the data fed into our JSON-RPC interface is
* valid JSON, and in particular is not invalid
* JSON that tickles LIBJSMN into emitting
* strange sequences of `jsmntok_t`.
*
* This function detects such possible problems
* and returns false if such an issue is found.
* If so, it is probably unsafe to pass the
* `jsmntok_t` generated by LIBJSMN to any other
* parts of our code.
*
* @param p - The first jsmntok_t token to process.
* This function does not assume that semantically
* only one JSON datum is processed; it does expect
* a sequence of complete JSON datums (which is
* what LIBJSMN *should* output).
* @param end - One past the end of jsmntok_t.
* Basically, this function is assured to read tokens
* starting at p up to end - 1.
* If p >= end, this will not validate anything and
* trivially return true.
*
* @return true if there appears to be no problem
* with the jsmntok_t sequence outputted by
* `jsmn_parse`, false otherwise.
*/
static bool
validate_jsmn_parse_output(const char *buf,
const jsmntok_t *p, const jsmntok_t *end)
{
bool valid = true;
while (p < end && valid)
p = validate_jsmn_datum(buf, p, end, &valid);
return valid;
}
/*-----------------------------------------------------------------------------
JSMN Result Validation Ends
-----------------------------------------------------------------------------*/
void toks_reset(jsmntok_t *toks)
{
assert(tal_count(toks) >= 1);
toks[0].type = JSMN_UNDEFINED;
}
jsmntok_t *toks_alloc(const tal_t *ctx)
{
jsmntok_t *toks = tal_arr(ctx, jsmntok_t, 10);
toks_reset(toks);
return toks;
}
bool json_parse_input(jsmn_parser *parser,
jsmntok_t **toks,
const char *input, int len,
bool *complete)
{
int ret;
again:
ret = jsmn_parse(parser, input, len, *toks, tal_count(*toks) - 1);
switch (ret) {
case JSMN_ERROR_INVAL:
return false;
case JSMN_ERROR_NOMEM:
tal_resize(toks, tal_count(*toks) * 2);
goto again;
}
/* Check whether we read at least one full root element, i.e., root
* element has its end set. */
if ((*toks)[0].type == JSMN_UNDEFINED || (*toks)[0].end == -1) {
*complete = false;
return true;
}
/* If we read a partial element at the end of the stream we'll get a
* ret=JSMN_ERROR_PART, but due to the previous check we know we read at
* least one full element, so count tokens that are part of this root
* element. */
ret = json_next(*toks) - *toks;
if (!validate_jsmn_parse_output(input, *toks, *toks + ret))
return false;
/* Cut to length and return. */
tal_resize(toks, ret + 1);
/* Make sure last one is always referenceable. */
(*toks)[ret].type = -1;
(*toks)[ret].start = (*toks)[ret].end = (*toks)[ret].size = 0;
*complete = true;
return true;
}
jsmntok_t *json_parse_simple(const tal_t *ctx, const char *input, int len)
{
bool complete;
jsmn_parser parser;
jsmntok_t *toks = toks_alloc(ctx);
jsmn_init(&parser);
if (!json_parse_input(&parser, &toks, input, len, &complete)
|| !complete)
return tal_free(toks);
return toks;
}
const char *jsmntype_to_string(jsmntype_t t)
{
switch (t) {
case JSMN_UNDEFINED :
return "UNDEFINED";
case JSMN_OBJECT :
return "OBJECT";
case JSMN_ARRAY :
return "ARRAY";
case JSMN_STRING :
return "STRING";
case JSMN_PRIMITIVE :
return "PRIMITIVE";
}
return "INVALID";
}
jsmntok_t *json_tok_copy(const tal_t *ctx, const jsmntok_t *tok)
{
return tal_dup_arr(ctx, jsmntok_t, tok, json_next(tok) - tok, 0);
}
void json_tok_remove(jsmntok_t **tokens,
jsmntok_t *obj_or_array, const jsmntok_t *tok, size_t num)
{
const jsmntok_t *src = tok;
const jsmntok_t *end = json_next(*tokens);
jsmntok_t *dest = *tokens + (tok - *tokens);
int remove_count;
assert(*tokens);
assert(obj_or_array->type == JSMN_ARRAY
|| obj_or_array->type == JSMN_OBJECT);
/* obj_or_array must be inside tokens, and tok must be inside
* obj_or_array */
assert(obj_or_array >= *tokens
&& obj_or_array < *tokens + tal_count(*tokens));
assert(tok >= obj_or_array
&& tok < *tokens + tal_count(*tokens));
for (int i = 0; i < num; i++)
src = json_next(src);
/* Don't give us a num which goes over end of obj_or_array. */
assert(src <= json_next(obj_or_array));
remove_count = src - tok;
memmove(dest, src, sizeof(jsmntok_t) * (end - src));
/* Subtract first: this ptr may move after tal_resize! */
obj_or_array->size -= num;
tal_resize(tokens, tal_count(*tokens) - remove_count);
}