tools/check-bolt: rewrite to give more accurate diagnostics when we're out of sync.

Just use substrings, not regexes, since we have more complex characters now that
LaTeX is entering the spec!

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell 2024-06-19 09:27:01 +09:30
parent 1de569d38d
commit 4a36edf7c1

View file

@@ -135,55 +135,46 @@ static char *find_bolt_ref(const char *prefix, char **p, size_t *len)
} }
} }
static char *code_to_regex(const char *code, size_t len, bool escape) /* Replace '*' at start of line with whitespace, canonicalize */
static char *de_prefix(char *str)
{ {
char *pattern = tal_arr(NULL, char, len*2 + 1), *p; bool start_of_line = true;
size_t i; size_t i;
bool after_nl = true;
/* We swallow '*' if first in line: block comments */ for (i = 0; str[i]; i++) {
p = pattern; if (start_of_line && str[i] == '*') {
for (i = 0; i < len; i++) { str[i] = ' ';
/* ... matches anything. */ start_of_line = false;
if (strstarts(code + i, "...")) {
*(p++) = '.';
*(p++) = '*';
i += 2;
continue;
} }
switch (code[i]) { /* Stay start of line until whitespace ends */
case '\n': if (start_of_line)
after_nl = true; start_of_line = cisspace(str[i]);
*(p++) = code[i]; else
start_of_line = (str[i] == '\n');
}
return canonicalize(str);
}
/* Take a quote, split it on '...' (trim line prefixes) */
static char **split_pattern(const char *code, size_t len)
{
char **strings = tal_arr(NULL, char *, 0);
const char *sep;
while ((sep = strstr(code, "...")) != NULL) {
size_t matchlen = sep - code;
if (sep > code + len)
break; break;
tal_arr_expand(&strings,
de_prefix(tal_strndup(strings, code, matchlen)));
code += matchlen + strlen("...");
len -= matchlen + strlen("...");
}
case '*': tal_arr_expand(&strings, de_prefix(tal_strndup(strings, code, len)));
if (after_nl) { return strings;
after_nl = false;
continue;
}
/* Fall through. */
case '.':
case '$':
case '^':
case '[':
case ']':
case '{':
case '}':
case '(':
case ')':
case '+':
case '|':
if (escape)
*(p++) = '\\';
/* Fall through */
default:
*(p++) = code[i];
}
}
*p = '\0';
return canonicalize(pattern);
} }
/* Moves *pos to start of line. */ /* Moves *pos to start of line. */
@@ -204,28 +195,60 @@ static unsigned linenum(const char *raw, const char **pos)
} }
static void fail_mismatch(const char *filename, static void fail_mismatch(const char *filename,
const char *raw, const char *pos, const char *raw,
size_t len, struct bolt_file *bolt) const char *pos,
size_t len,
char **strings,
struct bolt_file *bolt)
{ {
unsigned line = linenum(raw, &pos); unsigned line = linenum(raw, &pos);
char *try; /* If they all match, order must be wrong. */
const char *match = NULL;
int matchlen;
fprintf(stderr, "%s:%u:mismatch:%.*s\n", /* Figure out which substring didn't match, and how much to cut it */
filename, line, (int)strcspn(pos, "\n"), pos); for (size_t i = 0; i < tal_count(strings); i++) {
/* Try to find longest match, as a hint. */ if (strstr(bolt->contents, strings[i]))
try = code_to_regex(pos + strcspn(pos, "\n"), len, false); continue;
while (strlen(try)) {
const char *p = strstr(bolt->contents, try); /* OK, it doesn't match, truncate it until it does */
if (p) { matchlen = strlen(strings[i]);
fprintf(stderr, "Closest match: %s...[%.20s]\n", while (matchlen) {
try, p + strlen(try)); match = memmem(bolt->contents, strlen(bolt->contents),
strings[i], matchlen);
if (match)
break;
matchlen--;
}
break; break;
} }
try[strlen(try)-1] = '\0';
fprintf(stderr, "%s:%u:", filename, line);
if (match) {
fprintf(stderr, "Closest match: %.*s...[%.20s]\n",
matchlen, match, match + matchlen);
} else {
fprintf(stderr, "Parts match, but not in this order\n");
} }
exit(1); exit(1);
} }
/* Do all of strings[0..nstrings) occur in bolttext, in that order and
 * without overlapping?
 *
 * @bolttext: NUL-terminated text to search.
 * @strings: array of NUL-terminated substrings to look for.
 * @nstrings: number of entries in @strings still to be matched.
 *
 * Returns true if every string can be found, each one starting at or
 * after the end of the previous match.  The search backtracks: if the
 * remaining strings cannot be found after one occurrence of strings[0],
 * the next occurrence of strings[0] is tried.
 */
static bool find_strings(const char *bolttext, char **strings, size_t nstrings)
{
	const char *p = bolttext;
	const char *find;
	size_t first_len;

	/* Nothing left to find: everything before us matched. */
	if (nstrings == 0)
		return true;

	first_len = strlen(strings[0]);

	/* An empty string matches at every position, so it places no
	 * constraint on where the rest may appear.  Skipping it here also
	 * keeps the loop below from advancing p past the terminating NUL
	 * (strstr() with an empty needle never returns NULL). */
	if (first_len == 0)
		return find_strings(bolttext, strings + 1, nstrings - 1);

	while ((find = strstr(p, strings[0])) != NULL) {
		/* The remainder must follow this occurrence. */
		if (find_strings(find + first_len, strings + 1, nstrings - 1))
			return true;
		/* No luck after this occurrence: try the next one. */
		p = find + 1;
	}
	return false;
}
static void fail_nobolt(const char *filename, static void fail_nobolt(const char *filename,
const char *raw, const char *pos, const char *raw, const char *pos,
const char *bolt_prefix) const char *bolt_prefix)
@ -295,12 +318,13 @@ int main(int argc, char *argv[])
p = f; p = f;
while ((bolt = find_bolt_ref(prefix, &p, &len)) != NULL) { while ((bolt = find_bolt_ref(prefix, &p, &len)) != NULL) {
char *pattern = code_to_regex(p, len, true); char **strings = split_pattern(p, len);
struct bolt_file *b = find_bolt(bolt, bolts); struct bolt_file *b = find_bolt(bolt, bolts);
if (!b) if (!b)
fail_nobolt(argv[i], f, p, bolt); fail_nobolt(argv[i], f, p, bolt);
if (!tal_strreg(f, b->contents, pattern, NULL))
fail_mismatch(argv[i], f, p, len, b); if (!find_strings(b->contents, strings, tal_count(strings)))
fail_mismatch(argv[i], f, p, len, strings, b);
if (verbose) if (verbose)
printf(" Found %.10s... in %s\n", printf(" Found %.10s... in %s\n",