bitcoin/test/lint/lint-locale-dependence.py
Jon Atack b6a65568df Fix issues identified by codespell 2.2.1 and update ignored words
and also fix spelling in test/lint/lint-locale-dependence.py not caught by the
spelling linter and fix up a paragraph we are touching here in test/README.md.
2022-09-15 13:03:40 +02:00

261 lines
8.1 KiB
Python
Executable File

#!/usr/bin/env python3
# Copyright (c) 2018-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
# whereas no such call is made in bitcoind.
#
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
# specified by the user's LC_ALL (or LC_*) environment variable as the new
# C locale.
#
# In contrast, bitcoind does not opt in to localization -- no call to
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
# thus ignored.
#
# This results in situations where bitcoind is guaranteed to be running
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
# depending on the user's environment variables.
#
# An example: Assuming the environment variable LC_ALL=de_DE then the
# call std::to_string(1.23) will return "1.230000" in bitcoind but
# "1,230000" in bitcoin-qt.
#
# From the Qt documentation:
# "On Unix/Linux Qt is configured to use the system locale settings by default.
# This can cause a conflict when using POSIX functions, for instance, when
# converting between data types such as floats and strings, since the notation
# may differ between locales. To get around this problem, call the POSIX function
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
# or QCoreApplication to reset the locale that is used for number formatting to
# "C"-locale."
#
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
# https://stackoverflow.com/a/34878283 for more details.
#
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale-dependent snprintf with strprintf.
import re
import sys
from subprocess import check_output, CalledProcessError
# Regular expressions describing uses of locale-dependent functions that the
# linter tolerates. main() joins the entries with "|" and, using re.search,
# drops every line returned by find_locale_dependent_function_uses() that
# matches the combined pattern, so such lines are never reported.
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/test/dbwrapper_tests.cpp:.*snprintf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/fuzz/string.cpp:.*strtol",
    "src/test/fuzz/string.cpp:.*strtoul",
    "src/test/util_tests.cpp:.*strtoll",
    "src/wallet/bdb.cpp:.*DbEnv::strerror", # False positive
    "src/util/syserror.cpp:.*strerror", # Outside this function use `SysErrorString`
]
# Paths that are left out of the search. Despite the REGEXP_ prefix in the
# name, find_locale_dependent_function_uses() does not treat these entries as
# regular expressions: each one is prefixed with ":(exclude)" and handed to
# `git grep` as a path argument after the "--" separator.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
]
# Names of the functions this linter looks for. The entries are used twice:
#   * find_locale_dependent_function_uses() joins them with "|" into one
#     alternation for `git grep -E`;
#   * main() checks them one by one so that findings are reported per function.
# In both places the name may be followed by an optional "_r" or "_s" suffix,
# and the entries are inserted into the regular expressions without escaping.
# Trailing comments, where present, name the locale category or the
# intermediate function through which the dependence arises.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort", # LC_COLLATE (via strcoll)
    "asctime", # LC_TIME (directly)
    "asprintf", # (via vasprintf)
    "atof", # LC_NUMERIC (via strtod)
    "atoi", # LC_NUMERIC (via strtol)
    "atol", # LC_NUMERIC (via strtol)
    "atoll", # (via strtoll)
    "atoq",
    "btowc", # LC_CTYPE (directly)
    "ctime", # (via asctime or localtime)
    "dprintf", # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case", # boost::locale::fold_case
    "fprintf", # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf", # (via __vfscanf)
    "fwprintf", # (via __vfwprintf)
    "getdate", # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit", # boost::algorithm::is_digit
    "is_space", # boost::algorithm::is_space
    "isalnum", # LC_CTYPE
    "isalpha", # LC_CTYPE
    "isblank", # LC_CTYPE
    "iscntrl", # LC_CTYPE
    "isctype", # LC_CTYPE
    "isdigit", # LC_CTYPE
    "isgraph", # LC_CTYPE
    "islower", # LC_CTYPE
    "isprint", # LC_CTYPE
    "ispunct", # LC_CTYPE
    "isspace", # LC_CTYPE
    "isupper", # LC_CTYPE
    "iswalnum", # LC_CTYPE
    "iswalpha", # LC_CTYPE
    "iswblank", # LC_CTYPE
    "iswcntrl", # LC_CTYPE
    "iswctype", # LC_CTYPE
    "iswdigit", # LC_CTYPE
    "iswgraph", # LC_CTYPE
    "iswlower", # LC_CTYPE
    "iswprint", # LC_CTYPE
    "iswpunct", # LC_CTYPE
    "iswspace", # LC_CTYPE
    "iswupper", # LC_CTYPE
    "iswxdigit", # LC_CTYPE
    "isxdigit", # LC_CTYPE
    "localeconv", # LC_NUMERIC + LC_MONETARY
    "mblen", # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs", # LC_CTYPE
    "mbtowc", # LC_CTYPE
    "mktime",
    "normalize", # boost::locale::normalize
    "printf", # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf", # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll", # LC_COLLATE
    "strerror",
    "strfmon",
    "strftime", # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod", # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol", # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul", # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm", # LC_COLLATE
    "swprintf",
    "to_lower", # boost::locale::to_lower
    "to_title", # boost::locale::to_title
    "to_upper", # boost::locale::to_upper
    "tolower", # LC_CTYPE
    "toupper", # LC_CTYPE
    "towctrans",
    "towlower", # LC_CTYPE
    "towupper", # LC_CTYPE
    "trim", # boost::algorithm::trim
    "trim_left", # boost::algorithm::trim_left
    "trim_right", # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll", # LC_COLLATE
    "wcsftime", # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod", # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol", # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs", # LC_CTYPE
    "wcstoul", # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm", # LC_COLLATE
    "wctob",
    "wctomb", # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf"
]
def find_locale_dependent_function_uses():
    """Return the `git grep` output lines that mention a locale-dependent function.

    One extended regular expression is built from LOCALE_DEPENDENT_FUNCTIONS:
    any listed name, optionally followed by "_r" or "_s", with one character on
    each side that is neither an identifier character nor one of the quoting
    characters in the bracket expression. `git grep -E` is run with that
    pattern over "*.cpp" and "*.h", with every entry of
    REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS passed as an ":(exclude)" path.

    Returns:
        The command's output split into a list of lines. An empty list is
        returned when the command fails with an exit status of 1 or lower
        (presumably git grep's "nothing found" result).

    Raises:
        CalledProcessError: if the command fails with an exit status above 1.
    """
    alternation = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    pattern = "[^a-zA-Z0-9_\\`'\"<>](" + alternation + ")(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]"

    command = ["git", "grep", "-E", pattern, "--", "*.cpp", "*.h"]
    command.extend(":(exclude)" + excluded for excluded in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS)

    try:
        output = check_output(command, universal_newlines=True, encoding="utf8")
    except CalledProcessError as e:
        # Only statuses above 1 are treated as real failures.
        if e.returncode > 1:
            raise e
        return []
    return output.splitlines()
def main():
    """Report uses of locale-dependent functions and exit with the lint status.

    The candidate lines come from find_locale_dependent_function_uses(). Lines
    matching any KNOWN_VIOLATIONS pattern are dropped first. Then, for each
    entry of LOCALE_DEPENDENT_FUNCTIONS, the remaining lines that use that
    function are printed under a per-function heading.

    The function always terminates through sys.exit(): with status 1 if at
    least one use was reported (after printing a short piece of advice), and
    with status 0 otherwise.
    """
    git_grep_output = find_locale_dependent_function_uses()

    # "|".join([]) is the empty pattern, and re.search("", line) succeeds for
    # every line. Without this guard an empty KNOWN_VIOLATIONS list would mark
    # every finding as a known violation and the linter could never fail.
    if KNOWN_VIOLATIONS:
        known_violations_re = re.compile("|".join(KNOWN_VIOLATIONS))
        # The ignore filter does not depend on the function being checked, so
        # it is applied once here instead of once per function and line.
        candidate_lines = [line for line in git_grep_output
                           if not known_violations_re.search(line)]
    else:
        candidate_lines = git_grep_output

    exit_code = 0
    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        # The name (optionally with an _r/_s suffix) delimited on both sides by
        # a character that is neither an identifier nor a quoting character.
        use_re = re.compile("[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]")
        # Lines whose content, right after the "<file>.c|.cpp|.h:" prefix and
        # optional whitespace, starts with //, *, /* or a double quote before
        # the name appears; these are skipped rather than reported.
        comment_or_string_re = re.compile("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function)

        matches = [line for line in candidate_lines
                   if use_re.search(line) and not comment_or_string_re.search(line)]
        if matches:
            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
            for match in matches:
                print(match)
            print("")
            exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale-dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")
    sys.exit(exit_code)


# Run the linter only when the file is executed as a script.
if __name__ == "__main__":
    main()