Merge bitcoin/bitcoin#24916: lint: Convert lint-python-utf8-encoding.sh to Python

035eef4be6 lint: Convert lint-python-utf8-encoding.sh to Python (Dimitri)

Pull request description:

  A port of `test/lint/lint-python-utf8-encoding.sh` to a Python script, as requested in #24783. Checked for output consistency.

ACKs for top commit:
  laanwj:
    Code review ACK 035eef4be6

Tree-SHA512: a8a2f505bf7953d318837182101346c44e73cfd1bf3b5342ff1400fb1c67c5292519fa99db1035da87cf27fb5f5ac5d28871bf55a1c085b5f8a3bb33ff0fa3fb
This commit is contained in:
laanwj 2022-04-25 17:57:09 +02:00
commit c90b42bcdb
No known key found for this signature in database
GPG key ID: 1E4AED62986CD25D
2 changed files with 73 additions and 28 deletions

View file

@ -0,0 +1,73 @@
#!/usr/bin/env python3
#
# Copyright (c) 2018-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Make sure we explicitly open all text files using UTF-8 (or ASCII) encoding to
# avoid potential issues on the BSDs where the locale is not always set.
import sys
import re
from subprocess import check_output, CalledProcessError
# Directories that are left out of both encoding checks.
EXCLUDED_DIRS = ["src/crc32c/"]


def get_exclude_args():
    """Return one git ``:(exclude)`` pathspec per entry in ``EXCLUDED_DIRS``.

    The returned list is meant to be appended to the pathspec part of a
    ``git grep`` command line (after ``--``).

    Returns:
        list[str]: e.g. ``[":(exclude)src/crc32c/"]``.
    """
    # The loop variable is deliberately not called ``dir`` so that the builtin
    # of the same name is not shadowed.
    return [":(exclude)" + excluded_dir for excluded_dir in EXCLUDED_DIRS]
def check_fileopens():
    """Return ``git grep`` hits for ``open(`` calls without an explicit encoding.

    Runs ``git grep`` over ``*.py`` files (minus the excluded directories) and
    drops every hit that either names an ASCII/UTF-8 encoding or passes a mode
    string containing ``b``.

    Returns:
        list[str]: the remaining ``git grep`` output lines; empty if none.

    Raises:
        CalledProcessError: if ``git grep`` fails with any status other than 1.
    """
    try:
        # Keep the search pattern and encoding="utf8" on the same source line:
        # a line holding the pattern without an encoding would be reported by
        # this very check if this script is among the scanned files.
        fileopens = check_output(["git", "grep", r" open(", "--", "*.py"] + get_exclude_args(), universal_newlines=True, encoding="utf8").splitlines()
    except CalledProcessError as e:
        # Status 1 is treated as "no matches". Everything else is a real
        # failure, including the negative codes subprocess reports when the
        # child was killed by a signal; those must not look like a clean run.
        if e.returncode != 1:
            raise
        fileopens = []

    # A hit is acceptable when it names an explicit encoding or opens the file
    # with a mode string that contains "b".
    acceptable = re.compile(r"encoding=.(ascii|utf8|utf-8).|open\([^,]*, ['\"][^'\"]*b[^'\"]*['\"]")
    return [fileopen for fileopen in fileopens if not acceptable.search(fileopen)]
def check_checked_outputs():
    """Return ``git grep`` hits for text-mode check_output calls without an encoding.

    Runs ``git grep`` over ``*.py`` files (minus the excluded directories) and
    keeps only the hits that pass ``universal_newlines=True`` without also
    naming an ASCII/UTF-8 encoding on the same line.

    Returns:
        list[str]: the offending ``git grep`` output lines; empty if none.

    Raises:
        CalledProcessError: if ``git grep`` fails with any status other than 1.
    """
    try:
        # Kept on one line on purpose: the call carries universal_newlines and
        # its encoding together, so this check does not report itself.
        checked_outputs = check_output(["git", "grep", "check_output(", "--", "*.py"] + get_exclude_args(), universal_newlines=True, encoding="utf8").splitlines()
    except CalledProcessError as e:
        # Status 1 is treated as "no matches". Everything else is a real
        # failure, including the negative codes subprocess reports when the
        # child was killed by a signal; those must not look like a clean run.
        if e.returncode != 1:
            raise
        checked_outputs = []

    explicit_encoding = re.compile(r"encoding=.(ascii|utf8|utf-8).")
    return [
        checked_output
        for checked_output in checked_outputs
        if "universal_newlines=True" in checked_output
        and not explicit_encoding.search(checked_output)
    ]
def main():
    """Run both encoding checks, print their findings and exit.

    The process exits with status 1 when either check reported at least one
    line, and with status 0 otherwise.
    """
    # Each message stays on a single line. The first one carries its
    # encoding='utf8' text next to the call name, which is what the file-open
    # check uses to skip a line (presumably this script is scanned as well).
    missing_in_opens = check_fileopens()
    if missing_in_opens:
        print("Python's open(...) seems to be used to open text files without explicitly specifying encoding='utf8':\n")
        print("\n".join(missing_in_opens))

    missing_in_outputs = check_checked_outputs()
    if missing_in_outputs:
        # Separate the two reports when both are present.
        if missing_in_opens:
            print("\n")
        print("Python's check_output(...) seems to be used to get program outputs without explicitly specifying encoding='utf8':\n")
        print("\n".join(missing_in_outputs))

    sys.exit(1 if missing_in_opens or missing_in_outputs else 0)


if __name__ == "__main__":
    main()

View file

@ -1,28 +0,0 @@
#!/usr/bin/env bash
#
# Copyright (c) 2018-2020 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Make sure we explicitly open all text files using UTF-8 (or ASCII) encoding to
# avoid potential issues on the BSDs where the locale is not always set.
export LC_ALL=C

EXIT_CODE=0

# open(...) calls: keep only the hits that neither name an ASCII/UTF-8
# encoding nor pass a mode string containing "b".
OUTPUT=$(
    git grep " open(" -- "*.py" ":(exclude)src/crc32c/" \
        | grep -vE "encoding=.(ascii|utf8|utf-8)." \
        | grep -vE "open\([^,]*, ['\"][^'\"]*b[^'\"]*['\"]"
)
if [[ -n ${OUTPUT} ]]; then
    echo "Python's open(...) seems to be used to open text files without explicitly"
    echo "specifying encoding=\"utf8\":"
    echo
    echo "${OUTPUT}"
    EXIT_CODE=1
fi

# check_output(...) calls: keep only the hits that pass
# universal_newlines=True without naming an ASCII/UTF-8 encoding.
OUTPUT=$(
    git grep "check_output(" -- "*.py" ":(exclude)src/crc32c/" \
        | grep "universal_newlines=True" \
        | grep -vE "encoding=.(ascii|utf8|utf-8)."
)
if [[ -n ${OUTPUT} ]]; then
    echo "Python's check_output(...) seems to be used to get program outputs without explicitly"
    echo "specifying encoding=\"utf8\":"
    echo
    echo "${OUTPUT}"
    EXIT_CODE=1
fi

# Non-zero when either search reported something.
exit ${EXIT_CODE}