# lnbits-legend/tools/i18n-ai-tool.py

# 1. Always check the results of the procedure
# 2. Always run "npx prettier -w lnbits/static/i18n/XX.js" to reformat the result

import os
import re
import sys

import json5
from openai import OpenAI

# Command-line handling: the single argument is the language code, i.e. the
# basename of the file under lnbits/static/i18n/ to process (e.g. "de").
if len(sys.argv) < 2:
    print("Usage: python3 tools/i18n-ai-tool.py <language-code>", file=sys.stderr)
    sys.exit(1)
lang = sys.argv[1]

# Explicit check rather than `assert`, which is stripped under `python -O`.
# sys.exit() with a string prints it to stderr and exits with status 1.
if not os.getenv("OPENAI_API_KEY"):
    sys.exit("OPENAI_API_KEY env var not set")


def load_language(lang: str) -> dict:
    """Load the translation table for *lang*.

    Reads ``lnbits/static/i18n/{lang}.js``, which must start with the line
    ``window.localisation.<lang> = {``; everything from that opening brace
    onwards is parsed as a JSON5 object.

    Args:
        lang: language code, used both in the file name and in the expected
            ``window.localisation.<lang>`` prefix.

    Returns:
        The parsed key -> string mapping.

    Raises:
        ValueError: if the file does not start with the expected prefix or
            its content does not parse to a dict.
        OSError: if the file cannot be read.
    """
    path = f"lnbits/static/i18n/{lang}.js"
    # Context manager closes the handle; explicit UTF-8 avoids depending on
    # the platform's locale encoding for non-ASCII translations.
    with open(path, encoding="utf-8") as f:
        s = f.read()
    prefix = "window.localisation.%s = {\n" % lang
    if not s.startswith(prefix):
        raise ValueError(f"{path} does not start with {prefix!r}")
    # Drop the assignment but keep the trailing "{\n" of the prefix so that
    # what remains is a complete object literal.
    translations = json5.loads(s[len(prefix) - 2 :])
    if not isinstance(translations, dict):
        raise ValueError(f"{path} does not contain an object literal")
    return translations


def _js_string_literal(value: str) -> str:
    """Render *value* as a quoted JavaScript string literal."""
    # Same quoting preference as before: single quotes unless the text
    # itself contains one, in which case double quotes are used.
    quote = '"' if "'" in value else "'"
    # Escape the backslash first so the escapes added afterwards are not
    # themselves doubled.
    escaped = (
        value.replace("\\", "\\\\")
        .replace(quote, "\\" + quote)
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return f"{quote}{escaped}{quote}"


def save_language(lang: str, data) -> None:
    """Write *data* to ``lnbits/static/i18n/{lang}.js``.

    The output has the form ``window.localisation.<lang> = { ... }`` with one
    ``key:`` line followed by an indented, quoted value per entry. Keys are
    written verbatim (unquoted). Values containing the chosen quote
    character, backslashes or line breaks are escaped so the result stays a
    valid string literal.

    Args:
        lang: language code, used in the file name and the assignment target.
        data: mapping of translation key to translated string.

    Raises:
        OSError: if the file cannot be written.
    """
    # Build the whole body before opening the file, so a failure while
    # formatting a value does not leave a truncated file behind.
    entries = [f"  {k}:\n    {_js_string_literal(v)}" for k, v in data.items()]
    body = ",\n".join(entries)
    if body:
        body += "\n"
    with open(f"lnbits/static/i18n/{lang}.js", "w", encoding="utf-8") as f:
        f.write(f"window.localisation.{lang} = {{\n{body}}}\n")


def string_variables_match(str1: str, str2: str) -> bool:
    """Tell whether both strings carry the same ``%{name}`` placeholders.

    Placeholders are compared as multisets: their order is ignored, but each
    one must occur the same number of times in both strings. Only names made
    of lowercase letters, digits and underscores are recognised.
    """
    placeholder = re.compile(r"%\{[a-z0-9_]*\}")
    first, second = (sorted(placeholder.findall(s)) for s in (str1, str2))
    return first == second


# Language code -> language name as inserted into the translation prompt.
TARGET_LANGUAGES = {
    "de": "German",
    "es": "Spanish",
    "jp": "Japanese",
    "cn": "Chinese",
    "fr": "French",
    "it": "Italian",
    "pi": "Pirate",
    "nl": "Dutch",
    "we": "Welsh",
    "pl": "Polish",
    "pt": "Portuguese",
    "br": "Brazilian Portuguese",
    "cs": "Czech",
    "sk": "Slovak",
    "kr": "Korean",
    "fi": "Finnish",
}


def translate_string(lang_from, lang_to, text):
    """Translate *text* from English into the language coded by *lang_to*.

    Args:
        lang_from: accepted but not used; the prompt always states English
            as the source language.
        lang_to: language code; must be a key of ``TARGET_LANGUAGES``.
        text: the English string to translate.

    Returns:
        The stripped translation, or ``None`` if the request failed, the
        model returned no content, or the ``%{...}`` placeholders of the
        translation differ from those of *text*. The reason for a ``None``
        result is printed to stderr.

    Raises:
        KeyError: if *lang_to* is not a supported language code.
    """
    target = TARGET_LANGUAGES[lang_to]
    client = OpenAI()
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are a language expert who speaks all the languages of the world perfectly. You are tasked with translating a text from English into another language. The text is part of the software you are working on. If the text contains a phrase enclosed in curly braces and preceded by a percent sign, do not translate this phrase; instead, keep it verbatim. For instance, the phrase %{amount} should remain %{amount} in the target language. Your output should only be the translated string, nothing more.",  # noqa: E501
                },
                {
                    "role": "user",
                    "content": f"Translate the following string from English to {target}: {text}",  # noqa: E501
                },
            ],
            model="gpt-4-1106-preview",  # aka GPT-4 Turbo
        )
        content = chat_completion.choices[0].message.content
        if not content:
            print("No response from GPT-4", file=sys.stderr)
            return None
        translated = content.strip()
        # return translated string only if variables were not broken
        if not string_variables_match(text, translated):
            print(
                f"Variables mismatch in translation: {translated}",
                file=sys.stderr,
            )
            return None
        return translated
    except Exception as exc:
        # Best effort: one failed string must not abort the whole run, but
        # say why it failed instead of swallowing the error silently.
        print(f"Translation failed: {exc!r}", file=sys.stderr)
        return None


# English is the reference: its keys define what every language must have.
data_en = load_language("en")
data = load_language(lang)

missing = set(data_en) - set(data)
print(f"Missing {len(missing)} keys in language '{lang}'")

if not missing:
    # Nothing to translate: only report strings whose %{...} variables
    # differ from the English original.
    for k, source in data_en.items():
        if string_variables_match(source, data[k]):
            continue
        print(f"Variables mismatch ({k}):")
        print(source)
        print(data[k])
else:
    # Rebuild the table in the English key order, keeping existing
    # translations and requesting a translation for every missing key.
    new = {}
    for k, source in data_en.items():
        if k in data:
            new[k] = data[k]
            continue
        print(f"Translating key '{k}'")
        print(f"{source}")
        translated = translate_string("en", lang, source)
        print("->")
        if translated:
            print(f"{translated}")
            new[k] = translated
        else:
            # Failed translations are left out of the saved file.
            print("ERROR")
        print()
    save_language(lang, new)
add i18n checker and autogenerated AI translations (#2160) * use translated string * fix typos in lnbits/static/i18n/{it,jp,nl,we}.js * add missing strings to cs,en,sk translations * remove duplicates from lnbits/static/i18n/{cs,en,kr,sk}.js * add i18n checker * add i18n ai tool * add autogenerated AI translations * add i18n-ai-tool check whether variables in formatted strings are not broken * fix issues with variables found by the script * chore: make bundle 2023-12-12 11:10:51 +01:00			`# 1. Always check the results of the procedure`
			`# 2. Always run "npx prettier -w lnbits/static/i18n/XX.js" to reformat the result`

			`import os`
			`import re`
			`import sys`

			`import json5`
			`from openai import OpenAI`

			`if len(sys.argv) < 2:`
			`print("Usage: python3 tools/i18n-tool.py <code> [language]")`
			`sys.exit(1)`
			`lang = sys.argv[1]`

test: lint `tests` and `tools` aswell (#2296) * test: lint `tests` and `tools` aswell more linting :) * fix linting issues in tests and tools * fixup! * how is this working? 2024-02-27 14:30:52 +01:00			`assert os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY env var not set"`
add i18n checker and autogenerated AI translations (#2160) * use translated string * fix typos in lnbits/static/i18n/{it,jp,nl,we}.js * add missing strings to cs,en,sk translations * remove duplicates from lnbits/static/i18n/{cs,en,kr,sk}.js * add i18n checker * add i18n ai tool * add autogenerated AI translations * add i18n-ai-tool check whether variables in formatted strings are not broken * fix issues with variables found by the script * chore: make bundle 2023-12-12 11:10:51 +01:00
test: lint `tests` and `tools` aswell (#2296) * test: lint `tests` and `tools` aswell more linting :) * fix linting issues in tests and tools * fixup! * how is this working? 2024-02-27 14:30:52 +01:00
			`def load_language(lang: str) -> dict:`
chore: adhere to ruff's `UP` basically use `list` and `type` instead of `List` and `Type` this is save to use for python3.9 and has been deprecated. also has some performance drawbacks. read more here: https://docs.astral.sh/ruff/rules/non-pep585-annotation/ 2024-04-01 18:50:21 +02:00			`s = open(f"lnbits/static/i18n/{lang}.js").read()`
add i18n checker and autogenerated AI translations (#2160) * use translated string * fix typos in lnbits/static/i18n/{it,jp,nl,we}.js * add missing strings to cs,en,sk translations * remove duplicates from lnbits/static/i18n/{cs,en,kr,sk}.js * add i18n checker * add i18n ai tool * add autogenerated AI translations * add i18n-ai-tool check whether variables in formatted strings are not broken * fix issues with variables found by the script * chore: make bundle 2023-12-12 11:10:51 +01:00			`prefix = "window.localisation.%s = {\n" % lang`
			`assert s.startswith(prefix)`
			`s = s[len(prefix) - 2 :]`
test: lint `tests` and `tools` aswell (#2296) * test: lint `tests` and `tools` aswell more linting :) * fix linting issues in tests and tools * fixup! * how is this working? 2024-02-27 14:30:52 +01:00			`json = json5.loads(s)`
			`assert isinstance(json, dict)`
			`return json`
add i18n checker and autogenerated AI translations (#2160) * use translated string * fix typos in lnbits/static/i18n/{it,jp,nl,we}.js * add missing strings to cs,en,sk translations * remove duplicates from lnbits/static/i18n/{cs,en,kr,sk}.js * add i18n checker * add i18n ai tool * add autogenerated AI translations * add i18n-ai-tool check whether variables in formatted strings are not broken * fix issues with variables found by the script * chore: make bundle 2023-12-12 11:10:51 +01:00

test: lint `tests` and `tools` aswell (#2296) * test: lint `tests` and `tools` aswell more linting :) * fix linting issues in tests and tools * fixup! * how is this working? 2024-02-27 14:30:52 +01:00			`def save_language(lang: str, data) -> None:`
chore: adhere to ruff's `UP` basically use `list` and `type` instead of `List` and `Type` this is save to use for python3.9 and has been deprecated. also has some performance drawbacks. read more here: https://docs.astral.sh/ruff/rules/non-pep585-annotation/ 2024-04-01 18:50:21 +02:00			`with open(f"lnbits/static/i18n/{lang}.js", "w") as f:`
add i18n checker and autogenerated AI translations (#2160) * use translated string * fix typos in lnbits/static/i18n/{it,jp,nl,we}.js * add missing strings to cs,en,sk translations * remove duplicates from lnbits/static/i18n/{cs,en,kr,sk}.js * add i18n checker * add i18n ai tool * add autogenerated AI translations * add i18n-ai-tool check whether variables in formatted strings are not broken * fix issues with variables found by the script * chore: make bundle 2023-12-12 11:10:51 +01:00			`f.write("window.localisation.%s = {\n" % lang)`
			`row = 0`
			`for k, v in data.items():`
			`row += 1`
			`f.write(" %s:\n" % k)`
			`if "'" in v:`
			`f.write(' "%s"' % v)`
			`else:`
			`f.write(" '%s'" % v)`
			`if row == len(data):`
			`f.write("\n")`
			`else:`
			`f.write(",\n")`
			`f.write("}\n")`


test: lint `tests` and `tools` aswell (#2296) * test: lint `tests` and `tools` aswell more linting :) * fix linting issues in tests and tools * fixup! * how is this working? 2024-02-27 14:30:52 +01:00			`def string_variables_match(str1: str, str2: str) -> bool:`
add i18n checker and autogenerated AI translations (#2160) * use translated string * fix typos in lnbits/static/i18n/{it,jp,nl,we}.js * add missing strings to cs,en,sk translations * remove duplicates from lnbits/static/i18n/{cs,en,kr,sk}.js * add i18n checker * add i18n ai tool * add autogenerated AI translations * add i18n-ai-tool check whether variables in formatted strings are not broken * fix issues with variables found by the script * chore: make bundle 2023-12-12 11:10:51 +01:00			`pat = re.compile(r"%\{[a-z0-9_]*\}")`
			`m1 = re.findall(pat, str1)`
			`m2 = re.findall(pat, str2)`
			`return sorted(m1) == sorted(m2)`


			`def translate_string(lang_from, lang_to, text):`
			`target = {`
			`"de": "German",`
			`"es": "Spanish",`
			`"jp": "Japan",`
			`"cn": "Chinese",`
			`"fr": "French",`
			`"it": "Italian",`
			`"pi": "Pirate",`
			`"nl": "Dutch",`
			`"we": "Welsh",`
			`"pl": "Polish",`
			`"pt": "Portuguese",`
			`"br": "Brazilian Portugese",`
			`"cs": "Czech",`
			`"sk": "Slovak",`
			`"kr": "Korean",`
feat: add node url to api keys & docs (#2283) * feat: add node url to api keys & docs closes #2277 * add finnish to tools/i18n-ai-tool.py * regenerate localization with i18n-ai-tool * chore: make bundle --------- Co-authored-by: Pavol Rusnak <pavol@rusnak.io> 2024-02-20 12:32:49 +01:00			`"fi": "Finnish",`
add i18n checker and autogenerated AI translations (#2160) * use translated string * fix typos in lnbits/static/i18n/{it,jp,nl,we}.js * add missing strings to cs,en,sk translations * remove duplicates from lnbits/static/i18n/{cs,en,kr,sk}.js * add i18n checker * add i18n ai tool * add autogenerated AI translations * add i18n-ai-tool check whether variables in formatted strings are not broken * fix issues with variables found by the script * chore: make bundle 2023-12-12 11:10:51 +01:00			`}[lang_to]`
			`client = OpenAI()`
			`try:`
			`chat_completion = client.chat.completions.create(`
			`messages=[`
			`{`
			`"role": "system",`
fix: small update to i18n-ai-tool system prompt 2024-04-18 12:53:51 +02:00			"content": "You are a language expert who speaks all the languages of the world perfectly. You are tasked with translating a text from English into another language. The text is part of the software you are working on. If the text contains a phrase enclosed in curly braces and preceded by a percent sign, do not translate this phrase; instead, keep it verbatim. For instance, the phrase %{amount} should remain %{amount} in the target language. Your output should only be the translated string, nothing more.", # noqa: E501
add i18n checker and autogenerated AI translations (#2160) * use translated string * fix typos in lnbits/static/i18n/{it,jp,nl,we}.js * add missing strings to cs,en,sk translations * remove duplicates from lnbits/static/i18n/{cs,en,kr,sk}.js * add i18n checker * add i18n ai tool * add autogenerated AI translations * add i18n-ai-tool check whether variables in formatted strings are not broken * fix issues with variables found by the script * chore: make bundle 2023-12-12 11:10:51 +01:00			`},`
			`{`
			`"role": "user",`
			`"content": f"Translate the following string from English to {target}: {text}", # noqa: E501`
			`},`
			`],`
			`model="gpt-4-1106-preview", # aka GPT-4 Turbo`
			`)`
test: lint `tests` and `tools` aswell (#2296) * test: lint `tests` and `tools` aswell more linting :) * fix linting issues in tests and tools * fixup! * how is this working? 2024-02-27 14:30:52 +01:00			`assert chat_completion.choices[0].message.content, "No response from GPT-4"`
add i18n checker and autogenerated AI translations (#2160) * use translated string * fix typos in lnbits/static/i18n/{it,jp,nl,we}.js * add missing strings to cs,en,sk translations * remove duplicates from lnbits/static/i18n/{cs,en,kr,sk}.js * add i18n checker * add i18n ai tool * add autogenerated AI translations * add i18n-ai-tool check whether variables in formatted strings are not broken * fix issues with variables found by the script * chore: make bundle 2023-12-12 11:10:51 +01:00			`translated = chat_completion.choices[0].message.content.strip()`
			`# return translated string only if variables were not broken`
			`if string_variables_match(text, translated):`
			`return translated`
			`else:`
			`return None`
			`except Exception:`
			`return None`


			`data_en = load_language("en")`
			`data = load_language(lang)`

			`missing = set(data_en.keys()) - set(data.keys())`
			`print(f"Missing {len(missing)} keys in language '{lang}'")`

			`if len(missing) > 0:`
			`new = {}`
			`for k in data_en:`
			`if k in data:`
			`new[k] = data[k]`
			`else:`
			`print(f"Translating key '{k}'")`
			`print(f"{data_en[k]}")`
			`translated = translate_string("en", lang, data_en[k])`
			`print("->")`
			`if translated:`
			`print(f"{translated}")`
			`new[k] = translated`
			`else:`
			`print("ERROR")`
			`print()`
			`save_language(lang, new)`
			`else:`
			`# check whether variables match for each string`
			`for k in data_en:`
			`if not string_variables_match(data_en[k], data[k]):`
			`print(f"Variables mismatch ({k}):")`
			`print(data_en[k])`
			`print(data[k])`