2023-12-12 11:10:51 +01:00
# 1. Always check the results of the procedure
# 2. Always run "npx prettier -w lnbits/static/i18n/XX.js" to reformat the result
import os
import re
import sys
import json5
from openai import OpenAI
if len ( sys . argv ) < 2 :
print ( " Usage: python3 tools/i18n-tool.py <code> [language] " )
sys . exit ( 1 )
lang = sys . argv [ 1 ]
2024-02-27 14:30:52 +01:00
assert os . getenv ( " OPENAI_API_KEY " ) , " OPENAI_API_KEY env var not set "
2023-12-12 11:10:51 +01:00
2024-02-27 14:30:52 +01:00
def load_language ( lang : str ) - > dict :
2024-04-01 18:50:21 +02:00
s = open ( f " lnbits/static/i18n/ { lang } .js " ) . read ( )
2023-12-12 11:10:51 +01:00
prefix = " window.localisation. %s = { \n " % lang
assert s . startswith ( prefix )
s = s [ len ( prefix ) - 2 : ]
2024-02-27 14:30:52 +01:00
json = json5 . loads ( s )
assert isinstance ( json , dict )
return json
2023-12-12 11:10:51 +01:00
2024-02-27 14:30:52 +01:00
def save_language ( lang : str , data ) - > None :
2024-04-01 18:50:21 +02:00
with open ( f " lnbits/static/i18n/ { lang } .js " , " w " ) as f :
2023-12-12 11:10:51 +01:00
f . write ( " window.localisation. %s = { \n " % lang )
row = 0
for k , v in data . items ( ) :
row + = 1
f . write ( " %s : \n " % k )
if " ' " in v :
f . write ( ' " %s " ' % v )
else :
f . write ( " ' %s ' " % v )
if row == len ( data ) :
f . write ( " \n " )
else :
f . write ( " , \n " )
f . write ( " } \n " )
2024-02-27 14:30:52 +01:00
def string_variables_match ( str1 : str , str2 : str ) - > bool :
2023-12-12 11:10:51 +01:00
pat = re . compile ( r " % \ { [a-z0-9_]* \ } " )
m1 = re . findall ( pat , str1 )
m2 = re . findall ( pat , str2 )
return sorted ( m1 ) == sorted ( m2 )
def translate_string ( lang_from , lang_to , text ) :
target = {
" de " : " German " ,
" es " : " Spanish " ,
" jp " : " Japan " ,
" cn " : " Chinese " ,
" fr " : " French " ,
" it " : " Italian " ,
" pi " : " Pirate " ,
" nl " : " Dutch " ,
" we " : " Welsh " ,
" pl " : " Polish " ,
" pt " : " Portuguese " ,
" br " : " Brazilian Portugese " ,
" cs " : " Czech " ,
" sk " : " Slovak " ,
" kr " : " Korean " ,
2024-02-20 12:32:49 +01:00
" fi " : " Finnish " ,
2023-12-12 11:10:51 +01:00
} [ lang_to ]
client = OpenAI ( )
try :
chat_completion = client . chat . completions . create (
messages = [
{
" role " : " system " ,
2024-04-18 12:53:51 +02:00
" content " : " You are a language expert who speaks all the languages of the world perfectly. You are tasked with translating a text from English into another language. The text is part of the software you are working on. If the text contains a phrase enclosed in curly braces and preceded by a percent sign, do not translate this phrase; instead, keep it verbatim. For instance, the phrase % {amount} should remain % {amount} in the target language. Your output should only be the translated string, nothing more. " , # noqa: E501
2023-12-12 11:10:51 +01:00
} ,
{
" role " : " user " ,
" content " : f " Translate the following string from English to { target } : { text } " , # noqa: E501
} ,
] ,
model = " gpt-4-1106-preview " , # aka GPT-4 Turbo
)
2024-02-27 14:30:52 +01:00
assert chat_completion . choices [ 0 ] . message . content , " No response from GPT-4 "
2023-12-12 11:10:51 +01:00
translated = chat_completion . choices [ 0 ] . message . content . strip ( )
# return translated string only if variables were not broken
if string_variables_match ( text , translated ) :
return translated
else :
return None
except Exception :
return None
data_en = load_language ( " en " )
data = load_language ( lang )
missing = set ( data_en . keys ( ) ) - set ( data . keys ( ) )
print ( f " Missing { len ( missing ) } keys in language ' { lang } ' " )
if len ( missing ) > 0 :
new = { }
for k in data_en :
if k in data :
new [ k ] = data [ k ]
else :
print ( f " Translating key ' { k } ' " )
print ( f " { data_en [ k ] } " )
translated = translate_string ( " en " , lang , data_en [ k ] )
print ( " -> " )
if translated :
print ( f " { translated } " )
new [ k ] = translated
else :
print ( " ERROR " )
print ( )
save_language ( lang , new )
else :
# check whether variables match for each string
for k in data_en :
if not string_variables_match ( data_en [ k ] , data [ k ] ) :
print ( f " Variables mismatch ( { k } ): " )
print ( data_en [ k ] )
print ( data [ k ] )