🌐 Language Import/Export (#25889)

This commit is contained in:
Scott Lahteine
2023-06-30 19:21:56 -05:00
committed by GitHub
parent 1bc155e810
commit 0878b99b2c
42 changed files with 2751 additions and 2470 deletions

View File

View File

@@ -1,19 +1,21 @@
#!/usr/bin/env python3
#
# exportTranslations.py
#
# Export LCD language strings to CSV files for easier translation.
# Use importTranslations.py to import CSV into the language files.
#
'''
languageExport.py
Export LCD language strings to CSV files for easier translation.
Use importTranslations.py to import CSV into the language files.
'''
import re
from pathlib import Path
from languageUtil import namebyid
LANGHOME = "Marlin/src/lcd/language"
# Write multiple sheets if true, otherwise write one giant sheet
MULTISHEET = True
# Where to look for the language files
LANGHOME = "Marlin/src/lcd/language"
OUTDIR = 'out-csv'
# Check for the path to the language files
if not Path(LANGHOME).is_dir():
@@ -21,39 +23,6 @@ if not Path(LANGHOME).is_dir():
print("Edit LANGHOME or cd to the root of the repo before running.")
exit(1)
# A dictionary to contain language names
LANGNAME = {
'an': "Aragonese",
'bg': "Bulgarian",
'ca': "Catalan",
'cz': "Czech",
'da': "Danish",
'de': "German",
'el': "Greek", 'el_CY': "Greek (Cyprus)", 'el_gr': "Greek (Greece)",
'en': "English",
'es': "Spanish",
'eu': "Basque-Euskera",
'fi': "Finnish",
'fr': "French", 'fr_na': "French (no accent)",
'gl': "Galician",
'hr': "Croatian (Hrvatski)",
'hu': "Hungarian / Magyar",
'it': "Italian",
'jp_kana': "Japanese (Kana)",
'ko_KR': "Korean",
'nl': "Dutch",
'pl': "Polish",
'pt': "Portuguese", 'pt_br': "Portuguese (Brazil)",
'ro': "Romanian",
'ru': "Russian",
'sk': "Slovak",
'sv': "Swedish",
'tr': "Turkish",
'uk': "Ukrainian",
'vi': "Vietnamese",
'zh_CN': "Simplified Chinese", 'zh_TW': "Traditional Chinese"
}
# A limit just for testing
LIMIT = 0
@@ -80,8 +49,7 @@ for langfile in langfiles:
if not f: continue
# Flags to indicate a wide or tall section
wideflag = False
tallflag = False
wideflag, tallflag = False, False
# A counter for the number of strings in the file
stringcount = 0
# A dictionary to hold all the strings
@@ -94,25 +62,20 @@ for langfile in langfiles:
# Check for wide or tall sections, assume no complicated nesting
if line.startswith("#endif") or line.startswith("#else"):
wideflag = False
tallflag = False
wideflag, tallflag = False, False
elif re.match(r'#if.*WIDTH\s*>=?\s*2[01].*', line): wideflag = True
elif re.match(r'#if.*LCD_HEIGHT\s*>=?\s*4.*', line): tallflag = True
# For string-defining lines capture the string data
match = re.match(r'LSTR\s+([A-Z0-9_]+)\s*=\s*(.+)\s*', line)
if match:
# The name is the first captured group
name = match.group(1)
# The value is the second captured group
value = match.group(2)
# Replace escaped quotes temporarily
value = value.replace('\\"', '__Q__')
# Name and quote-sanitized value
name, value = match.group(1), match.group(2).replace('\\"', '$$$')
# Remove all _UxGT wrappers from the value in a non-greedy way
value = re.sub(r'_UxGT\((".*?")\)', r'\1', value)
# Multi-line strings will get one or more bars | for identification
# Multi-line strings get one or more bars | for identification
multiline = 0
multimatch = re.match(r'.*MSG_(\d)_LINE\s*\(\s*(.+?)\s*\).*', value)
if multimatch:
@@ -122,7 +85,7 @@ for langfile in langfiles:
# Wrap inline defines in parentheses
value = re.sub(r' *([A-Z0-9]+_[A-Z0-9_]+) *', r'(\1)', value)
# Remove quotes around strings
value = re.sub(r'"(.*?)"', r'\1', value).replace('__Q__', '"')
value = re.sub(r'"(.*?)"', r'\1', value).replace('$$$', '""')
# Store all unique names as dictionary keys
names[name] = 1
# Store the string as narrow or wide
@@ -144,6 +107,9 @@ langcodes = list(language_strings.keys())
# Print the array
#print(language_strings)
# Report the total number of unique strings
print("Found %s distinct LCD strings." % len(names))
# Write a single language entry to the CSV file with narrow, wide, and tall strings
def write_csv_lang(f, strings, name):
f.write(',')
@@ -157,32 +123,27 @@ if MULTISHEET:
#
# Export a separate sheet for each language
#
OUTDIR = 'csv-out'
Path.mkdir(Path(OUTDIR), exist_ok=True)
for lang in langcodes:
f = open("%s/language_%s.csv" % (OUTDIR, lang), 'w', encoding='utf-8')
if not f: continue
with open("%s/language_%s.csv" % (OUTDIR, lang), 'w', encoding='utf-8') as f:
lname = lang + ' ' + namebyid(lang)
header = ['name', lname, lname + ' (wide)', lname + ' (tall)']
f.write('"' + '","'.join(header) + '"\n')
lname = lang + ' ' + LANGNAME[lang]
header = ['name', lname, lname + ' (wide)', lname + ' (tall)']
f.write('"' + '","'.join(header) + '"\n')
for name in names.keys():
f.write('"' + name + '"')
write_csv_lang(f, language_strings[lang], name)
f.write('\n')
f.close()
for name in names.keys():
f.write('"' + name + '"')
write_csv_lang(f, language_strings[lang], name)
f.write('\n')
else:
#
# Export one large sheet containing all languages
#
f = open("languages.csv", 'w', encoding='utf-8')
if f:
with open("languages.csv", 'w', encoding='utf-8') as f:
header = ['name']
for lang in langcodes:
lname = lang + ' ' + LANGNAME[lang]
lname = lang + ' ' + namebyid(lang)
header += [lname, lname + ' (wide)', lname + ' (tall)']
f.write('"' + '","'.join(header) + '"\n')
@@ -190,4 +151,3 @@ else:
f.write('"' + name + '"')
for lang in langcodes: write_csv_lang(f, language_strings[lang], name)
f.write('\n')
f.close()

View File

@@ -0,0 +1,219 @@
#!/usr/bin/env python3
"""
languageImport.py
Import LCD language strings from a CSV file or Google Sheets
and write Marlin LCD language files based on the data.
Use languageExport.py to export CSV from the language files.
Google Sheets Link:
https://docs.google.com/spreadsheets/d/12yiy-kS84ajKFm7oQIrC4CF8ZWeu9pAR4zrgxH4ruk4/edit#gid=84528699
TODO: Use the defines and comments above the namespace from existing language files.
Get the 'constexpr uint8_t CHARSIZE' from existing language files.
Get the correct 'using namespace' for languages that don't inherit from English.
"""
import sys, re, requests, csv, datetime
from languageUtil import namebyid
LANGHOME = "Marlin/src/lcd/language"
OUTDIR = 'out-language'
# Get the file path from the command line
FILEPATH = sys.argv[1] if len(sys.argv) > 1 else None
download = FILEPATH == 'download'
if not FILEPATH or download:
SHEETID = "12yiy-kS84ajKFm7oQIrC4CF8ZWeu9pAR4zrgxH4ruk4"
FILEPATH = 'https://docs.google.com/spreadsheet/ccc?key=%s&output=csv' % SHEETID
if FILEPATH.startswith('http'):
response = requests.get(FILEPATH)
assert response.status_code == 200, 'GET failed for %s' % FILEPATH
csvdata = response.content.decode('utf-8')
else:
if not FILEPATH.endswith('.csv'): FILEPATH += '.csv'
with open(FILEPATH, 'r', encoding='utf-8') as f: csvdata = f.read()
if not csvdata:
print("Error: couldn't read CSV data from %s" % FILEPATH)
exit(1)
if download:
DLNAME = sys.argv[2] if len(sys.argv) > 2 else 'languages.csv'
if not DLNAME.endswith('.csv'): DLNAME += '.csv'
with open(DLNAME, 'w', encoding='utf-8') as f: f.write(csvdata)
print("Downloaded %s from %s" % (DLNAME, FILEPATH))
exit(0)
lines = csvdata.splitlines()
print(lines)
reader = csv.reader(lines, delimiter=',')
gothead = False
columns = ['']
numcols = 0
strings_per_lang = {}
for row in reader:
if not gothead:
gothead = True
numcols = len(row)
if row[0] != 'name':
print('Error: first column should be "name"')
exit(1)
# The rest of the columns are language codes and names
for i in range(1, numcols):
elms = row[i].split(' ')
lang = elms[0]
style = ('Wide' if elms[-1] == '(wide)' else 'Tall' if elms[-1] == '(tall)' else 'Narrow')
columns.append({ 'lang': lang, 'style': style })
if not lang in strings_per_lang: strings_per_lang[lang] = {}
if not style in strings_per_lang[lang]: strings_per_lang[lang][style] = {}
continue
# Add the named string for all the included languages
name = row[0]
for i in range(1, numcols):
str = row[i]
if str:
col = columns[i]
strings_per_lang[col['lang']][col['style']][name] = str
# Create a folder for the imported language outfiles
from pathlib import Path
Path.mkdir(Path(OUTDIR), exist_ok=True)
FILEHEADER = '''
/**
* Marlin 3D Printer Firmware
* Copyright (c) 2023 MarlinFirmware [https://github.com/MarlinFirmware/Marlin]
*
* Based on Sprinter and grbl.
* Copyright (c) 2011 Camiel Gubbels / Erik van der Zalm
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
#pragma once
/**
* %s
*
* LCD Menu Messages
* See also https://marlinfw.org/docs/development/lcd_language.html
*
* Substitutions are applied for the following characters when used in menu items titles:
*
* $ displays an inserted string
* { displays '0'....'10' for indexes 0 - 10
* ~ displays '1'....'11' for indexes 0 - 10
* * displays 'E1'...'E11' for indexes 0 - 10 (By default. Uses LCD_FIRST_TOOL)
* @ displays an axis name such as XYZUVW, or E for an extruder
*/
'''
# Iterate over the languages which correspond to the columns
# The columns are assumed to be grouped by language in the order Narrow, Wide, Tall
# TODO: Go through lang only, then impose the order Narrow, Wide, Tall.
# So if something is missing or out of order everything still gets built correctly.
f = None
gotlang = {}
for i in range(1, numcols):
#if i > 6: break # Testing
col = columns[i]
lang, style = col['lang'], col['style']
# If we haven't already opened a file for this language, do so now
if not lang in gotlang:
gotlang[lang] = {}
if f: f.close()
fn = "%s/language_%s.h" % (OUTDIR, lang)
f = open(fn, 'w', encoding='utf-8')
if not f:
print("Failed to open %s." % fn)
exit(1)
# Write the opening header for the new language file
#f.write(FILEHEADER % namebyid(lang))
f.write('/**\n * Imported from %s on %s at %s\n */\n' % (FILEPATH, datetime.date.today(), datetime.datetime.now().strftime("%H:%M:%S")))
# Start a namespace for the language and style
f.write('\nnamespace Language%s_%s {\n' % (style, lang))
# Wide and tall namespaces inherit from the others
if style == 'Wide':
f.write(' using namespace LanguageNarrow_%s;\n' % lang)
f.write(' #if LCD_WIDTH >= 20 || HAS_DWIN_E3V2\n')
elif style == 'Tall':
f.write(' using namespace LanguageWide_%s;\n' % lang)
f.write(' #if LCD_HEIGHT >= 4\n')
elif lang != 'en':
f.write(' using namespace Language_en; // Inherit undefined strings from English\n')
# Formatting for the lines
indent = ' ' if style == 'Narrow' else ' '
width = 34 if style == 'Narrow' else 32
lstr_fmt = '%sLSTR %%-%ds = %%s;%%s\n' % (indent, width)
# Emit all the strings for this language and style
for name in strings_per_lang[lang][style].keys():
# Get the raw string value
val = strings_per_lang[lang][style][name]
# Count the number of bars
if val.startswith('|'):
bars = val.count('|')
val = val[1:]
else:
bars = 0
# Escape backslashes, substitute quotes, and wrap in _UxGT("...")
val = '_UxGT("%s")' % val.replace('\\', '\\\\').replace('"', '$$$')
# Move named references outside of the macro
val = re.sub(r'\(([A-Z0-9]+_[A-Z0-9_]+)\)', r'") \1 _UxGT("', val)
# Remove all empty _UxGT("") that result from the above
val = re.sub(r'\s*_UxGT\(""\)\s*', '', val)
# No wrapper needed for just spaces
val = re.sub(r'_UxGT\((" +")\)', r'\1', val)
# Multi-line strings start with a bar...
if bars:
# Wrap the string in MSG_#_LINE(...) and split on bars
val = re.sub(r'^_UxGT\((.+)\)', r'_UxGT(MSG_%s_LINE(\1))' % bars, val)
val = val.replace('|', '", "')
# Restore quotes inside the string
val = val.replace('$$$', '\\"')
# Add a comment with the English string for reference
comm = ''
if lang != 'en' and 'en' in strings_per_lang:
en = strings_per_lang['en']
if name in en[style]: str = en[style][name]
elif name in en['Narrow']: str = en['Narrow'][name]
if str:
cfmt = '%%%ss// %%s' % (50 - len(val) if len(val) < 50 else 1)
comm = cfmt % (' ', str)
# Write out the string definition
f.write(lstr_fmt % (name, val, comm))
if style == 'Wide' or style == 'Tall': f.write(' #endif\n')
f.write('}\n') # End namespace
# Assume the 'Tall' namespace comes last
if style == 'Tall': f.write('\nnamespace Language_%s {\n using namespace LanguageTall_%s;\n}\n' % (lang, lang))
# Close the last-opened output file
if f: f.close()

View File

@@ -0,0 +1,41 @@
#!/usr/bin/env python3
#
# marlang.py
#
# A dictionary to contain language names
LANGNAME = {
'an': "Aragonese",
'bg': "Bulgarian",
'ca': "Catalan",
'cz': "Czech",
'da': "Danish",
'de': "German",
'el': "Greek", 'el_CY': "Greek (Cyprus)", 'el_gr': "Greek (Greece)",
'en': "English",
'es': "Spanish",
'eu': "Basque-Euskera",
'fi': "Finnish",
'fr': "French", 'fr_na': "French (no accent)",
'gl': "Galician",
'hr': "Croatian (Hrvatski)",
'hu': "Hungarian / Magyar",
'it': "Italian",
'jp_kana': "Japanese (Kana)",
'ko_KR': "Korean",
'nl': "Dutch",
'pl': "Polish",
'pt': "Portuguese", 'pt_br': "Portuguese (Brazil)",
'ro': "Romanian",
'ru': "Russian",
'sk': "Slovak",
'sv': "Swedish",
'tr': "Turkish",
'uk': "Ukrainian",
'vi': "Vietnamese",
'zh_CN': "Simplified Chinese", 'zh_TW': "Traditional Chinese"
}
def namebyid(id):
if id in LANGNAME: return LANGNAME[id]
return '<unknown>'