2019-08-07 12:18:26 +02:00
|
|
|
#! /usr/bin/env python3
|
2018-12-13 14:50:33 +01:00
|
|
|
#
|
2020-08-21 23:19:41 +02:00
|
|
|
# SPDX-FileCopyrightText: 2018 Adriaan de Groot <groot@kde.org>
|
|
|
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
#
|
2018-12-13 14:50:33 +01:00
|
|
|
# Uses the Transifex API to get a list of enabled languages,
|
|
|
|
# and outputs CMake settings for inclusion into CMakeLists.txt.
|
2020-01-27 16:35:56 +01:00
|
|
|
#
|
|
|
|
# This is a Python3 script.
|
|
|
|
#
|
|
|
|
# Run it with a -v command-line option to get extra output on
|
|
|
|
# actual translation percentages.
|
2018-12-13 14:50:33 +01:00
|
|
|
import sys
|
2022-03-29 00:11:27 +02:00
|
|
|
import os
|
2020-07-31 10:46:54 +02:00
|
|
|
import argparse
|
2018-12-13 14:50:33 +01:00
|
|
|
|
2020-07-31 12:07:01 +02:00
|
|
|
class TXError(Exception):
    """Error raised by this script when a step of the statistics update fails."""
|
|
|
|
|
|
|
|
|
|
|
|
class TransifexGetter(object):
    """
    Get language data from Transifex.

    The object does all the work in __init__, after that
    the only relevant data is .languages, a dictionary
    of language data. Each key is a language code (the Transifex
    language id without its "l:" prefix) and each value has the shape
    { "translated": { "stringcount": <int>, "percentage": <float 0..1> } }.

    Raises TXError when no API token is available, when the request
    fails, or when the response does not have the expected structure.
    """

    # Upper bound (seconds) for the single HTTP request; without a timeout
    # a stalled connection would hang the script forever.
    REQUEST_TIMEOUT = 60

    def __init__(self):
        token = self.get_tx_credentials()
        if token is None:
            raise TXError("Could not get Transifex API token")

        # Imported here so that the rest of the script (e.g. --bogus mode)
        # works without the requests package installed.
        import requests
        base_url = "https://rest.api.transifex.com/resource_language_stats"
        project_filter = "filter[project]=o:calamares:p:calamares"
        resource_filter = "filter[resource]=o:calamares:p:calamares:r:calamares"
        # Only the colons are percent-encoded; the rest of the query is sent as-is.
        url = base_url + "?" + project_filter.replace(":", "%3A") + "&" + resource_filter.replace(":", "%3A")
        headers = {
            "accept": "application/vnd.api+json",
            "authorization": "Bearer " + token
        }

        try:
            r = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # Connection problems and timeouts become the script's own error type
            raise TXError("Could not get Transifex data from API") from e
        if r.status_code != 200:
            raise TXError("Could not get Transifex data from API")

        try:
            data = r.json()["data"]
            self.languages = self._languages_from_stats(data)
        except (ValueError, KeyError, TypeError) as e:
            # Not JSON, or JSON without the fields this script relies on
            raise TXError("Could not get Transifex data from API") from e

    @staticmethod
    def _languages_from_stats(data):
        """
        Converts the "data" list of a resource_language_stats response
        into the dictionary that is stored in .languages.

        A resource with zero total strings gets a percentage of 0.0
        rather than causing a division by zero.
        """
        languages = dict()
        for d in data:
            translated_count = d["attributes"]["translated_strings"]
            total_count = d["attributes"]["total_strings"]
            language_key = d["relationships"]["language"]["data"]["id"]
            # Explicit check instead of assert, which is stripped under -O
            if not language_key.startswith("l:"):
                raise TXError("Unexpected language id '%s' in Transifex data" % language_key)
            percentage = (translated_count / total_count) if total_count else 0.0
            languages[language_key[2:]] = dict(translated=dict(stringcount=translated_count, percentage=percentage))
        return languages

    def get_tx_credentials(self):
        """
        Gets the API token out of the user's .transifexrc (this is supposed
        to be secure).

        Returns the token string, or None if the file cannot be read,
        cannot be parsed, or has no password entry for app.transifex.com.
        """
        import configparser
        import os
        txconfig_name = os.path.expanduser("~/.transifexrc")
        parser = configparser.ConfigParser()
        try:
            with open(txconfig_name, "r") as f:
                parser.read_file(f)
            return parser.get("https://app.transifex.com", "password")
        except (IOError, configparser.Error):
            # A missing file and a file without usable credentials are
            # treated the same way: there is no token.
            return None
|
|
|
|
|
|
|
|
|
2020-07-31 12:17:07 +02:00
|
|
|
class BogusGetter(object):
    """
    Stand-in getter with canned language data.

    Nothing is fetched: after construction, .languages holds a fixed set
    of languages with fixed completion percentages, so the script can be
    tested without hitting Transifex servers all the time.
    """
    def __init__(self):
        # (language code, completion in percent)
        canned = (("sq", 100), ("ar", 44), ("as", 28), ("de", 15), ("da", 4), ("ts", 82))
        self.languages = {
            code: {"translated": {"stringcount": 686, "percentage": (percent/100.0)}}
            for code, percent in canned
        }
|
2018-12-13 14:50:33 +01:00
|
|
|
|
|
|
|
|
2020-07-31 12:22:40 +02:00
|
|
|
class PrintOutputter(object):
    """
    Outputter that sends every line straight to standard output.

    It can be used as a context manager; entering and leaving the
    context does nothing by itself.
    """
    def __enter__(self):
        return self

    def __exit__(self, e, v, tb):
        # Nothing to clean up. Returning None means that an exception
        # raised inside the with-block is not suppressed.
        return None

    def print(self, s):
        """Writes @p s using the builtin print()."""
        print(s)
|
|
|
|
|
|
|
|
|
2020-07-31 12:43:52 +02:00
|
|
|
class EditingOutputter(object):
    """
    Edit CMakeLists in-place.

    On construction the file is read and split in three parts: the lines
    before the first "# Total N languages" marker, the section between
    that marker and its repetition (which is discarded), and the lines
    after the second marker. Everything passed to print() becomes the
    new middle section; the file is rewritten when the with-block is
    left without an exception.

    @p filename is the file to edit (default "CMakeLists.txt" in the
    current directory) and @p max_section_lines is the largest distance
    allowed between the start marker and the end of the section.

    Raises TXError when the markers cannot be found in a plausible place.
    """
    def __init__(self, filename="CMakeLists.txt", max_section_lines=150):
        self.filename = filename
        # Explicit encoding so the result does not depend on the locale
        with open(filename, "r", encoding="utf-8") as f:
            lines = f.readlines()

        mark = None
        mark_text = None
        for index, line in enumerate(lines):
            # Note that we didn't strip the lines, so need the \n here
            if line.startswith("# Total ") and line.endswith(" languages\n"):
                mark = index
                mark_text = line
                break
        if mark is None:
            raise TXError("No CMakeLists.txt lines for TX stats found")
        self.pre_lines = lines[:mark]

        # nextmark is the index of the first line *after* the end marker,
        # or None when the marker text is not repeated at all.
        nextmark = None
        for index in range(mark + 1, len(lines)):
            if lines[index].startswith(mark_text):
                nextmark = index + 1
                break
        if nextmark is None or nextmark > mark + max_section_lines or nextmark > len(lines) - 4:
            # Try to catch runaway nextmarks: we know there should
            # be four set-lines, which are unlikely to be 3 lines each;
            # similarly the CMakeLists.txt is supposed to end with
            # some boilerplate.
            #
            # However, gersemi will reformat to one-language-per-line,
            # so we can get really long sections, that's why we use 150 as a limit.
            raise TXError("Could not find end of TX settings in CMakeLists.txt")
        self.post_lines = lines[nextmark:]

        self.mid_lines = []
        print("# Editing CMakeLists.txt in-place")

    def print(self, s):
        """
        Collects @p s as a line of the new section. Comment lines
        (starting with #) are also echoed to standard output.
        """
        # Add the implicit \n from print()
        self.mid_lines.append(s + "\n")
        if s.startswith("#"):
            print(s)

    def __enter__(self):
        return self

    def __exit__(self, e, v, tb):
        # Only touch the file when the with-block finished normally, so a
        # failure half-way never leaves a partially updated section behind.
        if e is None:
            with open(self.filename, "w", encoding="utf-8") as f:
                f.write("".join(self.pre_lines + self.mid_lines + self.post_lines))
            print("# CMakeLists.txt updated")
|
2020-07-31 12:43:52 +02:00
|
|
|
|
|
|
|
|
2020-07-31 12:22:40 +02:00
|
|
|
def output_langs(all_langs, outputter, label, filterfunc):
    """
    Output (via @p outputter) all of the languages in @p all_langs
    that satisfy the translation-percentage filter @p filterfunc.
    Prints a CMake set() command with the @p label as part
    of the variable name.

    @p all_langs is an iterable of (percentage, language-name) pairs;
    @p filterfunc is called with the percentage only. The selected
    language names are sorted alphabetically.

    Performs line-wrapping: lines are broken at a space, continuation
    lines get a prefix, and the last line carries a trailing comment
    with the number of languages. A word that is too long to fit is
    written on a line of its own rather than being cut.
    """
    these_langs = [l for s, l in all_langs if filterfunc(s)]
    out = " ".join(["set( _tx_%s" % label, " ".join(sorted(these_langs)), ")"])
    width = 68
    prefix = ""
    trailer = f" # {len(these_langs)} languages"  # Comment at the end of the CMake line

    while len(out) > width - len(prefix):
        cut = out[:width].rfind(" ")
        if cut <= 0:
            # No usable break point inside the window. Slicing with the -1
            # from rfind() would silently drop a character (and eventually
            # the closing parenthesis), so break at the next space instead.
            cut = out.find(" ", width)
            if cut < 0:
                # Nowhere left to break: emit the remainder as one line
                break
        chunk = out[:cut]
        outputter.print("%s%s" % (prefix, chunk))
        # Skip the space that the line was broken at
        out = out[len(chunk)+1:]
        prefix = " "
    outputter.print(f"{prefix}{out}{trailer}")
|
2018-12-13 14:50:33 +01:00
|
|
|
|
2020-07-31 12:07:01 +02:00
|
|
|
|
2020-07-31 12:22:40 +02:00
|
|
|
def _audit_translation_files(languages):
    """
    Cross-checks the languages known to Transifex (the keys of
    @p languages) against the translation files under lang/ in the
    current directory, and prints a "# !!" warning for every mismatch.
    """
    # Audit the languages that are in TX, mapped to git
    for lang_name in languages:
        if not os.path.exists("lang/calamares_{}.ts".format(lang_name)):
            print("# !! Missing translation file for {}".format(lang_name))
        if not os.path.isdir("lang/python/{}/LC_MESSAGES".format(lang_name)):
            print("# !! Missing Python translation file for {}".format(lang_name))

    # Audit the files that are in git, mapped to TX
    special_cases = ("python.pot", "python", "CMakeLists.txt", "txload.cpp", "calamares_i18n.qrc.in")
    # File-name prefix of each family of .ts files, with its warning
    ts_families = (
        ("calamares_", "# !! Translation file for {} not in TX"),
        ("tz_", "# !! Translation file for TZ {} not in TX"),
        ("kb_", "# !! Translation file for KB {} not in TX"),
    )
    # os.listdir() returns names in arbitrary order; sort for stable output
    for file_name in sorted(os.listdir("lang")):
        if file_name in special_cases:
            continue
        for family_prefix, message in ts_families:
            if file_name.startswith(family_prefix) and file_name.endswith(".ts"):
                # Strip the family prefix and the ".ts" suffix
                key = file_name[len(family_prefix):-3]
                # English is the source language, so it is never in TX
                if key not in languages and key != "en":
                    print(message.format(key))
                break
        else:
            print("# !! Weird translation file {} not in TX".format(file_name))

    # Audit the python translation files that are in git, mapped to TX
    for file_name in sorted(os.listdir("lang/python")):
        if file_name not in languages:
            print("# !! Translation file for Python {} not in TX".format(file_name))


def get_tx_stats(languages, outputter, verbose):
    """
    Takes the translation statistics in @p languages (a dictionary
    mapping a language name to its data, as provided by one of the
    getter classes) and writes, via @p outputter, the CMake settings
    that replace the _tx_* variables in CMakeLists.txt
    according to standard criteria. The settings are enclosed by two
    identical "# Total N languages" marker lines.

    If @p verbose is True, prints out language stats as well.

    Afterwards the translation files under lang/ are audited against
    the languages; warnings from the audit go to standard output.

    Returns 0.
    """
    # Some languages go into the "incomplete" list by definition,
    # regardless of their completion status: this can have various reasons.
    #
    # - (Esperanto wasn't supported until Qt 5.12.2)
    # - Interlingue still is not supported by the minimum Qt version
    incomplete_languages = (
        "ie",  # Not supported by Qt at least through 5.15.0
        )

    all_langs = []
    mark_text = "# Total %d languages" % len(languages)
    outputter.print(mark_text)
    for lang_name in languages:
        stats = languages[lang_name]["translated"]["percentage"]
        # Make the by-definition-incomplete languages have a percentage
        # lower than zero; this way they end up sorted (in -v output)
        # at the bottom but you can still determine the "actual" percentage.
        if lang_name in incomplete_languages:
            stats = -stats
        all_langs.append((stats, lang_name))

    if verbose:
        for s, l in sorted(all_langs, reverse=True):
            outputter.print("# %16s\t%6.2f" % (l, s * 100.0))
    output_langs(all_langs, outputter, "complete", lambda s : s == 1.0)
    output_langs(all_langs, outputter, "good", lambda s : 1.0 > s >= 0.75)
    output_langs(all_langs, outputter, "ok", lambda s : 0.75 > s >= 0.05)
    output_langs(all_langs, outputter, "incomplete", lambda s : 0.05 > s)
    outputter.print(mark_text)

    _audit_translation_files(languages)

    return 0
|
|
|
|
|
|
|
|
|
|
|
|
def main(argv=None):
    """
    Command-line entry point.

    @p argv is the list of command-line arguments to parse; when it is
    None (the default) the arguments of the running process are used.

    Returns the exit status: the result of get_tx_stats() on success,
    or 1 when a TXError was raised (its message is printed with a
    leading "! ").
    """
    parser = argparse.ArgumentParser(description="Update Transifex Statistics")
    parser.add_argument("--verbose", "-v", help="Show statistics", action="store_true")
    parser.add_argument("--bogus", "-n", help="Use bogus data (do not query Transifex)", action="store_true")
    parser.add_argument("--edit", "-e", help="Edit CMakeLists.txt in-place", action="store_true")
    args = parser.parse_args(argv)
    try:
        if args.bogus:
            getter = BogusGetter()
        else:
            getter = TransifexGetter()
        if args.edit:
            outputter = EditingOutputter()
        else:
            outputter = PrintOutputter()
        # The outputter decides on leaving the with-block what happens
        # to the collected output (e.g. rewriting CMakeLists.txt).
        with outputter:
            return get_tx_stats(getter.languages, outputter, args.verbose)
    except TXError as e:
        print("! " + str(e))
        return 1
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run only when executed as a script; the return value of main()
    # becomes the exit status of the process.
    raise SystemExit(main())
|