[locale] Fix up handling of regional variants, country-specifics

- expand tests to include Serbian variants
- massage detection code to do better filtering based on
  country-selection (so "American English" doesn't later
  pick "English" with Antigua and Barbuda for locale)
This commit is contained in:
Adriaan de Groot 2022-06-19 15:25:23 +02:00
parent 18626901d3
commit 82b19a6314
2 changed files with 78 additions and 31 deletions

View File

@ -62,7 +62,7 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale,
cDebug() << Logger::SubEntry << "Matching" << linesForLanguage;
QString lang;
if ( linesForLanguage.length() == 0 || languageLocale.isEmpty() )
if ( linesForLanguage.isEmpty() || languageLocale.isEmpty() )
{
lang = "en_US.UTF-8";
}
@ -72,6 +72,16 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale,
}
// lang could still be empty if we found multiple locales that satisfy myLanguage
const QString combinedLanguageAndCountry = QString( "%1_%2" ).arg( language ).arg( countryCode );
if ( lang.isEmpty() && region.isEmpty() )
{
auto l = linesForLanguage.filter(
QRegularExpression( combinedLanguageAndCountry + "[._]" ) ); // no regional variants
if ( l.length() == 1 )
{
lang = l.first();
}
}
// The following block was inspired by Ubiquity, scripts/localechooser-apply.
// No copyright statement found in file, assuming GPL v2 or later.
@ -178,15 +188,33 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale,
// We make a proposed locale based on the UI language and the timezone's country. There is no
// guarantee that this will be a valid, supported locale (often it won't).
QString lc_formats;
QString combined = QString( "%1_%2" ).arg( language ).arg( countryCode );
// We look up if it's a supported locale.
for ( const QString& line : availableLocales )
const QString combined = QString( "%1_%2" ).arg( language ).arg( countryCode );
if ( lang.isEmpty() )
{
if ( line.startsWith( combined ) )
cDebug() << Logger::SubEntry << "Looking up formats for" << combinedLanguageAndCountry;
// We look up if it's a supported locale.
for ( const QString& line : availableLocales )
{
lang = line;
lc_formats = line;
break;
if ( line.startsWith( combinedLanguageAndCountry ) )
{
lang = line;
lc_formats = line;
break;
}
}
}
else
{
if ( availableLocales.contains( lang ) )
{
cDebug() << Logger::SubEntry << "Exact formats match for language tag" << lang;
lc_formats = lang;
}
else if ( availableLocales.contains( combinedLanguageAndCountry ) )
{
cDebug() << Logger::SubEntry << "Exact formats match for combined" << combinedLanguageAndCountry;
lang = combinedLanguageAndCountry;
lc_formats = combinedLanguageAndCountry;
}
}

View File

@ -277,50 +277,59 @@ LocaleTests::testLanguageDetection_data()
QTest::addColumn< QString >( "country" );
QTest::addColumn< QString >( "expected" );
QTest::newRow( "english (US)" ) << QStringLiteral( "en" ) << QStringLiteral( "US" ) << QStringLiteral( "en_US" );
QTest::newRow( "english (CA)" ) << QStringLiteral( "en" ) << QStringLiteral( "CA" ) << QStringLiteral( "en" );
QTest::newRow( "english (GB)" ) << QStringLiteral( "en" ) << QStringLiteral( "GB" ) << QStringLiteral( "en_GB" );
QTest::newRow( "english (US)" ) << QStringLiteral( "en" ) << QStringLiteral( "US" )
<< QStringLiteral( "en_US.UTF-8" );
QTest::newRow( "english (CA)" ) << QStringLiteral( "en" ) << QStringLiteral( "CA" )
<< QStringLiteral( "en" ); // because it's first in the list
QTest::newRow( "english (GB)" ) << QStringLiteral( "en" ) << QStringLiteral( "GB" )
<< QStringLiteral( "en_GB.UTF-8" );
QTest::newRow( "english (NL)" ) << QStringLiteral( "en" ) << QStringLiteral( "NL" ) << QStringLiteral( "en" );
QTest::newRow( "portuguese (PT)" ) << QStringLiteral( "pt" ) << QStringLiteral( "PT" ) << QStringLiteral( "pt" );
QTest::newRow( "portuguese (NL)" ) << QStringLiteral( "pt" ) << QStringLiteral( "NL" ) << QStringLiteral( "pt" );
QTest::newRow( "portuguese (BR)" ) << QStringLiteral( "pt" ) << QStringLiteral( "BR" ) << QStringLiteral( "pt_BR" );
QTest::newRow( "portuguese (PT)" ) << QStringLiteral( "pt" ) << QStringLiteral( "PT" )
<< QStringLiteral( "pt_PT.UTF-8" );
QTest::newRow( "portuguese (NL)" ) << QStringLiteral( "pt" ) << QStringLiteral( "NL" )
<< QStringLiteral( "pt_BR.UTF-8" ); // first
QTest::newRow( "portuguese (BR)" ) << QStringLiteral( "pt" ) << QStringLiteral( "BR" )
<< QStringLiteral( "pt_BR.UTF-8" );
QTest::newRow( "catalan ()" ) << QStringLiteral( "ca" ) << QStringLiteral( "" )
<< QStringLiteral( "ca_ES" ); // no country given? Matches first
QTest::newRow( "catalan (ES)" ) << QStringLiteral( "ca" ) << QStringLiteral( "ES" ) << QStringLiteral( "ca_ES" );
QTest::newRow( "catalan (NL)" ) << QStringLiteral( "ca" ) << QStringLiteral( "NL" ) << QStringLiteral( "ca" );
QTest::newRow( "catalan (@valencia)" )
<< QStringLiteral( "ca@valencia" ) << QStringLiteral( "ES" ) << QStringLiteral( "ca@valencia" );
<< QStringLiteral( "ca_AD.UTF-8" ); // no country given? Matches first
QTest::newRow( "catalan (ES)" ) << QStringLiteral( "ca" ) << QStringLiteral( "ES" )
<< QStringLiteral( "ca_ES.UTF-8" );
QTest::newRow( "catalan (NL)" ) << QStringLiteral( "ca" ) << QStringLiteral( "NL" )
<< QStringLiteral( "ca_AD.UTF-8" );
QTest::newRow( "catalan (@valencia)" ) << QStringLiteral( "ca@valencia" ) << QStringLiteral( "ES" )
<< QStringLiteral( "ca_ES@valencia" ); // Prefers regional variant
QTest::newRow( "catalan (@valencia_NL)" )
<< QStringLiteral( "ca@valencia" ) << QStringLiteral( "NL" ) << QStringLiteral( "ca@valencia" );
<< QStringLiteral( "ca@valencia" ) << QStringLiteral( "NL" ) << QStringLiteral( "ca_ES@valencia" );
}
/*
* This list of available locales was created by grepping `/etc/locale.gen`
* on an EndeavourOS ISO image for a handful of representative locales.
*
* The order of the entries is significant: several test expectations depend
* on which matching entry appears first in this list (for example, "sr" with
* country "NL" is expected to yield "sr_ME", and "ca" without a matching
* country is expected to yield "ca_AD.UTF-8"). Reordering or inserting
* entries can therefore change the expected results of the tests.
*
* The list deliberately mixes tag shapes: a bare language ("en"), tags with
* and without a ".UTF-8" suffix, and tags with an "@" modifier
* ("ca_ES@valencia", "sr_RS@latin").
*/
static const QStringList availableLocales {
"nl_AW", "nl_BE.UTF-8", "nl_NL.UTF-8", "en", "en_AU.UTF-8", "en_US.UTF-8", "en_GB.UTF-8",
"ca_AD.UTF-8", "ca_ES.UTF-8", "ca_ES@valencia", "sr_ME", "sr_RS", "sr_RS@latin", "pt_BR.UTF-8",
"pt_PT.UTF-8", "es_AR.UTF-8", "es_ES.UTF-8", "es_MX.UTF-8",
};
void
LocaleTests::testLanguageDetection()
{
// This **might** be representative
static const QStringList availableLocales { "en.UTF-8", "en_US.UTF-8", "en_GB.UTF-8", "fr.UTF-8",
"pt.UTF-8", "pt_BR.UTF-8", "ca.UTF-8", "ca_ES.UTF-8",
"es.UTF-8", "es_ES.UTF-8", "sr.UTF-8", "sr@latin.UTF-8",
"ca@valencia.UTF-8" };
QFETCH( QString, locale );
QFETCH( QString, country );
QFETCH( QString, expected );
auto r = LocaleConfiguration::fromLanguageAndLocation( locale, availableLocales, country );
QCOMPARE( r.language(), expected + QStringLiteral( ".UTF-8" ) );
QCOMPARE( r.language(), expected );
}
void
LocaleTests::testLanguageDetectionValencia()
{
Logger::setupLogLevel( Logger::LOGDEBUG );
static const QStringList availableLocales { "nl_AW", "nl_BE.UTF-8", "nl_NL.UTF-8",
"en_AU.UTF-8", "en_US.UTF-8", "en_GB.UTF-8",
"ca_AD.UTF-8", "ca_ES.UTF-8", "ca_ES@valencia" };
{
auto r = LocaleConfiguration::fromLanguageAndLocation(
@ -330,7 +339,17 @@ LocaleTests::testLanguageDetectionValencia()
{
auto r = LocaleConfiguration::fromLanguageAndLocation(
QStringLiteral( "ca@valencia" ), availableLocales, QStringLiteral( "NL" ) );
QCOMPARE( r.language(), "ca@valencia" );
QCOMPARE( r.language(), "ca_ES@valencia" );
}
{
auto r = LocaleConfiguration::fromLanguageAndLocation(
QStringLiteral( "sr" ), availableLocales, QStringLiteral( "NL" ) );
QCOMPARE( r.language(), "sr_ME" ); // Because that one is first in the list
}
{
auto r = LocaleConfiguration::fromLanguageAndLocation(
QStringLiteral( "sr@latin" ), availableLocales, QStringLiteral( "NL" ) );
QCOMPARE( r.language(), "sr_RS@latin" );
}
}