Merge branch 'issue-2008' into 3.2.x-stable

FIXES #2008
This commit is contained in:
Adriaan de Groot 2022-08-24 21:28:06 +02:00
commit f277cbb1e2
6 changed files with 348 additions and 141 deletions

View File

@ -22,6 +22,7 @@ calamares_add_plugin(locale
Config.cpp
LCLocaleDialog.cpp
LocaleConfiguration.cpp
LocaleNames.cpp
LocalePage.cpp
LocaleViewStep.cpp
SetTimezoneJob.cpp
@ -39,15 +40,7 @@ calamares_add_plugin(locale
calamares_add_test(
localetest
SOURCES
Tests.cpp
Config.cpp
LocaleConfiguration.cpp
SetTimezoneJob.cpp
timezonewidget/TimeZoneImage.cpp
DEFINITIONS
SOURCE_DIR="${CMAKE_CURRENT_LIST_DIR}/images"
DEBUG_TIMEZONES=1
LIBRARIES
Qt5::Gui
SOURCES Tests.cpp Config.cpp LocaleConfiguration.cpp LocaleNames.cpp SetTimezoneJob.cpp timezonewidget/TimeZoneImage.cpp
DEFINITIONS SOURCE_DIR="${CMAKE_CURRENT_LIST_DIR}/images" DEBUG_TIMEZONES=1
LIBRARIES Qt5::Gui
)

View File

@ -9,11 +9,13 @@
*/
#include "LocaleConfiguration.h"
#include "LocaleNames.h"
#include "utils/Logger.h"
#include <QLocale>
#include <QRegularExpression>
#include <QVector>
LocaleConfiguration::LocaleConfiguration()
: explicit_lang( false )
@ -40,6 +42,106 @@ LocaleConfiguration::setLanguage( const QString& localeName )
m_lang = localeName;
}
/** @brief Returns a copy of @p p whose country is replaced by @p country.
 *
 * Language, region and encoding are carried over unchanged.
 */
static LocaleNameParts
updateCountry( LocaleNameParts p, const QString& country )
{
    LocaleNameParts adjusted { p.language, country, p.region, p.encoding };
    return adjusted;
}
/** @brief Finds the entry of @p others that is most similar to @p referenceLocale.
 *
 * As a side effect, @p others is sorted in-place by ascending similarity
 * to @p referenceLocale, so the best candidate ends up last.
 *
 * Returns a pair of (similarity score, matching locale). When @p others
 * is empty, or when no entry scores above LocaleNameParts::no_match,
 * the pair is (no_match, invalid LocaleNameParts).
 */
static QPair< int, LocaleNameParts >
identifyBestLanguageMatch( const LocaleNameParts& referenceLocale, QVector< LocaleNameParts >& others )
{
    // Guard: last() must not be called on an empty vector.
    if ( others.isEmpty() )
    {
        cDebug() << Logger::SubEntry << "Got no good match for" << referenceLocale.name();
        return { LocaleNameParts::no_match, LocaleNameParts {} };
    }

    std::sort( others.begin(),
               others.end(),
               [ & ]( const LocaleNameParts& lhs, const LocaleNameParts& rhs )
               { return referenceLocale.similarity( lhs ) < referenceLocale.similarity( rhs ); } );

    // The best match is at the end; compute its score only once.
    const LocaleNameParts best_match = others.last();
    const int best_score = referenceLocale.similarity( best_match );
    if ( !( best_score > LocaleNameParts::no_match ) )
    {
        cDebug() << Logger::SubEntry << "Got no good match for" << referenceLocale.name();
        return { LocaleNameParts::no_match, LocaleNameParts {} };
    }
    else
    {
        cDebug() << Logger::SubEntry << "Got best match for" << referenceLocale.name() << "as" << best_match.name();
        return { best_score, best_match };
    }
}
/** @brief Returns the locale from @p availableLocales that best-matches.
 *
 * Up to three candidates derived from @p languageLocale are compared
 * against @p availableLocales, in this order:
 *  - the language locale as given,
 *  - the language locale with its country replaced by @p countryCode,
 *  - the language locale with its country replaced by the country
 *    that QLocale derives for it.
 * A candidate that reaches LocaleNameParts::complete_match is returned
 * immediately. Otherwise the highest-scoring match over all attempts
 * is returned; on equal scores the earlier attempt wins.
 *
 * If @p languageLocale cannot be parsed, @p availableLocales is empty,
 * or nothing matches at all, the parts of "en_US.UTF-8" are returned.
 */
static LocaleNameParts
identifyBestLanguageMatch( const QString& languageLocale,
                           const QStringList& availableLocales,
                           const QString& countryCode )
{
    const QString default_lang = QStringLiteral( "en_US.UTF-8" );

    const LocaleNameParts self = LocaleNameParts::fromName( languageLocale );
    if ( self.isValid() && !availableLocales.isEmpty() )
    {
        QVector< LocaleNameParts > others;
        others.resize( availableLocales.length() );  // Makes default structs
        std::transform( availableLocales.begin(), availableLocales.end(), others.begin(), LocaleNameParts::fromName );

        // Keep track of the best match in various attempts
        int best_score = LocaleNameParts::no_match;
        LocaleNameParts best_match;

        // Scores one candidate; remembers it when it beats everything seen
        // so far (the score must be recorded too, otherwise a later, weaker
        // attempt would overwrite an earlier, stronger one).
        // Returns true when the candidate is a complete match.
        auto attempt = [ & ]( const LocaleNameParts& candidate ) -> bool
        {
            const auto [ score, match ] = identifyBestLanguageMatch( candidate, others );
            if ( score > best_score )
            {
                best_score = score;
                best_match = match;
            }
            return score >= LocaleNameParts::complete_match;
        };

        // Check with the unmodified language setting
        if ( attempt( self ) )
        {
            return best_match;
        }

        // .. but it might match **better** with the chosen location country Code
        if ( attempt( updateCountry( self, countryCode ) ) )
        {
            return best_match;
        }

        // .. or better yet with the QLocale-derived country
        const QString localeCountry = LocaleNameParts::fromName( QLocale( languageLocale ).name() ).country;
        if ( attempt( updateCountry( self, localeCountry ) ) )
        {
            return best_match;
        }

        if ( best_match.isValid() )
        {
            cDebug() << Logger::SubEntry << "Matched best with" << best_match.name();
            return best_match;
        }
    }

    // Else we have an unrecognized or unsupported locale, all we can do is go with
    // en_US.UTF-8 UTF-8. This completes all default language setting guesswork.
    return LocaleNameParts::fromName( default_lang );
}
LocaleConfiguration
LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale,
@ -47,100 +149,7 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale,
const QString& countryCode )
{
cDebug() << "Mapping" << languageLocale << "in" << countryCode << "to locale.";
QString language = languageLocale.split( '_' ).first();
QString region;
if ( language.contains( '@' ) )
{
auto r = language.split( '@' );
language = r.first();
region = r[ 1 ]; // second()
}
// Either an exact match, or the whole language part matches
// (followed by .<encoding> or _<country>
QStringList linesForLanguage = availableLocales.filter( QRegularExpression( language + "[._]" ) );
cDebug() << Logger::SubEntry << "Matching" << linesForLanguage;
QString lang;
if ( linesForLanguage.isEmpty() || languageLocale.isEmpty() )
{
lang = "en_US.UTF-8";
}
else if ( linesForLanguage.length() == 1 )
{
lang = linesForLanguage.first();
}
// lang could still be empty if we found multiple locales that satisfy myLanguage
const QString combinedLanguageAndCountry = QString( "%1_%2" ).arg( language ).arg( countryCode );
if ( lang.isEmpty() && region.isEmpty() )
{
auto l = linesForLanguage.filter(
QRegularExpression( combinedLanguageAndCountry + "[._]" ) ); // no regional variants
if ( l.length() == 1 )
{
lang = l.first();
}
}
// The following block was inspired by Ubiquity, scripts/localechooser-apply.
// No copyright statement found in file, assuming GPL v2 or later.
/* # In the special cases of Portuguese and Chinese, selecting a
# different location may imply a different dialect of the language.
# In such cases, make LANG reflect the selected language (for
# messages, character types, and collation) and make the other
# locale categories reflect the selected location. */
if ( language == "pt" || language == "zh" )
{
cDebug() << Logger::SubEntry << "Special-case Portuguese and Chinese";
QString proposedLocale = QString( "%1_%2" ).arg( language ).arg( countryCode );
for ( const QString& line : linesForLanguage )
{
if ( line.contains( proposedLocale ) )
{
cDebug() << Logger::SubEntry << "Country-variant" << line << "chosen.";
lang = line;
break;
}
}
}
if ( lang.isEmpty() && !region.isEmpty() )
{
cDebug() << Logger::SubEntry << "Special-case region @" << region;
QString proposedRegion = QString( "@%1" ).arg( region );
for ( const QString& line : linesForLanguage )
{
if ( line.startsWith( language ) && line.contains( proposedRegion ) )
{
cDebug() << Logger::SubEntry << "Region-variant" << line << "chosen.";
lang = line;
break;
}
}
}
// If we found no good way to set a default lang, do a search with the whole
// language locale and pick the first result, if any.
if ( lang.isEmpty() )
{
for ( const QString& line : availableLocales )
{
if ( line.startsWith( languageLocale ) )
{
lang = line;
break;
}
}
}
// Else we have an unrecognized or unsupported locale, all we can do is go with
// en_US.UTF-8 UTF-8. This completes all default language setting guesswork.
if ( lang.isEmpty() )
{
lang = "en_US.UTF-8";
}
const auto bestLocale = identifyBestLanguageMatch( languageLocale, availableLocales, countryCode );
// The following block was inspired by Ubiquity, scripts/localechooser-apply.
// No copyright statement found in file, assuming GPL v2 or later.
@ -188,34 +197,16 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale,
// We make a proposed locale based on the UI language and the timezone's country. There is no
// guarantee that this will be a valid, supported locale (often it won't).
QString lc_formats;
const QString combined = QString( "%1_%2" ).arg( language ).arg( countryCode );
if ( lang.isEmpty() )
const QString combined = QString( "%1_%2" ).arg( bestLocale.language ).arg( countryCode );
if ( availableLocales.contains( bestLocale.language ) )
{
cDebug() << Logger::SubEntry << "Looking up formats for" << combinedLanguageAndCountry;
// We look up if it's a supported locale.
for ( const QString& line : availableLocales )
{
if ( line.startsWith( combinedLanguageAndCountry ) )
{
lang = line;
lc_formats = line;
break;
cDebug() << Logger::SubEntry << "Exact formats match for language tag" << bestLocale.language;
lc_formats = bestLocale.language;
}
}
}
else
else if ( availableLocales.contains( combined ) )
{
if ( availableLocales.contains( lang ) )
{
cDebug() << Logger::SubEntry << "Exact formats match for language tag" << lang;
lc_formats = lang;
}
else if ( availableLocales.contains( combinedLanguageAndCountry ) )
{
cDebug() << Logger::SubEntry << "Exact formats match for combined" << combinedLanguageAndCountry;
lang = combinedLanguageAndCountry;
lc_formats = combinedLanguageAndCountry;
}
cDebug() << Logger::SubEntry << "Exact formats match for combined" << combined;
lc_formats = combined;
}
if ( lc_formats.isEmpty() )
@ -303,12 +294,7 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale,
// If we cannot make a good choice for a given country we go with the LANG
// setting, which defaults to en_US.UTF-8 UTF-8 if all else fails.
if ( lc_formats.isEmpty() )
{
lc_formats = lang;
}
return LocaleConfiguration( lang, lc_formats );
return LocaleConfiguration( bestLocale.name(), lc_formats.isEmpty() ? bestLocale.name() : lc_formats );
}

View File

@ -0,0 +1,90 @@
/* === This file is part of Calamares - <https://calamares.io> ===
*
* SPDX-FileCopyrightText: 2022 Adriaan de Groot <groot@kde.org>
* SPDX-License-Identifier: GPL-3.0-or-later
*
* Calamares is Free Software: see the License-Identifier above.
*
*/
#include "LocaleNames.h"
#include "utils/Logger.h"
#include <QRegularExpression>
/** @brief Splits a locale name like "ca_ES.UTF-8@valencia" into its parts.
 *
 * The language part is mandatory; country, encoding and region are
 * optional and are stored without their leading separator character.
 * Returns a default (invalid) LocaleNameParts when @p name does not
 * start with a language.
 */
LocaleNameParts
LocaleNameParts::fromName( const QString& name )
{
    // Drops the expected separator from the front of a captured group;
    // a group that does not start with the separator yields nothing.
    const auto withoutSeparator = []( QChar separator, const QString& text ) -> QString
    { return text.startsWith( separator ) ? text.mid( 1 ) : QString(); };

    const QRegularExpression pattern( "^([a-zA-Z]+)(_[a-zA-Z]+)?(\\.[-a-zA-Z0-9]+)?(@[a-zA-Z]+)?" );
    const auto found = pattern.match( name );

    const QString languagePart = found.captured( 1 );
    const QString countryPart = withoutSeparator( '_', found.captured( 2 ) );
    const QString encodingPart = withoutSeparator( '.', found.captured( 3 ) );
    const QString regionPart = withoutSeparator( '@', found.captured( 4 ) );

    if ( !languagePart.isEmpty() )
    {
        // Member order in the struct is language, country, region, encoding
        return LocaleNameParts { languagePart, countryPart, regionPart, encodingPart };
    }
    return LocaleNameParts {};
}
/** @brief Reassembles the locale name as language[_country][.encoding][@region].
 *
 * Empty parts are left out together with their separator.
 * An invalid object yields an empty string.
 */
QString
LocaleNameParts::name() const
{
    if ( !isValid() )
    {
        return QString();
    }

    QString assembled = language;
    // Appends separator + part, but only for parts that are present
    const auto appendPart = [ &assembled ]( QChar separator, const QString& part )
    {
        if ( !part.isEmpty() )
        {
            assembled.append( separator );
            assembled.append( part );
        }
    };

    appendPart( '_', country );
    appendPart( '.', encoding );
    appendPart( '@', region );
    return assembled;
}
/** @brief Scores how closely @p other resembles this locale.
 *
 * Returns 0 when either side is invalid or the languages differ.
 * Otherwise the score starts at 50 for the matching language, plus
 *  - 30 when the regions are equal (two empty regions count as equal),
 *  - 20 when the countries are equal and non-empty, or 10 when both
 *    countries are empty,
 *  - 10 when the countries differ but @p other has no country at all.
 * The encoding plays no role in the score.
 */
int
LocaleNameParts::similarity( const LocaleNameParts& other ) const
{
    const bool comparable = isValid() && other.isValid() && language == other.language;
    if ( !comparable )
    {
        return 0;
    }

    int score = 50;
    if ( region == other.region )
    {
        score += 30;
    }
    if ( country == other.country )
    {
        score += country.isEmpty() ? 10 : 20;
    }
    else if ( other.country.isEmpty() )
    {
        score += 10;
    }
    return score;
}

View File

@ -0,0 +1,46 @@
/* === This file is part of Calamares - <https://calamares.io> ===
*
* SPDX-FileCopyrightText: 2022 Adriaan de Groot <groot@kde.org>
* SPDX-License-Identifier: GPL-3.0-or-later
*
* Calamares is Free Software: see the License-Identifier above.
*
*/
#ifndef LOCALENAMES_H
#define LOCALENAMES_H
#include <QString>
/** @brief parts of a locale-name (e.g. "ar_LY.UTF-8", split apart)
*
* These are created from lines in `/usr/share/i18n/SUPPORTED`,
* which lists all the locales supported by the system (there
* are also other sources of the same).
*
* The parts are stored without their separator characters
* ('_', '.' and '@'); name() puts the separators back.
*
*/
struct LocaleNameParts
{
// Member order matters: fromName() aggregate-initializes in this order.
QString language; // e.g. "ar"
QString country; // e.g. "LY" (may be empty)
QString region; // e.g. "valencia", stored without the leading '@' (may be empty)
QString encoding; // e.g. "UTF-8" (may be empty)
/// @brief A locale-name is valid as soon as it has a language.
bool isValid() const { return !language.isEmpty(); }
/** @brief Reassembles the name as language[_country][.encoding][@region].
*
* Returns an empty string if this object is not valid.
*/
QString name() const;
/** @brief Splits @p name into its parts.
*
* Returns a default-constructed (invalid) object when @p name
* does not start with a language.
*/
static LocaleNameParts fromName( const QString& name );
/// @brief Score returned by similarity() when there is no similarity.
static inline constexpr const int no_match = 0;
/// @brief Score returned by similarity() for a complete match.
static inline constexpr const int complete_match = 100;
/** @brief Compute similarity-score with another locale-name.
*
* Similarity is driven by language and region, then country.
* The encoding is not taken into account.
* Returns a number between 0 (no similarity, e.g. the
* language is different) and 100 (complete match).
*/
int similarity( const LocaleNameParts& other ) const;
};
#endif

View File

@ -9,6 +9,7 @@
#include "Config.h"
#include "LocaleConfiguration.h"
#include "LocaleNames.h"
#include "timezonewidget/TimeZoneImage.h"
#include "CalamaresVersion.h"
@ -50,12 +51,16 @@ private Q_SLOTS:
void testLanguageDetection();
void testLanguageDetectionValencia();
// Check realistic language mapping for issue 2008
// Check that the test-data is available and ok
void testKDENeonLanguageData();
void testLocaleNameParts();
// Check realistic language mapping for issue 2008
void testLanguageMappingNeon_data();
void testLanguageMappingNeon();
void testLanguageMappingFreeBSD_data();
void testLanguageMappingFreeBSD();
void testLanguageSimilarity();
private:
QStringList m_KDEneonLocales;
@ -395,6 +400,10 @@ splitTestFileIntoLines( const QString& filename )
void
LocaleTests::testKDENeonLanguageData()
{
if ( !m_KDEneonLocales.isEmpty() )
{
return;
}
const QStringList neonLocales = splitTestFileIntoLines( QStringLiteral( "locale-data-neon" ) );
cDebug() << "Loaded KDE neon locales test data" << neonLocales.front() << "to" << neonLocales.back();
QCOMPARE( neonLocales.length(), 318 ); // wc -l tells me 318 lines
@ -415,7 +424,7 @@ LocaleTests::MappingData()
// Tired of writing QString or QStringLiteral all the time.
auto l = []( const char* p ) { return QString::fromUtf8( p ); };
auto u = [](){ return QString(); };
auto u = []() { return QString(); };
// The KDEneon columns include the .UTF-8 from the source data
// The FreeBSD columns may have u() to indicate "same as KDEneon",
@ -445,12 +454,14 @@ LocaleTests::MappingData()
}
void LocaleTests::testLanguageMappingNeon_data()
void
LocaleTests::testLanguageMappingNeon_data()
{
MappingData();
}
void LocaleTests::testLanguageMappingFreeBSD_data()
void
LocaleTests::testLanguageMappingFreeBSD_data()
{
MappingData();
}
@ -458,6 +469,7 @@ void LocaleTests::testLanguageMappingFreeBSD_data()
void
LocaleTests::testLanguageMappingNeon()
{
testKDENeonLanguageData();
QVERIFY( !m_KDEneonLocales.isEmpty() );
QFETCH( QString, selectedLanguage );
@ -474,6 +486,7 @@ LocaleTests::testLanguageMappingNeon()
void
LocaleTests::testLanguageMappingFreeBSD()
{
testKDENeonLanguageData();
QVERIFY( !m_FreeBSDLocales.isEmpty() );
QFETCH( QString, selectedLanguage );
@ -488,6 +501,84 @@ LocaleTests::testLanguageMappingFreeBSD()
QCOMPARE( bsd.language(), expected );
}
/** @brief Checks that locale names are split into parts and re-assembled correctly.
 *
 * Two hand-written examples verify the individual parts; after that
 * every loaded test locale must parse as valid and round-trip through
 * fromName() and name() unchanged.
 */
void
LocaleTests::testLocaleNameParts()
{
    // Make sure the locale lists have been loaded before relying on them
    testKDENeonLanguageData();
    QVERIFY( !m_FreeBSDLocales.isEmpty() );
    QVERIFY( !m_KDEneonLocales.isEmpty() );

    // Language + country + encoding, no region
    {
        const auto dutch = LocaleNameParts::fromName( QStringLiteral( "nl_NL.UTF-8" ) );
        QCOMPARE( dutch.language, QStringLiteral( "nl" ) );
        QCOMPARE( dutch.country, QStringLiteral( "NL" ) );
        QCOMPARE( dutch.encoding, QStringLiteral( "UTF-8" ) );
        QVERIFY( dutch.region.isEmpty() );
    }

    // Language + encoding only
    {
        const auto posix = LocaleNameParts::fromName( QStringLiteral( "C.UTF-8" ) );
        QCOMPARE( posix.language, QStringLiteral( "C" ) );
        QVERIFY( posix.country.isEmpty() );
        QCOMPARE( posix.encoding, QStringLiteral( "UTF-8" ) );
        QVERIFY( posix.region.isEmpty() );
    }

    // Round-trip all the loaded FreeBSD locales
    for ( const auto& localeName : m_FreeBSDLocales )
    {
        const auto roundTrip = LocaleNameParts::fromName( localeName );
        QVERIFY( roundTrip.isValid() );
        QCOMPARE( roundTrip.name(), localeName );
    }

    // Round-trip all the loaded KDE neon locales
    for ( const auto& localeName : m_KDEneonLocales )
    {
        const auto roundTrip = LocaleNameParts::fromName( localeName );
        QVERIFY( roundTrip.isValid() );
        QCOMPARE( roundTrip.name(), localeName );
    }
}
/** @brief Checks the similarity scores between locale names.
 *
 * Covers invalid (empty) locales, a handful of Dutch variants, and
 * finally that every KDE neon test locale scores 100 against itself.
 */
void
LocaleTests::testLanguageSimilarity()
{
    // Two invalid locales have nothing in common
    {
        QCOMPARE( LocaleNameParts().similarity( LocaleNameParts() ), 0 );
    }

    // Dutch, in the Netherlands, in Belgium, and without any country
    {
        const auto netherlands = LocaleNameParts::fromName( QStringLiteral( "nl_NL.UTF-8" ) );
        const auto belgium = LocaleNameParts::fromName( QStringLiteral( "nl_BE.UTF-8" ) );
        const auto languageOnly = LocaleNameParts::fromName( QStringLiteral( "nl" ) );

        QCOMPARE( netherlands.similarity( netherlands ), 100 );
        QCOMPARE( netherlands.similarity( LocaleNameParts() ), 0 );
        QCOMPARE( netherlands.similarity( belgium ), 80 );  // Language + (empty) region match
        QCOMPARE( netherlands.similarity( languageOnly ), 90 );
    }

    // Each locale from the test data is a complete match for itself
    {
        if ( m_KDEneonLocales.isEmpty() )
        {
            testKDENeonLanguageData();
        }
        QVERIFY( !m_FreeBSDLocales.isEmpty() );
        QVERIFY( !m_KDEneonLocales.isEmpty() );

        for ( const auto& localeName : m_KDEneonLocales )
        {
            const auto parsed = LocaleNameParts::fromName( localeName );
            const auto score = parsed.similarity( parsed );
            // Name the offending locale before QCOMPARE aborts the test
            if ( score != 100 )
            {
                cDebug() << "Locale" << localeName << "is unusual.";
            }
            QCOMPARE( score, 100 );
        }
    }
}
#include "utils/moc-warnings.h"

View File

@ -41,8 +41,9 @@ calamares_add_plugin(localeq
EXPORT_MACRO PLUGINDLLEXPORT_PRO
SOURCES
LocaleQmlViewStep.cpp
${_locale}/LocaleConfiguration.cpp
${_locale}/Config.cpp
${_locale}/LocaleConfiguration.cpp
${_locale}/LocaleNames.cpp
${_locale}/SetTimezoneJob.cpp
RESOURCES
localeq.qrc