From 73628b13eaf40b7bc63877c55b813a7681791ba6 Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Tue, 19 Jul 2022 18:05:24 +0200 Subject: [PATCH 01/14] [locale] Add test for language-mapping Adds specific data from KDE neon and expected mappings. The test fails right now because the mapping is incorrect. --- src/modules/locale/Tests.cpp | 89 ++++++ src/modules/locale/tests/locale-data-neon | 318 ++++++++++++++++++++++ 2 files changed, 407 insertions(+) create mode 100644 src/modules/locale/tests/locale-data-neon diff --git a/src/modules/locale/Tests.cpp b/src/modules/locale/Tests.cpp index 23f9b5b3e..db5751177 100644 --- a/src/modules/locale/Tests.cpp +++ b/src/modules/locale/Tests.cpp @@ -11,7 +11,9 @@ #include "LocaleConfiguration.h" #include "timezonewidget/TimeZoneImage.h" +#include "Settings.h" #include "locale/TimeZone.h" +#include "locale/TranslationsModel.h" #include "utils/Logger.h" #include @@ -43,6 +45,15 @@ private Q_SLOTS: void testLanguageDetection_data(); void testLanguageDetection(); void testLanguageDetectionValencia(); + + // Check realistic language mapping for issue 2008 + void testKDENeonLanguageData(); + void testLanguageMapping_data(); + void testLanguageMapping(); + +private: + QStringList m_KDEneonLocales; + QStringList m_FreeBSDLocales; }; QTEST_MAIN( LocaleTests ) @@ -55,6 +66,12 @@ LocaleTests::~LocaleTests() {} void LocaleTests::initTestCase() { + Logger::setupLogLevel( Logger::LOGDEBUG ); + const auto* settings = Calamares::Settings::instance(); + if ( !settings ) + { + (void)new Calamares::Settings( true ); + } } void @@ -353,6 +370,78 @@ LocaleTests::testLanguageDetectionValencia() } } +static QStringList +splitTestFileIntoLines( const QString& filename ) +{ + // BUILD_AS_TEST is the source-directory path + const QFileInfo fi( QString( "%1/tests/%2" ).arg( BUILD_AS_TEST, filename ) ); + const QString path = fi.absoluteFilePath(); + QFile testData( path ); + if ( testData.open( QIODevice::ReadOnly ) ) + { + return 
QString::fromUtf8( testData.readAll() ).split( '\n', Qt::SkipEmptyParts ); + } + return QStringList {}; +} + +void +LocaleTests::testKDENeonLanguageData() +{ + const QStringList neonLocales = splitTestFileIntoLines( QStringLiteral( "locale-data-neon" ) ); + cDebug() << "Loaded KDE neon locales test data" << neonLocales.front() << "to" << neonLocales.back(); + QCOMPARE( neonLocales.length(), 318 ); // wc -l tells me 318 lines + + m_KDEneonLocales = neonLocales; +} + +void +LocaleTests::testLanguageMapping_data() +{ + QTest::addColumn< QString >( "selectedLanguage" ); + QTest::addColumn< QString >( "KDEneonLanguage" ); + + // Tired of writing QString or QStringLiteral all the time. + auto l = []( const char* p ) { return QString::fromUtf8( p ); }; + + // The KDEneon columns include the .UTF-8 from the source data + // + // Each column shows how a language -- which can be selected from the + // welcome page, and is inserted into GS as the language key that + // Calamares knows -- should be mapped to a supported system locale. + // + // All the mappings are for ".. in NL", which can trigger minor variation + // if there are languages with a _NL variant (e.g. nl_NL and nl_BE). 
+ + // clang-format off + QTest::newRow( "en " ) << l( "en" ) << l( "en_US.UTF-8" ); + QTest::newRow( "en_GB" ) << l( "en_GB" ) << l( "en_GB.UTF-8" ); + QTest::newRow( "ca " ) << l( "ca" ) << l( "ca_ES.UTF-8" ); + QTest::newRow( "ca@vl" ) << l( "ca@valencia" ) << l( "ca_ES@valencia" ); + QTest::newRow( "sr " ) << l( "sr" ) << l( "sr_RS" ); + QTest::newRow( "sr@lt" ) << l( "sr@latin" ) << l( "sr_RS@latin" ); + QTest::newRow( "pt_PT" ) << l( "pt_PT" ) << l( "pt_PT.UTF-8" ); + QTest::newRow( "pt_BR" ) << l( "pt_BR" ) << l( "pt_BR.UTF-8" ); + QTest::newRow( "nl " ) << l( "nl" ) << l( "nl_NL.UTF-8" ); + QTest::newRow( "zh_TW" ) << l( "zh_TW" ) << l( "zh_TW.UTF-8" ); + // clang-format on +} + + +void +LocaleTests::testLanguageMapping() +{ + QVERIFY( !m_KDEneonLocales.isEmpty() ); + + QFETCH( QString, selectedLanguage ); + QFETCH( QString, KDEneonLanguage ); + + QVERIFY( Calamares::Locale::availableLanguages().contains( selectedLanguage ) ); + + const auto r = LocaleConfiguration::fromLanguageAndLocation( + ( selectedLanguage ), m_KDEneonLocales, QStringLiteral( "NL" ) ); + QCOMPARE( r.language(), KDEneonLanguage ); +} + #include "utils/moc-warnings.h" diff --git a/src/modules/locale/tests/locale-data-neon b/src/modules/locale/tests/locale-data-neon new file mode 100644 index 000000000..0f0254d01 --- /dev/null +++ b/src/modules/locale/tests/locale-data-neon @@ -0,0 +1,318 @@ +aa_DJ.UTF-8 +aa_ER +aa_ER@saaho +aa_ET +af_ZA.UTF-8 +agr_PE +ak_GH +am_ET +an_ES.UTF-8 +anp_IN +ar_AE.UTF-8 +ar_BH.UTF-8 +ar_DZ.UTF-8 +ar_EG.UTF-8 +ar_IN +ar_IQ.UTF-8 +ar_JO.UTF-8 +ar_KW.UTF-8 +ar_LB.UTF-8 +ar_LY.UTF-8 +ar_MA.UTF-8 +ar_OM.UTF-8 +ar_QA.UTF-8 +ar_SA.UTF-8 +ar_SD.UTF-8 +ar_SS +ar_SY.UTF-8 +ar_TN.UTF-8 +ar_YE.UTF-8 +ayc_PE +az_AZ +az_IR +as_IN +ast_ES.UTF-8 +be_BY.UTF-8 +be_BY@latin +bem_ZM +ber_DZ +ber_MA +bg_BG.UTF-8 +bhb_IN.UTF-8 +bho_IN +bho_NP +bi_VU +bn_BD +bn_IN +bo_CN +bo_IN +br_FR.UTF-8 +brx_IN +bs_BA.UTF-8 +byn_ER +ca_AD.UTF-8 +ca_ES.UTF-8 +ca_ES@valencia +ca_FR.UTF-8 
+ca_IT.UTF-8 +ce_RU +ckb_IQ +chr_US +cmn_TW +crh_UA +cs_CZ.UTF-8 +csb_PL +cv_RU +cy_GB.UTF-8 +da_DK.UTF-8 +de_AT.UTF-8 +de_BE.UTF-8 +de_CH.UTF-8 +de_DE.UTF-8 +de_IT.UTF-8 +de_LI.UTF-8 +de_LU.UTF-8 +doi_IN +dsb_DE +dv_MV +dz_BT +el_GR.UTF-8 +el_CY.UTF-8 +en_AG +en_AU.UTF-8 +en_BW.UTF-8 +en_CA.UTF-8 +en_DK.UTF-8 +en_GB.UTF-8 +en_HK.UTF-8 +en_IE.UTF-8 +en_IL +en_IN +en_NG +en_NZ.UTF-8 +en_PH.UTF-8 +en_SC.UTF-8 +en_SG.UTF-8 +en_US.UTF-8 +en_ZA.UTF-8 +en_ZM +en_ZW.UTF-8 +eo +eo_US.UTF-8 +es_AR.UTF-8 +es_BO.UTF-8 +es_CL.UTF-8 +es_CO.UTF-8 +es_CR.UTF-8 +es_CU +es_DO.UTF-8 +es_EC.UTF-8 +es_ES.UTF-8 +es_GT.UTF-8 +es_HN.UTF-8 +es_MX.UTF-8 +es_NI.UTF-8 +es_PA.UTF-8 +es_PE.UTF-8 +es_PR.UTF-8 +es_PY.UTF-8 +es_SV.UTF-8 +es_US.UTF-8 +es_UY.UTF-8 +es_VE.UTF-8 +et_EE.UTF-8 +eu_ES.UTF-8 +eu_FR.UTF-8 +fa_IR +ff_SN +fi_FI.UTF-8 +fil_PH +fo_FO.UTF-8 +fr_BE.UTF-8 +fr_CA.UTF-8 +fr_CH.UTF-8 +fr_FR.UTF-8 +fr_LU.UTF-8 +fur_IT +fy_NL +fy_DE +ga_IE.UTF-8 +gd_GB.UTF-8 +gez_ER +gez_ER@abegede +gez_ET +gez_ET@abegede +gl_ES.UTF-8 +gu_IN +gv_GB.UTF-8 +ha_NG +hak_TW +he_IL.UTF-8 +hi_IN +hif_FJ +hne_IN +hr_HR.UTF-8 +hsb_DE.UTF-8 +ht_HT +hu_HU.UTF-8 +hy_AM +ia_FR +id_ID.UTF-8 +ig_NG +ik_CA +is_IS.UTF-8 +it_CH.UTF-8 +it_IT.UTF-8 +iu_CA +ja_JP.UTF-8 +ka_GE.UTF-8 +kab_DZ +kk_KZ.UTF-8 +kl_GL.UTF-8 +km_KH +kn_IN +ko_KR.UTF-8 +kok_IN +ks_IN +ks_IN@devanagari +ku_TR.UTF-8 +kw_GB.UTF-8 +ky_KG +lb_LU +lg_UG.UTF-8 +li_BE +li_NL +lij_IT +ln_CD +lo_LA +lt_LT.UTF-8 +lv_LV.UTF-8 +lzh_TW +mag_IN +mai_IN +mai_NP +mfe_MU +mg_MG.UTF-8 +mhr_RU +mi_NZ.UTF-8 +miq_NI +mjw_IN +mk_MK.UTF-8 +ml_IN +mn_MN +mni_IN +mnw_MM +mr_IN +ms_MY.UTF-8 +mt_MT.UTF-8 +my_MM +nan_TW +nan_TW@latin +nb_NO.UTF-8 +nds_DE +nds_NL +ne_NP +nhn_MX +niu_NU +niu_NZ +nl_AW +nl_BE.UTF-8 +nl_NL.UTF-8 +nn_NO.UTF-8 +nr_ZA +nso_ZA +oc_FR.UTF-8 +om_ET +om_KE.UTF-8 +or_IN +os_RU +pa_IN +pa_PK +pap_AW +pap_CW +pl_PL.UTF-8 +ps_AF +pt_BR.UTF-8 +pt_PT.UTF-8 +quz_PE +raj_IN +ro_RO.UTF-8 +ru_RU.UTF-8 +ru_UA.UTF-8 +rw_RW +sa_IN +sah_RU +sat_IN +sc_IT +sd_IN 
+sd_IN@devanagari +sd_PK +se_NO +sgs_LT +shn_MM +shs_CA +si_LK +sid_ET +sk_SK.UTF-8 +sl_SI.UTF-8 +sm_WS +so_DJ.UTF-8 +so_ET +so_KE.UTF-8 +so_SO.UTF-8 +sq_AL.UTF-8 +sq_MK +sr_ME +sr_RS +sr_RS@latin +ss_ZA +st_ZA.UTF-8 +sv_FI.UTF-8 +sv_SE.UTF-8 +sw_KE +sw_TZ +szl_PL +ta_IN +ta_LK +tcy_IN.UTF-8 +te_IN +tg_TJ.UTF-8 +th_TH.UTF-8 +the_NP +ti_ER +ti_ET +tig_ER +tk_TM +tl_PH.UTF-8 +tn_ZA +to_TO +tpi_PG +tr_CY.UTF-8 +tr_TR.UTF-8 +ts_ZA +tt_RU +tt_RU@iqtelif +ug_CN +ug_CN@latin +uk_UA.UTF-8 +unm_US +ur_IN +ur_PK +uz_UZ.UTF-8 +uz_UZ@cyrillic +ve_ZA +vi_VN +wa_BE.UTF-8 +wae_CH +wal_ET +wo_SN +xh_ZA.UTF-8 +yi_US.UTF-8 +yo_NG +yue_HK +yuw_PG +zh_CN.UTF-8 +zh_HK.UTF-8 +zh_SG.UTF-8 +zh_TW.UTF-8 +zu_ZA.UTF-8 From d52d1bfeee05544eed2ca941af171b4ed9d11b0a Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Tue, 19 Jul 2022 18:48:32 +0200 Subject: [PATCH 02/14] [locale] Add FreeBSD test data for locale-mapping --- src/modules/locale/Tests.cpp | 42 +++++++---- src/modules/locale/tests/locale-data-freebsd | 79 ++++++++++++++++++++ 2 files changed, 107 insertions(+), 14 deletions(-) create mode 100644 src/modules/locale/tests/locale-data-freebsd diff --git a/src/modules/locale/Tests.cpp b/src/modules/locale/Tests.cpp index db5751177..534751c52 100644 --- a/src/modules/locale/Tests.cpp +++ b/src/modules/locale/Tests.cpp @@ -390,8 +390,12 @@ LocaleTests::testKDENeonLanguageData() const QStringList neonLocales = splitTestFileIntoLines( QStringLiteral( "locale-data-neon" ) ); cDebug() << "Loaded KDE neon locales test data" << neonLocales.front() << "to" << neonLocales.back(); QCOMPARE( neonLocales.length(), 318 ); // wc -l tells me 318 lines - m_KDEneonLocales = neonLocales; + + const QStringList bsdLocales = splitTestFileIntoLines( QStringLiteral( "locale-data-freebsd" ) ); + cDebug() << "Loaded FreeBSD locales test data" << bsdLocales.front() << "to" << bsdLocales.back(); + QCOMPARE( bsdLocales.length(), 79 ); + m_FreeBSDLocales = bsdLocales; } void @@ -399,13 +403,17 @@ 
LocaleTests::testLanguageMapping_data() { QTest::addColumn< QString >( "selectedLanguage" ); QTest::addColumn< QString >( "KDEneonLanguage" ); + QTest::addColumn< QString >( "FreeBSDLanguage" ); // Tired of writing QString or QStringLiteral all the time. auto l = []( const char* p ) { return QString::fromUtf8( p ); }; + auto u = [](){ return QString(); }; // The KDEneon columns include the .UTF-8 from the source data + // The FreeBSD columns may have u() to indicate "same as KDEneon", + // that's an empty string. // - // Each column shows how a language -- which can be selected from the + // Each row shows how a language -- which can be selected from the // welcome page, and is inserted into GS as the language key that // Calamares knows -- should be mapped to a supported system locale. // @@ -413,16 +421,16 @@ LocaleTests::testLanguageMapping_data() // if there are languages with a _NL variant (e.g. nl_NL and nl_BE). // clang-format off - QTest::newRow( "en " ) << l( "en" ) << l( "en_US.UTF-8" ); - QTest::newRow( "en_GB" ) << l( "en_GB" ) << l( "en_GB.UTF-8" ); - QTest::newRow( "ca " ) << l( "ca" ) << l( "ca_ES.UTF-8" ); - QTest::newRow( "ca@vl" ) << l( "ca@valencia" ) << l( "ca_ES@valencia" ); - QTest::newRow( "sr " ) << l( "sr" ) << l( "sr_RS" ); - QTest::newRow( "sr@lt" ) << l( "sr@latin" ) << l( "sr_RS@latin" ); - QTest::newRow( "pt_PT" ) << l( "pt_PT" ) << l( "pt_PT.UTF-8" ); - QTest::newRow( "pt_BR" ) << l( "pt_BR" ) << l( "pt_BR.UTF-8" ); - QTest::newRow( "nl " ) << l( "nl" ) << l( "nl_NL.UTF-8" ); - QTest::newRow( "zh_TW" ) << l( "zh_TW" ) << l( "zh_TW.UTF-8" ); + QTest::newRow( "en " ) << l( "en" ) << l( "en_US.UTF-8" ) << u(); + QTest::newRow( "en_GB" ) << l( "en_GB" ) << l( "en_GB.UTF-8" ) << u(); + QTest::newRow( "ca " ) << l( "ca" ) << l( "ca_ES.UTF-8" ) << u(); + QTest::newRow( "ca@vl" ) << l( "ca@valencia" ) << l( "ca_ES@valencia" ) << u(); + QTest::newRow( "sr " ) << l( "sr" ) << l( "sr_RS" ) << u(); + QTest::newRow( "sr@lt" ) << l( "sr@latin" ) << 
l( "sr_RS@latin" ) << u(); + QTest::newRow( "pt_PT" ) << l( "pt_PT" ) << l( "pt_PT.UTF-8" ) << u(); + QTest::newRow( "pt_BR" ) << l( "pt_BR" ) << l( "pt_BR.UTF-8" ) << u(); + QTest::newRow( "nl " ) << l( "nl" ) << l( "nl_NL.UTF-8" ) << u(); + QTest::newRow( "zh_TW" ) << l( "zh_TW" ) << l( "zh_TW.UTF-8" ) << u(); // clang-format on } @@ -431,15 +439,21 @@ void LocaleTests::testLanguageMapping() { QVERIFY( !m_KDEneonLocales.isEmpty() ); + QVERIFY( !m_FreeBSDLocales.isEmpty() ); QFETCH( QString, selectedLanguage ); QFETCH( QString, KDEneonLanguage ); + QFETCH( QString, FreeBSDLanguage ); QVERIFY( Calamares::Locale::availableLanguages().contains( selectedLanguage ) ); - const auto r = LocaleConfiguration::fromLanguageAndLocation( + const auto neon = LocaleConfiguration::fromLanguageAndLocation( ( selectedLanguage ), m_KDEneonLocales, QStringLiteral( "NL" ) ); - QCOMPARE( r.language(), KDEneonLanguage ); + QCOMPARE( neon.language(), KDEneonLanguage ); + + const auto bsd = LocaleConfiguration::fromLanguageAndLocation( + ( selectedLanguage ), m_FreeBSDLocales, QStringLiteral( "NL" ) ); + QCOMPARE( bsd.language(), FreeBSDLanguage.isEmpty() ? 
KDEneonLanguage : FreeBSDLanguage ); } diff --git a/src/modules/locale/tests/locale-data-freebsd b/src/modules/locale/tests/locale-data-freebsd new file mode 100644 index 000000000..281839a90 --- /dev/null +++ b/src/modules/locale/tests/locale-data-freebsd @@ -0,0 +1,79 @@ +C.UTF-8 +af_ZA.UTF-8 +am_ET.UTF-8 +ar_AE.UTF-8 +ar_EG.UTF-8 +ar_JO.UTF-8 +ar_MA.UTF-8 +ar_QA.UTF-8 +ar_SA.UTF-8 +be_BY.UTF-8 +bg_BG.UTF-8 +ca_AD.UTF-8 +ca_ES.UTF-8 +ca_FR.UTF-8 +ca_IT.UTF-8 +cs_CZ.UTF-8 +da_DK.UTF-8 +de_AT.UTF-8 +de_CH.UTF-8 +de_DE.UTF-8 +el_GR.UTF-8 +en_AU.UTF-8 +en_CA.UTF-8 +en_GB.UTF-8 +en_HK.UTF-8 +en_IE.UTF-8 +en_NZ.UTF-8 +en_PH.UTF-8 +en_SG.UTF-8 +en_US.UTF-8 +en_ZA.UTF-8 +es_AR.UTF-8 +es_CR.UTF-8 +es_ES.UTF-8 +es_MX.UTF-8 +et_EE.UTF-8 +eu_ES.UTF-8 +fi_FI.UTF-8 +fr_BE.UTF-8 +fr_CA.UTF-8 +fr_CH.UTF-8 +fr_FR.UTF-8 +ga_IE.UTF-8 +he_IL.UTF-8 +hi_IN.UTF-8 +hr_HR.UTF-8 +hu_HU.UTF-8 +hy_AM.UTF-8 +is_IS.UTF-8 +it_CH.UTF-8 +it_IT.UTF-8 +ja_JP.UTF-8 +kk_KZ.UTF-8 +ko_KR.UTF-8 +lt_LT.UTF-8 +lv_LV.UTF-8 +mn_MN.UTF-8 +nb_NO.UTF-8 +nl_BE.UTF-8 +nl_NL.UTF-8 +nn_NO.UTF-8 +pl_PL.UTF-8 +pt_BR.UTF-8 +pt_PT.UTF-8 +ro_RO.UTF-8 +ru_RU.UTF-8 +se_FI.UTF-8 +se_NO.UTF-8 +sk_SK.UTF-8 +sl_SI.UTF-8 +sr_RS.UTF-8 +sr_RS.UTF-8@latin +sv_FI.UTF-8 +sv_SE.UTF-8 +tr_TR.UTF-8 +uk_UA.UTF-8 +zh_CN.UTF-8 +zh_HK.UTF-8 +zh_TW.UTF-8 From 84c0da21864de3cf098366bab18f4f83558e5624 Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Tue, 19 Jul 2022 18:56:50 +0200 Subject: [PATCH 03/14] [locale] Test KDE neon and FreeBSD separately, same data - wrangle the test framework so it hands the same data to two different collections of tests; do KDE neon and FreeBSD separately so it's clearer which lookups are being done (and a failure in one doesn't prevent the test of the other). 
--- src/modules/locale/Tests.cpp | 47 +++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/src/modules/locale/Tests.cpp b/src/modules/locale/Tests.cpp index 534751c52..1a6887dc1 100644 --- a/src/modules/locale/Tests.cpp +++ b/src/modules/locale/Tests.cpp @@ -27,6 +27,9 @@ public: LocaleTests(); ~LocaleTests() override; + // Implementation of data for MappingNeon and MappingFreeBSD + void MappingData(); + private Q_SLOTS: void initTestCase(); // Check the sample config file is processed correctly @@ -48,8 +51,10 @@ private Q_SLOTS: // Check realistic language mapping for issue 2008 void testKDENeonLanguageData(); - void testLanguageMapping_data(); - void testLanguageMapping(); + void testLanguageMappingNeon_data(); + void testLanguageMappingNeon(); + void testLanguageMappingFreeBSD_data(); + void testLanguageMappingFreeBSD(); private: QStringList m_KDEneonLocales; @@ -399,7 +404,7 @@ LocaleTests::testKDENeonLanguageData() } void -LocaleTests::testLanguageMapping_data() +LocaleTests::MappingData() { QTest::addColumn< QString >( "selectedLanguage" ); QTest::addColumn< QString >( "KDEneonLanguage" ); @@ -424,9 +429,11 @@ LocaleTests::testLanguageMapping_data() QTest::newRow( "en " ) << l( "en" ) << l( "en_US.UTF-8" ) << u(); QTest::newRow( "en_GB" ) << l( "en_GB" ) << l( "en_GB.UTF-8" ) << u(); QTest::newRow( "ca " ) << l( "ca" ) << l( "ca_ES.UTF-8" ) << u(); - QTest::newRow( "ca@vl" ) << l( "ca@valencia" ) << l( "ca_ES@valencia" ) << u(); - QTest::newRow( "sr " ) << l( "sr" ) << l( "sr_RS" ) << u(); - QTest::newRow( "sr@lt" ) << l( "sr@latin" ) << l( "sr_RS@latin" ) << u(); + // FreeBSD has no Valencian variant + QTest::newRow( "ca@vl" ) << l( "ca@valencia" ) << l( "ca_ES@valencia" ) << l( "ca_ES.UTF-8" ); + // FreeBSD has the UTF-8 marker before the @region part + QTest::newRow( "sr " ) << l( "sr" ) << l( "sr_RS" ) << l( "sr_RS.UTF-8" ); + QTest::newRow( "sr@lt" ) << l( "sr@latin" ) << l( "sr_RS@latin" ) << l( 
"sr_RS.UTF-8@latin" ); QTest::newRow( "pt_PT" ) << l( "pt_PT" ) << l( "pt_PT.UTF-8" ) << u(); QTest::newRow( "pt_BR" ) << l( "pt_BR" ) << l( "pt_BR.UTF-8" ) << u(); QTest::newRow( "nl " ) << l( "nl" ) << l( "nl_NL.UTF-8" ) << u(); @@ -435,11 +442,20 @@ LocaleTests::testLanguageMapping_data() } +void LocaleTests::testLanguageMappingNeon_data() +{ + MappingData(); +} + +void LocaleTests::testLanguageMappingFreeBSD_data() +{ + MappingData(); +} + void -LocaleTests::testLanguageMapping() +LocaleTests::testLanguageMappingNeon() { QVERIFY( !m_KDEneonLocales.isEmpty() ); - QVERIFY( !m_FreeBSDLocales.isEmpty() ); QFETCH( QString, selectedLanguage ); QFETCH( QString, KDEneonLanguage ); @@ -450,10 +466,23 @@ LocaleTests::testLanguageMapping() const auto neon = LocaleConfiguration::fromLanguageAndLocation( ( selectedLanguage ), m_KDEneonLocales, QStringLiteral( "NL" ) ); QCOMPARE( neon.language(), KDEneonLanguage ); +} + +void +LocaleTests::testLanguageMappingFreeBSD() +{ + QVERIFY( !m_FreeBSDLocales.isEmpty() ); + + QFETCH( QString, selectedLanguage ); + QFETCH( QString, KDEneonLanguage ); + QFETCH( QString, FreeBSDLanguage ); + + QVERIFY( Calamares::Locale::availableLanguages().contains( selectedLanguage ) ); const auto bsd = LocaleConfiguration::fromLanguageAndLocation( ( selectedLanguage ), m_FreeBSDLocales, QStringLiteral( "NL" ) ); - QCOMPARE( bsd.language(), FreeBSDLanguage.isEmpty() ? KDEneonLanguage : FreeBSDLanguage ); + const auto expected = FreeBSDLanguage.isEmpty() ? 
KDEneonLanguage : FreeBSDLanguage; + QCOMPARE( bsd.language(), expected ); } From fd56b5bdc43f88bcaa44d728c3f157726c14b395 Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Tue, 26 Jul 2022 22:10:46 +0200 Subject: [PATCH 04/14] [locale] Approach matching from a different angle - add struct that splits a locale name into parts - add tests that the splitting and joining works --- src/modules/locale/CMakeLists.txt | 3 +- src/modules/locale/LocaleNames.cpp | 72 ++++++++++++++++++++++++++++++ src/modules/locale/LocaleNames.h | 35 +++++++++++++++ src/modules/locale/Tests.cpp | 59 ++++++++++++++++++++++-- 4 files changed, 164 insertions(+), 5 deletions(-) create mode 100644 src/modules/locale/LocaleNames.cpp create mode 100644 src/modules/locale/LocaleNames.h diff --git a/src/modules/locale/CMakeLists.txt b/src/modules/locale/CMakeLists.txt index bad6042a6..b631f77f7 100644 --- a/src/modules/locale/CMakeLists.txt +++ b/src/modules/locale/CMakeLists.txt @@ -22,6 +22,7 @@ calamares_add_plugin(locale Config.cpp LCLocaleDialog.cpp LocaleConfiguration.cpp + LocaleNames.cpp LocalePage.cpp LocaleViewStep.cpp SetTimezoneJob.cpp @@ -39,7 +40,7 @@ calamares_add_plugin(locale calamares_add_test( localetest - SOURCES Tests.cpp Config.cpp LocaleConfiguration.cpp SetTimezoneJob.cpp timezonewidget/TimeZoneImage.cpp + SOURCES Tests.cpp Config.cpp LocaleConfiguration.cpp LocaleNames.cpp SetTimezoneJob.cpp timezonewidget/TimeZoneImage.cpp DEFINITIONS SOURCE_DIR="${CMAKE_CURRENT_LIST_DIR}/images" DEBUG_TIMEZONES=1 LIBRARIES Qt5::Gui ) diff --git a/src/modules/locale/LocaleNames.cpp b/src/modules/locale/LocaleNames.cpp new file mode 100644 index 000000000..148d21472 --- /dev/null +++ b/src/modules/locale/LocaleNames.cpp @@ -0,0 +1,72 @@ +/* === This file is part of Calamares - === + * + * SPDX-FileCopyrightText: 2022 Adriaan de Groot + * SPDX-License-Identifier: GPL-3.0-or-later + * + * Calamares is Free Software: see the License-Identifier above. 
+ * + */ + +#include "LocaleNames.h" + +#include "utils/Logger.h" + +#include + +LocaleNameParts +LocaleNameParts::fromName( const QString& name ) +{ + auto requireAndRemoveLeadingChar = []( QChar c, QString s ) + { + if ( s.startsWith( c ) ) + { + return s.remove( 0, 1 ); + } + else + { + return QString(); + } + }; + + auto parts = QRegularExpression( "^([a-zA-Z]+)(_[a-zA-Z]+)?(\\.[-a-zA-Z0-9]+)?(@[a-zA-Z]+)?" ).match( name ); + const QString calamaresLanguage = parts.captured( 1 ); + const QString calamaresCountry = requireAndRemoveLeadingChar( '_', parts.captured( 2 ) ); + const QString calamaresEncoding = requireAndRemoveLeadingChar( '.', parts.captured( 3 ) ); + const QString calamaresRegion = requireAndRemoveLeadingChar( '@', parts.captured( 4 ) ); + + if ( calamaresLanguage.isEmpty() ) + { + return LocaleNameParts {}; + } + else + { + return LocaleNameParts { calamaresLanguage, calamaresCountry, calamaresRegion, calamaresEncoding }; + } +} + +QString +LocaleNameParts::name() const +{ + // We don't want QStringView to a temporary; force conversion + auto insertLeadingChar = []( QChar c, QString s ) -> QString + { + if ( s.isEmpty() ) + { + return QString(); + } + else + { + return c + s; + } + }; + + if ( !isValid() ) + { + return QString(); + } + else + { + return language + insertLeadingChar( '_', country ) + insertLeadingChar( '.', encoding ) + + insertLeadingChar( '@', region ); + } +} diff --git a/src/modules/locale/LocaleNames.h b/src/modules/locale/LocaleNames.h new file mode 100644 index 000000000..976ee20b3 --- /dev/null +++ b/src/modules/locale/LocaleNames.h @@ -0,0 +1,35 @@ +/* === This file is part of Calamares - === + * + * SPDX-FileCopyrightText: 2022 Adriaan de Groot + * SPDX-License-Identifier: GPL-3.0-or-later + * + * Calamares is Free Software: see the License-Identifier above. + * + */ + +#ifndef LOCALENAMES_H +#define LOCALENAMES_H + +#include + +/** @brief parts of a locale-name (e.g. 
"ar_LY.UTF-8", split apart) + * + * These are created from lines in `/usr/share/i18n/SUPPORTED`, + * which lists all the locales supported by the system (there + * are also other sources of the same). + * + */ +struct LocaleNameParts +{ + QString language; // e.g. "ar" + QString country; // e.g. "LY" (may be empty) + QString region; // e.g. "@valencia" (may be empty) + QString encoding; // e.g. "UTF-8" (may be empty) + + bool isValid() const { return !language.isEmpty(); } + QString name() const; + + static LocaleNameParts fromName( const QString& name ); +}; + +#endif diff --git a/src/modules/locale/Tests.cpp b/src/modules/locale/Tests.cpp index 1a6887dc1..85ec34026 100644 --- a/src/modules/locale/Tests.cpp +++ b/src/modules/locale/Tests.cpp @@ -9,6 +9,7 @@ #include "Config.h" #include "LocaleConfiguration.h" +#include "LocaleNames.h" #include "timezonewidget/TimeZoneImage.h" #include "Settings.h" @@ -49,8 +50,11 @@ private Q_SLOTS: void testLanguageDetection(); void testLanguageDetectionValencia(); - // Check realistic language mapping for issue 2008 + // Check that the test-data is available and ok void testKDENeonLanguageData(); + void testLocaleNameParts(); + + // Check realistic language mapping for issue 2008 void testLanguageMappingNeon_data(); void testLanguageMappingNeon(); void testLanguageMappingFreeBSD_data(); @@ -392,6 +396,10 @@ splitTestFileIntoLines( const QString& filename ) void LocaleTests::testKDENeonLanguageData() { + if ( !m_KDEneonLocales.isEmpty() ) + { + return; + } const QStringList neonLocales = splitTestFileIntoLines( QStringLiteral( "locale-data-neon" ) ); cDebug() << "Loaded KDE neon locales test data" << neonLocales.front() << "to" << neonLocales.back(); QCOMPARE( neonLocales.length(), 318 ); // wc -l tells me 318 lines @@ -412,7 +420,7 @@ LocaleTests::MappingData() // Tired of writing QString or QStringLiteral all the time. 
auto l = []( const char* p ) { return QString::fromUtf8( p ); }; - auto u = [](){ return QString(); }; + auto u = []() { return QString(); }; // The KDEneon columns include the .UTF-8 from the source data // The FreeBSD columns may have u() to indicate "same as KDEneon", @@ -442,12 +450,14 @@ LocaleTests::MappingData() } -void LocaleTests::testLanguageMappingNeon_data() +void +LocaleTests::testLanguageMappingNeon_data() { MappingData(); } -void LocaleTests::testLanguageMappingFreeBSD_data() +void +LocaleTests::testLanguageMappingFreeBSD_data() { MappingData(); } @@ -455,6 +465,7 @@ void LocaleTests::testLanguageMappingFreeBSD_data() void LocaleTests::testLanguageMappingNeon() { + testKDENeonLanguageData(); QVERIFY( !m_KDEneonLocales.isEmpty() ); QFETCH( QString, selectedLanguage ); @@ -471,6 +482,7 @@ LocaleTests::testLanguageMappingNeon() void LocaleTests::testLanguageMappingFreeBSD() { + testKDENeonLanguageData(); QVERIFY( !m_FreeBSDLocales.isEmpty() ); QFETCH( QString, selectedLanguage ); @@ -485,6 +497,45 @@ LocaleTests::testLanguageMappingFreeBSD() QCOMPARE( bsd.language(), expected ); } +void +LocaleTests::testLocaleNameParts() +{ + testKDENeonLanguageData(); + QVERIFY( !m_FreeBSDLocales.isEmpty() ); + QVERIFY( !m_KDEneonLocales.isEmpty() ); + + // Example constant locales + { + auto c_parts = LocaleNameParts::fromName( QStringLiteral( "nl_NL.UTF-8" ) ); + QCOMPARE( c_parts.language, QStringLiteral( "nl" ) ); + QCOMPARE( c_parts.country, QStringLiteral( "NL" ) ); + QCOMPARE( c_parts.encoding, QStringLiteral( "UTF-8" ) ); + QVERIFY( c_parts.region.isEmpty() ); + } + { + auto c_parts = LocaleNameParts::fromName( QStringLiteral( "C.UTF-8" ) ); + QCOMPARE( c_parts.language, QStringLiteral( "C" ) ); + QVERIFY( c_parts.country.isEmpty() ); + QCOMPARE( c_parts.encoding, QStringLiteral( "UTF-8" ) ); + QVERIFY( c_parts.region.isEmpty() ); + } + + // Check all the loaded test locales + for ( const auto& s : m_FreeBSDLocales ) + { + auto parts = 
LocaleNameParts::fromName( s ); + QVERIFY( parts.isValid() ); + QCOMPARE( parts.name(), s ); + } + + for ( const auto& s : m_KDEneonLocales ) + { + auto parts = LocaleNameParts::fromName( s ); + QVERIFY( parts.isValid() ); + QCOMPARE( parts.name(), s ); + } +} + #include "utils/moc-warnings.h" From 78e216fedbbcdcf4fc40c5a69e075521aa16be80 Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Sun, 14 Aug 2022 16:26:46 +0200 Subject: [PATCH 05/14] [locale] Introduce a similarity-score for locales --- src/modules/locale/LocaleNames.cpp | 18 ++++++++++++++ src/modules/locale/LocaleNames.h | 8 ++++++ src/modules/locale/Tests.cpp | 40 ++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/src/modules/locale/LocaleNames.cpp b/src/modules/locale/LocaleNames.cpp index 148d21472..93e844446 100644 --- a/src/modules/locale/LocaleNames.cpp +++ b/src/modules/locale/LocaleNames.cpp @@ -70,3 +70,21 @@ LocaleNameParts::name() const + insertLeadingChar( '@', region ); } } + + +int +LocaleNameParts::similarity( const LocaleNameParts& other ) const +{ + if ( !isValid() || !other.isValid() ) + { + return 0; + } + if ( language != other.language ) + { + return 0; + } + const auto matched_region = ( region == other.region ? 30 : 0 ); + const auto matched_country = ( country == other.country ? 20 : 0 ); + const auto no_other_country_given = ( ( country != other.country && other.country.isEmpty() ) ? 10 : 0 ); + return 50 + matched_region + matched_country + no_other_country_given; +} diff --git a/src/modules/locale/LocaleNames.h b/src/modules/locale/LocaleNames.h index 976ee20b3..247a8496c 100644 --- a/src/modules/locale/LocaleNames.h +++ b/src/modules/locale/LocaleNames.h @@ -30,6 +30,14 @@ struct LocaleNameParts QString name() const; static LocaleNameParts fromName( const QString& name ); + + /** @brief Compute similarity-score with another locale-name. + * + * Similarity is driven by language and region, then country. 
+ * Returns a number between 0 (no similarity, e.g. the + * language is different) and 100 (complete match). + */ + int similarity( const LocaleNameParts& other ) const; }; #endif diff --git a/src/modules/locale/Tests.cpp b/src/modules/locale/Tests.cpp index 85ec34026..fda58059a 100644 --- a/src/modules/locale/Tests.cpp +++ b/src/modules/locale/Tests.cpp @@ -59,6 +59,7 @@ private Q_SLOTS: void testLanguageMappingNeon(); void testLanguageMappingFreeBSD_data(); void testLanguageMappingFreeBSD(); + void testLanguageSimilarity(); private: QStringList m_KDEneonLocales; @@ -536,6 +537,45 @@ LocaleTests::testLocaleNameParts() } } +void +LocaleTests::testLanguageSimilarity() +{ + // Empty + { + QCOMPARE( LocaleNameParts().similarity( LocaleNameParts() ), 0 ); + } + // Some simple Dutch situations + { + auto nl_parts = LocaleNameParts::fromName( QStringLiteral( "nl_NL.UTF-8" ) ); + auto be_parts = LocaleNameParts::fromName( QStringLiteral( "nl_BE.UTF-8" ) ); + auto nl_short_parts = LocaleNameParts::fromName( QStringLiteral( "nl" ) ); + QCOMPARE( nl_parts.similarity( nl_parts ), 100 ); + QCOMPARE( nl_parts.similarity( LocaleNameParts() ), 0 ); + QCOMPARE( nl_parts.similarity( be_parts ), 80 ); // Language + (empty) region match + QCOMPARE( nl_parts.similarity( nl_short_parts ), 90 ); + } + + // Everything matches itself + { + if ( m_KDEneonLocales.isEmpty() ) + { + testKDENeonLanguageData(); + } + QVERIFY( !m_FreeBSDLocales.isEmpty() ); + QVERIFY( !m_KDEneonLocales.isEmpty() ); + for ( const auto& l : m_KDEneonLocales ) + { + auto locale_name = LocaleNameParts::fromName( l ); + auto self_similarity = locale_name.similarity( locale_name ); + if ( self_similarity != 100 ) + { + cDebug() << "Locale" << l << "is unusual."; + } + QCOMPARE( self_similarity, 100 ); + } + } +} + #include "utils/moc-warnings.h" From a988298a653295ef674061e81ca2d49d982d0a9f Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Sun, 14 Aug 2022 17:16:12 +0200 Subject: [PATCH 06/14] [localeq] Needs more 
shared sources from locale --- src/modules/localeq/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/modules/localeq/CMakeLists.txt b/src/modules/localeq/CMakeLists.txt index f086676e6..b8ae6a933 100644 --- a/src/modules/localeq/CMakeLists.txt +++ b/src/modules/localeq/CMakeLists.txt @@ -31,8 +31,9 @@ calamares_add_plugin(localeq EXPORT_MACRO PLUGINDLLEXPORT_PRO SOURCES LocaleQmlViewStep.cpp - ${_locale}/LocaleConfiguration.cpp ${_locale}/Config.cpp + ${_locale}/LocaleConfiguration.cpp + ${_locale}/LocaleNames.cpp ${_locale}/SetTimezoneJob.cpp RESOURCES localeq.qrc From cfb8ef9f65da630203c6ecd41b6ac13f8ac9c460 Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Sun, 14 Aug 2022 17:16:31 +0200 Subject: [PATCH 07/14] [locale] Use locale-similarity for searching --- src/modules/locale/LocaleConfiguration.cpp | 135 ++++++--------------- src/modules/locale/LocaleNames.h | 3 + 2 files changed, 40 insertions(+), 98 deletions(-) diff --git a/src/modules/locale/LocaleConfiguration.cpp b/src/modules/locale/LocaleConfiguration.cpp index 17953f079..c857ca80f 100644 --- a/src/modules/locale/LocaleConfiguration.cpp +++ b/src/modules/locale/LocaleConfiguration.cpp @@ -9,11 +9,13 @@ */ #include "LocaleConfiguration.h" +#include "LocaleNames.h" #include "utils/Logger.h" #include #include +#include LocaleConfiguration::LocaleConfiguration() : explicit_lang( false ) @@ -47,90 +49,45 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, const QString& countryCode ) { cDebug() << "Mapping" << languageLocale << "in" << countryCode << "to locale."; - QString language = languageLocale.split( '_' ).first(); - QString region; - if ( language.contains( '@' ) ) - { - auto r = language.split( '@' ); - language = r.first(); - region = r[ 1 ]; // second() - } - - // Either an exact match, or the whole language part matches - // (followed by . 
or _ - QStringList linesForLanguage = availableLocales.filter( QRegularExpression( language + "[._]" ) ); - cDebug() << Logger::SubEntry << "Matching" << linesForLanguage; + const QString default_lang = QStringLiteral( "en_US.UTF-8" ); QString lang; - if ( linesForLanguage.isEmpty() || languageLocale.isEmpty() ) - { - lang = "en_US.UTF-8"; - } - else if ( linesForLanguage.length() == 1 ) - { - lang = linesForLanguage.first(); - } - // lang could still be empty if we found multiple locales that satisfy myLanguage - const QString combinedLanguageAndCountry = QString( "%1_%2" ).arg( language ).arg( countryCode ); - if ( lang.isEmpty() && region.isEmpty() ) + const LocaleNameParts self = LocaleNameParts::fromName( languageLocale ); + if ( self.isValid() && !availableLocales.isEmpty() ) { - auto l = linesForLanguage.filter( - QRegularExpression( combinedLanguageAndCountry + "[._]" ) ); // no regional variants - if ( l.length() == 1 ) + QVector< LocaleNameParts > others; + others.resize( availableLocales.length() ); // Makes default structs + std::transform( availableLocales.begin(), availableLocales.end(), others.begin(), LocaleNameParts::fromName ); + std::sort( others.begin(), + others.end(), + [ reference = self ]( const LocaleNameParts& lhs, const LocaleNameParts& rhs ) + { return reference.similarity( lhs ) < reference.similarity( rhs ); } ); + + // The best match is at the end + LocaleNameParts best_match = others.last(); + if ( !( self.similarity( best_match ) > LocaleNameParts::no_match ) ) { - lang = l.first(); + best_match = LocaleNameParts {}; } - } - - // The following block was inspired by Ubiquity, scripts/localechooser-apply. - // No copyright statement found in file, assuming GPL v2 or later. - /* # In the special cases of Portuguese and Chinese, selecting a - # different location may imply a different dialect of the language. 
- # In such cases, make LANG reflect the selected language (for - # messages, character types, and collation) and make the other - # locale categories reflect the selected location. */ - if ( language == "pt" || language == "zh" ) - { - cDebug() << Logger::SubEntry << "Special-case Portuguese and Chinese"; - QString proposedLocale = QString( "%1_%2" ).arg( language ).arg( countryCode ); - for ( const QString& line : linesForLanguage ) + // .. but it might match **better** with the chosen location country Code + if ( self.similarity( best_match ) < LocaleNameParts::complete_match ) { - if ( line.contains( proposedLocale ) ) + auto self_other_country( self ); + self_other_country.country = countryCode; + std::sort( others.begin(), + others.end(), + [ reference = self_other_country ]( const LocaleNameParts& lhs, const LocaleNameParts& rhs ) + { return reference.similarity( lhs ) < reference.similarity( rhs ); } ); + if ( self_other_country.similarity( others.last() ) > self.similarity( best_match ) ) { - cDebug() << Logger::SubEntry << "Country-variant" << line << "chosen."; - lang = line; - break; + best_match = others.last(); } } - } - if ( lang.isEmpty() && !region.isEmpty() ) - { - cDebug() << Logger::SubEntry << "Special-case region @" << region; - QString proposedRegion = QString( "@%1" ).arg( region ); - for ( const QString& line : linesForLanguage ) + if ( best_match.isValid() ) { - if ( line.startsWith( language ) && line.contains( proposedRegion ) ) - { - cDebug() << Logger::SubEntry << "Region-variant" << line << "chosen."; - lang = line; - break; - } - } - } - - // If we found no good way to set a default lang, do a search with the whole - // language locale and pick the first result, if any. 
- if ( lang.isEmpty() ) - { - for ( const QString& line : availableLocales ) - { - if ( line.startsWith( languageLocale ) ) - { - lang = line; - break; - } + lang = best_match.name(); } } @@ -138,7 +95,7 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, // en_US.UTF-8 UTF-8. This completes all default language setting guesswork. if ( lang.isEmpty() ) { - lang = "en_US.UTF-8"; + lang = default_lang; } @@ -188,34 +145,16 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, // We make a proposed locale based on the UI language and the timezone's country. There is no // guarantee that this will be a valid, supported locale (often it won't). QString lc_formats; - const QString combined = QString( "%1_%2" ).arg( language ).arg( countryCode ); - if ( lang.isEmpty() ) + const QString combined = QString( "%1_%2" ).arg( self.language ).arg( countryCode ); + if ( availableLocales.contains( lang ) ) { - cDebug() << Logger::SubEntry << "Looking up formats for" << combinedLanguageAndCountry; - // We look up if it's a supported locale. 
- for ( const QString& line : availableLocales ) - { - if ( line.startsWith( combinedLanguageAndCountry ) ) - { - lang = line; - lc_formats = line; - break; - } - } + cDebug() << Logger::SubEntry << "Exact formats match for language tag" << lang; + lc_formats = lang; } - else + else if ( availableLocales.contains( combined ) ) { - if ( availableLocales.contains( lang ) ) - { - cDebug() << Logger::SubEntry << "Exact formats match for language tag" << lang; - lc_formats = lang; - } - else if ( availableLocales.contains( combinedLanguageAndCountry ) ) - { - cDebug() << Logger::SubEntry << "Exact formats match for combined" << combinedLanguageAndCountry; - lang = combinedLanguageAndCountry; - lc_formats = combinedLanguageAndCountry; - } + cDebug() << Logger::SubEntry << "Exact formats match for combined" << combined; + lc_formats = combined; } if ( lc_formats.isEmpty() ) diff --git a/src/modules/locale/LocaleNames.h b/src/modules/locale/LocaleNames.h index 247a8496c..8498aa28a 100644 --- a/src/modules/locale/LocaleNames.h +++ b/src/modules/locale/LocaleNames.h @@ -31,6 +31,9 @@ struct LocaleNameParts static LocaleNameParts fromName( const QString& name ); + static inline constexpr const int no_match = 0; + static inline constexpr const int complete_match = 100; + /** @brief Compute similarity-score with another locale-name. * * Similarity is driven by language and region, then country. 
From eb242168bf4ac80aa920d795a9b0cbec632af8f1 Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Sun, 14 Aug 2022 21:45:45 +0200 Subject: [PATCH 08/14] [locale] Log what we matched with (for language) --- src/modules/locale/LocaleConfiguration.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/locale/LocaleConfiguration.cpp b/src/modules/locale/LocaleConfiguration.cpp index c857ca80f..ac5452456 100644 --- a/src/modules/locale/LocaleConfiguration.cpp +++ b/src/modules/locale/LocaleConfiguration.cpp @@ -86,7 +86,7 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, } if ( best_match.isValid() ) { - + cDebug() << Logger::SubEntry << "Matched best with" << best_match.name(); lang = best_match.name(); } } From 40527ffd4e6585038ac9cbbe3e75853a7b6fd6e0 Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Mon, 22 Aug 2022 23:48:21 +0200 Subject: [PATCH 09/14] [locale] Be more chatty while matching locales --- src/modules/locale/LocaleConfiguration.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/modules/locale/LocaleConfiguration.cpp b/src/modules/locale/LocaleConfiguration.cpp index ac5452456..c7db10af1 100644 --- a/src/modules/locale/LocaleConfiguration.cpp +++ b/src/modules/locale/LocaleConfiguration.cpp @@ -68,8 +68,14 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, LocaleNameParts best_match = others.last(); if ( !( self.similarity( best_match ) > LocaleNameParts::no_match ) ) { + cDebug() << Logger::SubEntry << "Got no good match for" << languageLocale; best_match = LocaleNameParts {}; } + else + { + cDebug() << Logger::SubEntry << "Got best match for" << languageLocale << "as" << best_match.name(); + } + // .. 
but it might match **better** with the chosen location country Code if ( self.similarity( best_match ) < LocaleNameParts::complete_match ) { @@ -82,6 +88,8 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, if ( self_other_country.similarity( others.last() ) > self.similarity( best_match ) ) { best_match = others.last(); + cDebug() << Logger::SubEntry << "Found better match with country" << countryCode << "as" + << best_match.name(); } } if ( best_match.isValid() ) From 6cbf2d7e3292f4d15af1452117d4ecc6b3786caa Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Tue, 23 Aug 2022 00:03:04 +0200 Subject: [PATCH 10/14] [locale] Factor out the guess-language part --- src/modules/locale/LocaleConfiguration.cpp | 44 +++++++++++----------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/modules/locale/LocaleConfiguration.cpp b/src/modules/locale/LocaleConfiguration.cpp index c7db10af1..b6cf97453 100644 --- a/src/modules/locale/LocaleConfiguration.cpp +++ b/src/modules/locale/LocaleConfiguration.cpp @@ -42,16 +42,14 @@ LocaleConfiguration::setLanguage( const QString& localeName ) m_lang = localeName; } - -LocaleConfiguration -LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, - const QStringList& availableLocales, - const QString& countryCode ) +/** @brief Returns the QString from @p availableLocales that best-matches. 
+ */ +static LocaleNameParts +identifyBestLanguageMatch( const QString& languageLocale, + const QStringList& availableLocales, + const QString& countryCode ) { - cDebug() << "Mapping" << languageLocale << "in" << countryCode << "to locale."; - const QString default_lang = QStringLiteral( "en_US.UTF-8" ); - QString lang; const LocaleNameParts self = LocaleNameParts::fromName( languageLocale ); if ( self.isValid() && !availableLocales.isEmpty() ) @@ -95,17 +93,22 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, if ( best_match.isValid() ) { cDebug() << Logger::SubEntry << "Matched best with" << best_match.name(); - lang = best_match.name(); + return best_match; } } // Else we have an unrecognized or unsupported locale, all we can do is go with // en_US.UTF-8 UTF-8. This completes all default language setting guesswork. - if ( lang.isEmpty() ) - { - lang = default_lang; - } + return LocaleNameParts::fromName( default_lang ); +} +LocaleConfiguration +LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, + const QStringList& availableLocales, + const QString& countryCode ) +{ + cDebug() << "Mapping" << languageLocale << "in" << countryCode << "to locale."; + const auto bestLocale = identifyBestLanguageMatch( languageLocale, availableLocales, countryCode ); // The following block was inspired by Ubiquity, scripts/localechooser-apply. // No copyright statement found in file, assuming GPL v2 or later. @@ -153,11 +156,11 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, // We make a proposed locale based on the UI language and the timezone's country. There is no // guarantee that this will be a valid, supported locale (often it won't). 
QString lc_formats; - const QString combined = QString( "%1_%2" ).arg( self.language ).arg( countryCode ); - if ( availableLocales.contains( lang ) ) + const QString combined = QString( "%1_%2" ).arg( bestLocale.language ).arg( countryCode ); + if ( availableLocales.contains( bestLocale.language ) ) { - cDebug() << Logger::SubEntry << "Exact formats match for language tag" << lang; - lc_formats = lang; + cDebug() << Logger::SubEntry << "Exact formats match for language tag" << bestLocale.language; + lc_formats = bestLocale.language; } else if ( availableLocales.contains( combined ) ) { @@ -250,12 +253,7 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, // If we cannot make a good choice for a given country we go with the LANG // setting, which defaults to en_US.UTF-8 UTF-8 if all else fails. - if ( lc_formats.isEmpty() ) - { - lc_formats = lang; - } - - return LocaleConfiguration( lang, lc_formats ); + return LocaleConfiguration( bestLocale.name(), lc_formats.isEmpty() ? 
bestLocale.name() : lc_formats ); } From a422fd80d98a5cdbd6db1d9f5405a1bd432bdff9 Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Tue, 23 Aug 2022 00:46:40 +0200 Subject: [PATCH 11/14] [locale] Refactor matching some more - find the best score and match relative to a specific set of parts; make it easy to update the country-setting - look for a complete match, or best match, with three country settings --- src/modules/locale/LocaleConfiguration.cpp | 95 ++++++++++++++++------ 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/src/modules/locale/LocaleConfiguration.cpp b/src/modules/locale/LocaleConfiguration.cpp index b6cf97453..c62b1ab08 100644 --- a/src/modules/locale/LocaleConfiguration.cpp +++ b/src/modules/locale/LocaleConfiguration.cpp @@ -42,6 +42,34 @@ LocaleConfiguration::setLanguage( const QString& localeName ) m_lang = localeName; } +static LocaleNameParts +updateCountry( LocaleNameParts p, const QString& country ) +{ + p.country = country; + return p; +} + +static QPair< int, LocaleNameParts > +identifyBestLanguageMatch( const LocaleNameParts& referenceLocale, QVector< LocaleNameParts >& others ) +{ + std::sort( others.begin(), + others.end(), + [ & ]( const LocaleNameParts& lhs, const LocaleNameParts& rhs ) + { return referenceLocale.similarity( lhs ) < referenceLocale.similarity( rhs ); } ); + // The best match is at the end + LocaleNameParts best_match = others.last(); + if ( !( referenceLocale.similarity( best_match ) > LocaleNameParts::no_match ) ) + { + cDebug() << Logger::SubEntry << "Got no good match for" << referenceLocale.name(); + return { LocaleNameParts::no_match, LocaleNameParts {} }; + } + else + { + cDebug() << Logger::SubEntry << "Got best match for" << referenceLocale.name() << "as" << best_match.name(); + return { referenceLocale.similarity( best_match ), best_match }; + } +} + /** @brief Returns the QString from @p availableLocales that best-matches. 
*/ static LocaleNameParts @@ -57,39 +85,52 @@ identifyBestLanguageMatch( const QString& languageLocale, QVector< LocaleNameParts > others; others.resize( availableLocales.length() ); // Makes default structs std::transform( availableLocales.begin(), availableLocales.end(), others.begin(), LocaleNameParts::fromName ); - std::sort( others.begin(), - others.end(), - [ reference = self ]( const LocaleNameParts& lhs, const LocaleNameParts& rhs ) - { return reference.similarity( lhs ) < reference.similarity( rhs ); } ); - // The best match is at the end - LocaleNameParts best_match = others.last(); - if ( !( self.similarity( best_match ) > LocaleNameParts::no_match ) ) - { - cDebug() << Logger::SubEntry << "Got no good match for" << languageLocale; - best_match = LocaleNameParts {}; - } - else - { - cDebug() << Logger::SubEntry << "Got best match for" << languageLocale << "as" << best_match.name(); - } + // Keep track of the best match in various attempts + int best_score = LocaleNameParts::no_match; + LocaleNameParts best_match; - // .. but it might match **better** with the chosen location country Code - if ( self.similarity( best_match ) < LocaleNameParts::complete_match ) + // Check with the unmodified language setting { - auto self_other_country( self ); - self_other_country.country = countryCode; - std::sort( others.begin(), - others.end(), - [ reference = self_other_country ]( const LocaleNameParts& lhs, const LocaleNameParts& rhs ) - { return reference.similarity( lhs ) < reference.similarity( rhs ); } ); - if ( self_other_country.similarity( others.last() ) > self.similarity( best_match ) ) + auto [ score, match ] = identifyBestLanguageMatch( self, others ); + if ( score >= LocaleNameParts::complete_match ) { - best_match = others.last(); - cDebug() << Logger::SubEntry << "Found better match with country" << countryCode << "as" - << best_match.name(); + return match; + } + else if ( score > best_score ) + { + best_match = match; } } + + + // .. 
but it might match **better** with the chosen location country Code + { + auto [ score, match ] = identifyBestLanguageMatch( updateCountry( self, countryCode ), others ); + if ( score >= LocaleNameParts::complete_match ) + { + return match; + } + else if ( score > best_score ) + { + best_match = match; + } + } + + // .. or better yet with the QLocale-derived country + { + const QString localeCountry = LocaleNameParts::fromName( QLocale( languageLocale ).name() ).country; + auto [ score, match ] = identifyBestLanguageMatch( updateCountry( self, localeCountry ), others ); + if ( score >= LocaleNameParts::complete_match ) + { + return match; + } + else if ( score > best_score ) + { + best_match = match; + } + } + if ( best_match.isValid() ) { cDebug() << Logger::SubEntry << "Matched best with" << best_match.name(); From 35401214499ff6ed98b1dd143883e7b7d4d84130 Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Tue, 23 Aug 2022 02:02:24 +0200 Subject: [PATCH 12/14] [locale] Prefer non-empty country matches Prefer "en_US" over "en" even when asking for "en". --- src/modules/locale/LocaleNames.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/locale/LocaleNames.cpp b/src/modules/locale/LocaleNames.cpp index 93e844446..401aa4809 100644 --- a/src/modules/locale/LocaleNames.cpp +++ b/src/modules/locale/LocaleNames.cpp @@ -84,7 +84,7 @@ LocaleNameParts::similarity( const LocaleNameParts& other ) const return 0; } const auto matched_region = ( region == other.region ? 30 : 0 ); - const auto matched_country = ( country == other.country ? 20 : 0 ); + const auto matched_country = ( country == other.country ? ( country.isEmpty() ? 10 : 20 ) : 0 ); const auto no_other_country_given = ( ( country != other.country && other.country.isEmpty() ) ? 
10 : 0 ); return 50 + matched_region + matched_country + no_other_country_given; } From fb3112b75dd9eb548ce3c4e0eb925fcf62bb12dc Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Tue, 23 Aug 2022 02:02:54 +0200 Subject: [PATCH 13/14] [locale] Repair tests - prefers language default country (ca_ES over ca_AD) - prefers non-empty country match --- src/modules/locale/Tests.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/modules/locale/Tests.cpp b/src/modules/locale/Tests.cpp index fda58059a..7c883df05 100644 --- a/src/modules/locale/Tests.cpp +++ b/src/modules/locale/Tests.cpp @@ -307,10 +307,10 @@ LocaleTests::testLanguageDetection_data() QTest::newRow( "english (US)" ) << QStringLiteral( "en" ) << QStringLiteral( "US" ) << QStringLiteral( "en_US.UTF-8" ); QTest::newRow( "english (CA)" ) << QStringLiteral( "en" ) << QStringLiteral( "CA" ) - << QStringLiteral( "en" ); // because it's first in the list + << QStringLiteral( "en_US.UTF-8" ); QTest::newRow( "english (GB)" ) << QStringLiteral( "en" ) << QStringLiteral( "GB" ) << QStringLiteral( "en_GB.UTF-8" ); - QTest::newRow( "english (NL)" ) << QStringLiteral( "en" ) << QStringLiteral( "NL" ) << QStringLiteral( "en" ); + QTest::newRow( "english (NL)" ) << QStringLiteral( "en" ) << QStringLiteral( "NL" ) << QStringLiteral( "en_US.UTF-8" ); QTest::newRow( "portuguese (PT)" ) << QStringLiteral( "pt" ) << QStringLiteral( "PT" ) << QStringLiteral( "pt_PT.UTF-8" ); @@ -320,11 +320,11 @@ LocaleTests::testLanguageDetection_data() << QStringLiteral( "pt_BR.UTF-8" ); QTest::newRow( "catalan ()" ) << QStringLiteral( "ca" ) << QStringLiteral( "" ) - << QStringLiteral( "ca_AD.UTF-8" ); // no country given? Matches first + << QStringLiteral( "ca_ES.UTF-8" ); // no country given? 
Matches QLocale-default QTest::newRow( "catalan (ES)" ) << QStringLiteral( "ca" ) << QStringLiteral( "ES" ) << QStringLiteral( "ca_ES.UTF-8" ); QTest::newRow( "catalan (NL)" ) << QStringLiteral( "ca" ) << QStringLiteral( "NL" ) - << QStringLiteral( "ca_AD.UTF-8" ); + << QStringLiteral( "ca_ES.UTF-8" ); QTest::newRow( "catalan (@valencia)" ) << QStringLiteral( "ca@valencia" ) << QStringLiteral( "ES" ) << QStringLiteral( "ca_ES@valencia" ); // Prefers regional variant QTest::newRow( "catalan (@valencia_NL)" ) From 9a4d992778355d115a1d3cbd6b3ef7d61626e4b0 Mon Sep 17 00:00:00 2001 From: Adriaan de Groot Date: Tue, 23 Aug 2022 02:06:06 +0200 Subject: [PATCH 14/14] [locale] Repair tests - Esperanto now doesn't quite self-match because it has no country - sr prefers RS as country over ME --- src/modules/locale/Tests.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/modules/locale/Tests.cpp b/src/modules/locale/Tests.cpp index 7c883df05..56327154a 100644 --- a/src/modules/locale/Tests.cpp +++ b/src/modules/locale/Tests.cpp @@ -371,7 +371,7 @@ LocaleTests::testLanguageDetectionValencia() { auto r = LocaleConfiguration::fromLanguageAndLocation( QStringLiteral( "sr" ), availableLocales, QStringLiteral( "NL" ) ); - QCOMPARE( r.language(), "sr_ME" ); // Because that one is first in the list + QCOMPARE( r.language(), "sr_RS" ); // Matches QLocale-default country (RS, not ME) } { auto r = LocaleConfiguration::fromLanguageAndLocation( @@ -570,6 +570,10 @@ LocaleTests::testLanguageSimilarity() if ( self_similarity != 100 ) { cDebug() << "Locale" << l << "is unusual."; + if ( l == QStringLiteral( "eo" ) ) + { + QEXPECT_FAIL( "", "Esperanto has no country to match", Continue ); + } } QCOMPARE( self_similarity, 100 ); }