cleaned up ISO code usage for Kurdish, fdo#63460

* instead of the 'ku' macrolanguage code use proper ISO 639-3 codes and
  use 'Latn' script with 'kmr'
* use MS-LCID 0x0492 for Central Kurdish (Iraq) [ckb-IQ]
* added Southern Kurdish (Iraq) [sdh-IQ]

Change-Id: Iaee8be98d0659a0e7bbf041e60025dd1f771066f
diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx b/i18nlangtag/qa/cppunit/test_languagetag.cxx
index 17217fd..8946969 100644
--- a/i18nlangtag/qa/cppunit/test_languagetag.cxx
+++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx
@@ -625,6 +625,14 @@ static bool checkMapping( const OUString rStr1, const OUString& rStr2 )
    if (rStr1 == "yi-Hebr-IL"  ) return rStr2 == "yi-IL";
    if (rStr1 == "ha-NG"       ) return rStr2 == "ha-Latn-NG";
    if (rStr1 == "ha-GH"       ) return rStr2 == "ha-Latn-GH";
    if (rStr1 == "ku-Arab-IQ"  ) return rStr2 == "ckb-IQ";
    if (rStr1 == "ku-Arab"     ) return rStr2 == "ckb";
    if (rStr1 == "kmr-TR"      ) return rStr2 == "kmr-Latn-TR";
    if (rStr1 == "ku-TR"       ) return rStr2 == "kmr-Latn-TR";
    if (rStr1 == "kmr-SY"      ) return rStr2 == "kmr-Latn-SY";
    if (rStr1 == "ku-SY"       ) return rStr2 == "kmr-Latn-SY";
    if (rStr1 == "ku-IQ"       ) return rStr2 == "ckb-IQ";
    if (rStr1 == "ku-IR"       ) return rStr2 == "ckb-IR";
    return rStr1 == rStr2;
}

diff --git a/i18nlangtag/source/isolang/isolang.cxx b/i18nlangtag/source/isolang/isolang.cxx
index f9f34a5..5505540 100644
--- a/i18nlangtag/source/isolang/isolang.cxx
+++ b/i18nlangtag/source/isolang/isolang.cxx
@@ -506,10 +506,17 @@ static IsoLanguageCountryEntry const aImplIsoLangEntries[] =
    { LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN,"dsb", "DE", 0     },
    { LANGUAGE_OCCITAN_FRANCE,              "oc", "FR", 0     },
    { LANGUAGE_OBSOLETE_USER_OCCITAN,       "oc", "FR", 0     },
    { LANGUAGE_USER_KURDISH_TURKEY,         "ku", "TR", 0     },
    { LANGUAGE_USER_KURDISH_SYRIA,          "ku", "SY", 0     },
    { LANGUAGE_USER_KURDISH_IRAQ,           "ku", "IQ", 0     },
    { LANGUAGE_USER_KURDISH_IRAN,           "ku", "IR", 0     },
    { LANGUAGE_USER_KURDISH_TURKEY,        "kmr", "TR", kSAME },
    { LANGUAGE_USER_KURDISH_TURKEY,         "ku", "TR", kSAME },
    { LANGUAGE_USER_KURDISH_SYRIA,         "kmr", "SY", kSAME },
    { LANGUAGE_USER_KURDISH_SYRIA,          "ku", "SY", kSAME },
    { LANGUAGE_KURDISH_ARABIC_IRAQ,        "ckb", "IQ", 0     },
    { LANGUAGE_KURDISH_ARABIC_IRAQ,         "ku", "IQ", kSAME },
    { LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ,  "ku", "IQ", LANGUAGE_KURDISH_ARABIC_IRAQ },
    { LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ, "sdh", "IQ", 0     },
    { LANGUAGE_USER_KURDISH_IRAN,          "ckb", "IR", 0     },
    { LANGUAGE_USER_KURDISH_IRAN,           "ku", "IR", kSAME },
    { LANGUAGE_KURDISH_ARABIC_LSO,         "ckb", ""  , 0     },
    { LANGUAGE_USER_SARDINIAN,              "sc", "IT", 0     },    // macrolanguage code
    { LANGUAGE_USER_SARDINIAN_CAMPIDANESE, "sro", "IT", 0     },
    { LANGUAGE_USER_SARDINIAN_GALLURESE,   "sdn", "IT", 0     },
@@ -697,8 +704,10 @@ static IsoLanguageScriptCountryEntry const aImplIsoLangScriptEntries[] =
    { LANGUAGE_LATIN_LSO,                           "la-Latn", ""  , kSAME },   // MS, though Latn is suppress-script
    { LANGUAGE_TAI_NUA_CHINA,                      "tdd-Tale", "CN", 0     },   // MS reserved
    { LANGUAGE_LU_CHINA,                           "khb-Talu", "CN", 0     },   // MS reserved
    { LANGUAGE_KURDISH_ARABIC_IRAQ,                 "ku-Arab", "IQ", 0     },   // macrolanguage code, MS
    { LANGUAGE_KURDISH_ARABIC_LSO,                  "ku-Arab", ""  , 0     },   // macrolanguage code
    { LANGUAGE_KURDISH_ARABIC_IRAQ,                 "ku-Arab", "IQ", kSAME },   // macrolanguage code, MS
    { LANGUAGE_KURDISH_ARABIC_LSO,                  "ku-Arab", ""  , kSAME },   // macrolanguage code, MS
    { LANGUAGE_USER_KURDISH_TURKEY,                "kmr-Latn", "TR", 0     },
    { LANGUAGE_USER_KURDISH_SYRIA,                 "kmr-Latn", "SY", 0     },
    { LANGUAGE_PUNJABI_PAKISTAN,                   "pnb-Arab", "PK", 0     },
    { LANGUAGE_PUNJABI_ARABIC_LSO,                 "pnb-Arab", ""  , 0     },
    { LANGUAGE_PUNJABI_PAKISTAN,                    "pa-Arab", "PK", 0     },   // MS, incorrect
diff --git a/i18nlangtag/source/isolang/mslangid.cxx b/i18nlangtag/source/isolang/mslangid.cxx
index 3964c6f..d44256d 100644
--- a/i18nlangtag/source/isolang/mslangid.cxx
+++ b/i18nlangtag/source/isolang/mslangid.cxx
@@ -216,6 +216,18 @@ bool MsLangId::isRightToLeft( LanguageType nLang )
        default:
            break;
    }
    switch (nLang)
    {
        case LANGUAGE_USER_KURDISH_IRAN:
        case LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ:
        case LANGUAGE_KURDISH_ARABIC_IRAQ:
        case LANGUAGE_KURDISH_ARABIC_LSO:
        case LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ:
            return true;

        default:
            break;
    }
    return false;
}

@@ -323,7 +335,10 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang )
        case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA:
        case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO:
        case LANGUAGE_USER_KURDISH_IRAN:
        case LANGUAGE_USER_KURDISH_IRAQ:
        case LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ:
        case LANGUAGE_KURDISH_ARABIC_IRAQ:
        case LANGUAGE_KURDISH_ARABIC_LSO:
        case LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ:
        case LANGUAGE_USER_KYRGYZ_CHINA:
            nScript = ::com::sun::star::i18n::ScriptType::COMPLEX;
            break;
@@ -507,6 +522,9 @@ LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bo
        case LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO:
            nLang = LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO;
            break;
        case LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ:
            nLang = LANGUAGE_KURDISH_ARABIC_IRAQ;
            break;

        // The following are not strictly obsolete but should be mapped to a
        // replacement locale when encountered.
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
index b0a8f4d..60e8c12 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -2073,6 +2073,11 @@ LanguageTag & LanguageTag::makeFallback()
}


/* TODO: maybe this could now take advantage of the mnOverride field in
 * isolang.cxx entries and search for kSAME instead of hardcoded special
 * fallbacks. Iterating through those tables would be slower, and some
 * special cases would remain even then, but we would no longer lack entries
 * that were missed out. */
::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
{
    ::std::vector< OUString > aVec;
@@ -2111,6 +2116,36 @@ LanguageTag & LanguageTag::makeFallback()
                aVec.insert( aVec.end(), aRep.begin(), aRep.end());
                // Already includes 'ca' language fallback.
            }
            else if (aLanguage == "ku")
            {
                if (aCountry == "TR" || aCountry == "SY")
                {
                    aVec.push_back( "kmr-Latn-" + aCountry);
                    aVec.push_back( "kmr-" + aCountry);
                    aVec.push_back( "kmr-Latn");
                    aVec.push_back( "kmr");
                    aVec.push_back( aLanguage);
                }
                else if (aCountry == "IQ" || aCountry == "IR")
                {
                    aVec.push_back( "ckb-" + aCountry);
                    aVec.push_back( "ckb");
                }
            }
            else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
            {
                aVec.push_back( "ku-Latn-" + aCountry);
                aVec.push_back( "ku-" + aCountry);
                aVec.push_back( aLanguage);
                aVec.push_back( "ku");
            }
            else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
            {
                aVec.push_back( "ku-Arab-" + aCountry);
                aVec.push_back( "ku-" + aCountry);
                aVec.push_back( aLanguage);
                // not 'ku' only, that was used for Latin script
            }
            else
                aVec.push_back( aLanguage);
        }
@@ -2174,6 +2209,8 @@ LanguageTag & LanguageTag::makeFallback()
            }
            else if (aLanguage == "pi" && aScript == "Latn")
                aVec.push_back( "pli");     // a special case for Pali dictionary, see fdo#41599
            else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
                aVec.push_back( "ku-" + aCountry);
        }
        if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
        {
@@ -2184,10 +2221,16 @@ LanguageTag & LanguageTag::makeFallback()
        aTmp = aLanguage + "-" + aScript;
        if (aTmp != maBcp47)
            aVec.push_back( aTmp);

        // 'sh' actually denoted a script, so have it here instead of appended
        // at the end as language-only.
        if (aLanguage == "sr" && aScript == "Latn")
            aVec.push_back( "sh");
        else if (aLanguage == "ku" && aScript == "Arab")
            aVec.push_back( "ckb");
        // 'ku' only denoted Latin script
        else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
            aVec.push_back( "ku");
    }
    bool bHaveLanguageVariant = false;
    if (!aCountry.isEmpty())
diff --git a/include/i18nlangtag/lang.h b/include/i18nlangtag/lang.h
index aefe380..e08c152 100644
--- a/include/i18nlangtag/lang.h
+++ b/include/i18nlangtag/lang.h
@@ -277,7 +277,7 @@ typedef unsigned short LanguageType;
#define LANGUAGE_KONKANI                    0x0457
#define LANGUAGE_KOREAN                     0x0412
#define LANGUAGE_KOREAN_JOHAB               0x0812  /* not mentioned in MS-LCID.pdf, oh joy */
#define LANGUAGE_KURDISH_ARABIC_IRAQ        0x0492  /* TODO: obsoletes LANGUAGE_USER_KURDISH_IRAQ 0x0E26 */
#define LANGUAGE_KURDISH_ARABIC_IRAQ        0x0492  /* obsoletes LANGUAGE_USER_KURDISH_IRAQ 0x0E26 */
#define LANGUAGE_KURDISH_ARABIC_LSO         0x7C92
#define LANGUAGE_LAO                        0x0454
#define LANGUAGE_LATIN_LSO                  0x0476  /* obsoletes LANGUAGE_USER_LATIN 0x0610 */
@@ -531,8 +531,10 @@ typedef unsigned short LanguageType;
#define LANGUAGE_USER_KOREAN_NORTH          0x8012  /* North Korean as opposed to South Korean, makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_KOREAN)) */
#define LANGUAGE_USER_KURDISH_TURKEY        0x0626  /* sublang 0x01, Latin script */
#define LANGUAGE_USER_KURDISH_SYRIA         0x0A26  /* sublang 0x02, Latin script */
#define LANGUAGE_USER_KURDISH_IRAQ          0x0E26  /* sublang 0x03, Arabic script */
#define LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ 0x0E26  /* sublang 0x03, Arabic script */
#define LANGUAGE_USER_KURDISH_IRAQ          LANGUAGE_KURDISH_ARABIC_IRAQ
#define LANGUAGE_USER_KURDISH_IRAN          0x1226  /* sublang 0x04, Arabic script */
#define LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ 0x8092  /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_KURDISH_ARABIC_LSO)) */
#define LANGUAGE_USER_SARDINIAN             0x0627
/* was reserved for Dzongkha but turned down with #i53497#: 0x0628 */  /* obsoleted by LANGUAGE_DZONGKHA */
#define LANGUAGE_USER_DZONGKHA_MAP_LONLY    0xF851  /* to map "dz" only, because of the MS error, and preserve CTL information, sub 0x3e */
diff --git a/svtools/source/misc/langtab.src b/svtools/source/misc/langtab.src
index 7494617a..0f5f584 100644
--- a/svtools/source/misc/langtab.src
+++ b/svtools/source/misc/langtab.src
@@ -212,10 +212,11 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE
        < "Bengali (Bangladesh)" ; LANGUAGE_BENGALI_BANGLADESH ; > ;
        < "Occitan" ; LANGUAGE_USER_OCCITAN ; > ;
        < "Khmer" ; LANGUAGE_KHMER ; > ;
        < "Kurdish (Turkey)" ; LANGUAGE_USER_KURDISH_TURKEY ; > ;
        < "Kurdish (Syria)" ; LANGUAGE_USER_KURDISH_SYRIA ; > ;
        < "Kurdish (Iraq)" ; LANGUAGE_USER_KURDISH_IRAQ ; > ;
        < "Kurdish (Iran)" ; LANGUAGE_USER_KURDISH_IRAN ; > ;
        < "Kurdish, Northern (Turkey)" ; LANGUAGE_USER_KURDISH_TURKEY ; > ;
        < "Kurdish, Northern (Syria)" ; LANGUAGE_USER_KURDISH_SYRIA ; > ;
        < "Kurdish, Central (Iraq)" ; LANGUAGE_USER_KURDISH_IRAQ ; > ;
        < "Kurdish, Central (Iran)" ; LANGUAGE_USER_KURDISH_IRAN ; > ;
        < "Kurdish, Southern (Iraq)" ; LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ ; > ;
        < "Sardinian" ; LANGUAGE_USER_SARDINIAN ; > ;
        < "Dzongkha" ; LANGUAGE_DZONGKHA ; > ;
        < "Swahili (Tanzania)" ; LANGUAGE_USER_SWAHILI_TANZANIA ; > ;