ಮೋಡ್ಯೂಲ್:Multilingual description

Documentation for this module may be created at ಮೋಡ್ಯೂಲ್:Multilingual description/doc

-- Module export table; public entry points are attached to it below and it is returned at the end of the file.
local p = {}

--[==[
  Maps language codes that may be passed as parameter names but are not
  recognised under that spelling to an alternate code that is recognised,
  so that a visible language name can be shown (and, where possible, so that
  the code used conforms to BCP 47).

  Key   = code as it may appear in the template arguments.
  Value = replacement code to use instead.

  NOTE: this table is traversed with pairs(), whose order is unspecified;
  several keys intentionally share the same replacement (e.g. 'ha-latn' and
  'ha-arab' both map to 'ha').
--]==]
local remappedLanguages = {
    -- Group 1: legacy broken codes (known) replaced by a newer code that is also known and conforms to BCP 47.
    ['als'] = 'gsw', -- legacy broken code (but known) to changed new code (also known, but conforming to BCP47)
    ['bat-smg'] = 'sgs', -- legacy broken code, see group note
    ['be-x-old'] = 'be-tarask', -- legacy broken code, see group note
    ['bh'] = 'bho', -- legacy broken code, see group note
    ['bu'] = 'my', -- legacy broken code, see group note
    ['fiu-vro'] = 'vro', -- legacy broken code, see group note
    ['nrm'] = 'nrf', -- legacy broken code (usage of 'nrm' on Wikimedia for Norman conflicts with the standard 'nrm' which actually refers to an unrelated language)
    ['simple'] = 'en', -- legacy broken code, see group note
    ['zh-classical'] = 'lzh', -- legacy broken code, see group note
    ['zh-min-nan'] = 'nan', -- legacy broken code, see group note
    ['zh-wuu'] = 'wuu', -- legacy broken code, see group note
    ['zh-yue'] = 'yue', -- legacy broken code, see group note

    -- Group 2: both codes are conforming and supported, but only the replacement is known.
    ['en-us'] = 'en', -- only 'en' is known
    ['fa-af'] = 'fa', -- only 'fa' is known; 'fa-af' actually means "Eastern Dari"
    ['fr-x-galo'] = 'fr', -- only 'fr' is known
    ['ha-latn'] = 'ha', -- only 'ha' is known; the Latin script is the default since the 1950's
    ['ha-arab'] = 'ha', -- only 'ha' is known; the Arabic script is historic, without clear orthography
    ['ko-kr'] = 'ko', -- only 'ko' is known
    ['ku-latn'] = 'ku', -- only 'ku' is known; the Latin script is the default since the 1950's
    ['ku-cyrl'] = 'ku', -- only 'ku' is known; the Cyrillic script is still used
    ['no'] = 'nb', -- both conforming and supported, but 'no' is now used only for meaning 'nb' in MediaWiki
    ['prd'] = 'fa', -- only 'fa' is known; 'prd' is "Parsi-Dari"
    ['tgl'] = 'tl', -- only 'tl' is known

    -- Group 3: the replacement is a non-standard code that is in fact not supported, but it has a correct native name.
    ['sr-cyrl'] = 'sr-ec', -- non-standard replacement with a correct native name
    ['sr-latn'] = 'sr-el', -- non-standard replacement with a correct native name
}

-- Appends a tracking-category link to the list of output fragments.
--   descriptions: sequence of wikitext fragments (modified in place)
--   kind: 'deprecated' selects the "deprecated language codes" category;
--         any other value selects the "unsupported language codes" category
local function addTracking(descriptions, kind)
    local suffix = ' using unsupported language codes'
    if kind == 'deprecated' then
        suffix = ' using deprecated language codes'
    end
    local link = '[[Category:Multilingual descriptions' .. suffix .. ']]'
    table.insert(descriptions, link)
end

local sortedKnownLanguageTags = require('Module:Multilingual description/sort')
local dir = require('Module:Dir').select
-- Renders one description through the 'Ls' template and appends the result
-- to the list of output fragments.
--   descriptions: sequence of wikitext fragments (modified in place)
--   lang:         language code, passed as the first positional template argument
--   description:  text of the description; ignored unless it is a string that
--                 is non-empty after trimming
--   update:       forwarded unchanged as the template's 'update' argument (may be nil)
local function addDescription(descriptions, lang, description, update)
    if type(description) ~= 'string' then
        return
    end
    local text = mw.text.trim(description)
    if text:len() <= 0 then
        return
    end
    local templateArgs = {
        lang,
        text,
        -- presumably yields 'rtl' or 'ltr' depending on the language; see Module:Dir
        dir = dir(lang, 'rtl', 'ltr'),
        classes = 'description',
        update = update
    }
    local rendered = mw.getCurrentFrame():expandTemplate{
        title = 'Ls',
        args = templateArgs
    }
    table.insert(descriptions, rendered)
end

-- Returns the string keys of table t as a sorted sequence.
-- Used to traverse tables in a reproducible order, since pairs() order is unspecified.
local function sortedStringKeys(t)
    local keys = {}
    for key in pairs(t) do
        if type(key) == 'string' then
            keys[#keys + 1] = key
        end
    end
    table.sort(keys)
    return keys
end

--[==[
  Builds the wikitext for a multilingual description.

  args: table mapping language codes to description strings (typically the
        arguments of the parent frame). Entries whose key or value is not a
        string are ignored.

  Returns the concatenation of:
    1. one rendered description per language listed in sortedKnownLanguageTags,
       in that list's order;
    2. the remaining languages, in ascending order of their code (those that
       are not supported are rendered with their code as the 'update' argument);
    3. tracking categories, when a legacy code was remapped and/or an
       unsupported code was found.
--]==]
local function _mld(args)
    -- Shallow copy of arguments (because keys in args cannot be unset if args is hollow, in a parent frame outside Lua)
    -- DO NOT copy the metatable that exposes only a *read-only* interface with accessors to PHP arrays (mw.clone does NOT work)!
    local byLang = {}
    for lang, description in pairs(args) do
        if type(lang) == 'string' and type(description) == 'string' then
            -- MediaWiki trims the names of named argument and their values, but does not remove HTML comments in these names
            -- (some Mld contain parameters like "| sk <!--Slovensko--> = ...")
            -- (only the first result of gsub, the string, is kept by the single-target assignment)
            local code = lang:gsub('<!%-%-.-%-%->', ''):gsub('^%s*(.-)%s*$','%1')
            byLang[code] = description
        end
    end
    args = byLang

    -- Remap legacy codes to their known alternates.
    -- The codes are visited in sorted order (over a snapshot of the keys, so
    -- args can be modified safely) to make the result reproducible when two
    -- legacy codes share the same alternate: relying on pairs() order would
    -- make the winner arbitrary.
    local remapped = false
    for _, cur in ipairs(sortedStringKeys(args)) do
        local alt = remappedLanguages[cur]
        if alt and not(mw.language.isSupportedLanguage(cur) and mw.language.isKnownLanguageTag(cur))
                and (mw.language.isSupportedLanguage(alt) and mw.language.isKnownLanguageTag(alt)) then
            remapped = true
            if args[alt] == nil then -- only if this does not conflict
                args[alt] = args[cur] -- set key for alternate known language
            end
            args[cur] = nil -- unset the standard unknown key
        end
    end

    local descriptions = {}
    -- First all known languages in order if they have description
    for _, lang in ipairs(sortedKnownLanguageTags) do
        if args[lang] ~= nil then
            addDescription(descriptions, lang, args[lang], nil)
            args[lang] = nil
        end
    end

    -- Then append every remaining language, sorted by code so that the output
    -- does not depend on the unspecified traversal order of pairs().
    -- Unsupported codes are still rendered, but flagged through the 'update'
    -- argument and tracked in a category.
    local unsupported = false
    for _, lang in ipairs(sortedStringKeys(args)) do
        if mw.language.isSupportedLanguage(lang) then
            addDescription(descriptions, lang, args[lang], nil)
        else
            addDescription(descriptions, lang, args[lang], lang)
            unsupported = true
        end
    end

    if remapped then
        addTracking(descriptions, 'deprecated')
    end
    if unsupported then
        addTracking(descriptions, 'unsupported')
    end
    return table.concat(descriptions)
end

-- Entry point for #invoke: renders the multilingual description from the
-- arguments of the parent frame (an empty argument table is used when there
-- is no parent frame or it has no args).
function p.mld(frame)
    local parent = frame:getParent()
    local args = parent and parent.args
    if not args then
        args = {}
    end
    return _mld(args)
end

-- Attaches a self-test to the module's metatable. It feeds a fixed set of
-- arguments to _mld and compares the result with the output built by hand
-- from addDescription/addTracking; returns true on match, otherwise logs
-- both strings and returns false.
setmetatable(p, {quickTests = function()
    local input = {
        [1] = 'One?', -- discarded
        unsupported = 'What?', -- unsupported
        en = ' ', -- empty description after trimming (discarded)
        als = 'GSW', -- will be remapped
        ['en-gb'] = 'EN-GB ', -- trimming at end
        ['en-ca'] = 'EN-CA <nowiki/>',
        de = ' DE', -- trimming at start
        fr = ' FR ', -- trimming both ends
        rue = 'RUE',
        ru = 'RU',
        ko = 'KO',
        ja = 'JA',
        zh = 'ZH',
        he = 'HE',
        ur = 'UR',
        ar = 'AR',
        ro = 'RO',
        ['be-tarask'] = 'BE-TARASK',
        dv = 'DV',
    }
    --[=[
    Known languages in the exact order to expect according to native language
    names, after discarding empty descriptions and non-string keys.
    Each entry is { language code, expected (trimmed) description }.
    --]=]
    local knownInOrder = {
        { 'gsw', 'GSW' }, -- Alemannisch (remapped from 'als')
        { 'en-gb', 'EN-GB' }, -- British English
        { 'en-ca', 'EN-CA <nowiki/>' }, -- Canadian English
        { 'de', 'DE' }, -- Deutsch
        { 'fr', 'FR' }, -- français
        { 'ro', 'RO' }, -- română
        { 'be-tarask', 'BE-TARASK' }, -- беларуская (тарашкевіца)
        { 'rue', 'RUE' }, -- русиньскый
        { 'ru', 'RU' }, -- русский
        { 'ko', 'KO' }, -- 한국어
        { 'ja', 'JA' }, -- 日本語
        { 'zh', 'ZH' }, -- 中文
        { 'he', 'HE' }, -- עברית
        { 'ur', 'UR' }, -- اردو
        { 'ar', 'AR' }, -- العربية
        { 'dv', 'DV' }, -- ދިވެހިބަސް
    }
    local fragments = {}
    for _, entry in ipairs(knownInOrder) do
        addDescription(fragments, entry[1], entry[2])
    end
    -- The unsupported code is still rendered, after the known ones, flagged with its own code.
    addDescription(fragments, 'unsupported', 'What?', 'unsupported')
    addTracking(fragments, 'deprecated')
    addTracking(fragments, 'unsupported')
    local expect = table.concat(fragments)
    local actual = _mld(input)
    if actual == expect then
        return true
    end
    mw.log('expect:\n' .. expect)
    mw.log('actual:\n' .. actual)
    return false
end})
--[==[ Type this to run tests in the Lua console:
=getmetatable(p).quickTests() -- should return true
--]==]
-- Export the module table (with p.mld as its entry point).
return p