-- Module:Dir/sandbox

--[==[ This module implements Template:Dir.

Enter this to run tests in the Lua console: =getmetatable(p).quickTests()

--]==] require('strict')

-- Direction override data. This module reads it by language code and by '-xxxx' script subtag;
-- quickTests additionally reads the keys `true` and `false` as the RTL and LTR code lists.
local overrides = require('Module:Dir/RTL overrides/sandbox')
local language = mw.language
-- Some method calls or property reads are expensive and limited to 20 querying languages per MediaWiki instance.
local languageNew = language.new
-- Cached library functions used by quickTests.
local sort = table.sort
local log = mw.log

--[==[
Strip leading and trailing whitespace from a value.

@param s  any value; non-strings are converted with tostring().
@return   the trimmed string, or nil when `s` is nil/false or when nothing but
          whitespace remains (so that callers can write `trim(x) or default`).
--]==]
local function trim(s)
    if not s then
        return nil
    end
    local stripped = tostring(s):match('^%s*(.-)%s*$')
    -- An empty and a whitespace-only input are both "no value": unnamed template parameters
    -- are not trimmed by MediaWiki, so '  ' must behave like ''.
    if stripped == '' then
        return nil
    end
    return stripped
end

--[==[
Tell whether a language code designates a right-to-left language.

@param lang  language code (string); surrounding whitespace, letter case and '_' separators
             are normalized before lookup.
@return      nil when `lang` is not a string, is empty, or is not well-formed;
             otherwise the value mapped in the override data (by full code first, then by
             script subtag) when there is one; otherwise what MediaWiki's isRTL() returns,
             or false when that call raises an error.
--]==]
local function isRTL(lang)
    if type(lang) ~= 'string' then return nil end
    lang = trim(lang)
    if not lang then return nil end
    --[==[
    Normalize the language code to lowercase with hyphen separators, and check if it's wellformed.
    Valid language tags are hyphen-separated lists of subtags, each subtag having 1 to 8 letters or digits, but
    the 1st subtag can only contain letters. BCP 47 (RFC 5646, September 2009) defines this ABNF syntax:
    -   Language-Tag = langtag / privateuse / grandfathered
    -   privateuse = "x" 1*("-" (1*8alphanum))
    -   langtag = language ["-" script] ["-" region] *("-" variant) *("-" extension) ["-" privateuse]
    -   language = 2*3ALPHA ["-" extlang]
    Wikimedia uses some legacy codes that match these rules but do not respect BCP 47 because they are
    unregistered or map to unrelated languages. It also still uses one grandfathered tag (zh-min-nan).
    --]==]
    lang = lang:gsub('_', '-'):lower()
    if lang:find('^[a-z][%-0-9a-z]*[0-9a-z]$') ~= 1 then return nil end
    -- Very fast lookup of language code, and not limited in the number of supported languages.
    local v = overrides[lang]
    -- Compare with nil rather than testing truthiness, so that an entry mapped to false is
    -- returned here too instead of falling through to the script lookup and the expensive call.
    -- NOTE(review): assumes the override data maps left-to-right codes to false, as the
    -- `overrides[lang] == nil` test in quickTests suggests; confirm against the data module.
    if v ~= nil then return v end -- Return it if it's mapped.
    --[==[
    Check if there's a 4-letter script variant subtag (from ISO 15924) after a hyphen.
    Note that Wikimedia uses the legacy code 'roa-tara', not conforming to BCP 47, where 'tara' would
    be reserved for a script code, but this still doesn't conflict with current ISO 15924 script codes.
    If ever ISO 15924 assigns it, this would only affect 'roa' which is not even a conforming code for
    an ISO 639-1/2/3 language, but only for an ISO 639-5 family, but we first check the full 'roa-tara'
    above as an override before looking up generic script codes with other language codes here.
    --]==]
    local pos = lang:find('%-[a-z][a-z][a-z][a-z]$') or lang:find('%-[a-z][a-z][a-z][a-z]%-')
    if pos then
        -- The lookup key keeps the leading hyphen: '-' plus the 4 letters.
        v = overrides[lang:sub(pos, pos + 4)]
        if v ~= nil then return v end -- Return it if it's mapped.
    end
    -- Try using MediaWiki library.
    local success, ret = pcall(function()
        return languageNew(lang):isRTL() -- Expensive and limited to 20 languages per MediaWiki instance.
    end)
    -- On failure `ret` holds the error message, so report false instead.
    return success and ret
end

--[==[
Pick one of two values according to the direction of a language.

@param lang  language code, passed unchanged to isRTL().
@param rtl   value returned when isRTL(lang) is truthy.
@param ltr   value returned otherwise (including when the code is invalid or unknown).

Note: inside this file this local shadows Lua's built-in `select`.
--]==]
local function select(lang, rtl, ltr)
    if not isRTL(lang) then
        return ltr
    end
    return rtl
end

--[==[
Used via a template (Template:Dir) which just calls this function; the 3 parameters are
read from the transcluding template's frame and are automatically trimmed:
 * Parameter 1 takes the default value from {{Int:Lang}} if it's empty or not specified.
 * Parameter 2 can be named rtl, may be explicitly empty, otherwise takes the default value 'rtl' only if it's not specified.
 * Parameter 3 can be named ltr, may be explicitly empty, otherwise takes the default value 'ltr' only if it's not specified.

@param frame  the frame object of the call; its parent frame supplies the arguments.
@return       the (trimmed) rtl value when the language is right-to-left, else the ltr value.
--]==]
local function main(frame)
    local args = frame:getParent().args -- Parameters used to transclude Template:Dir
    local code = trim(args[1])
    -- Unnamed parameters are not trimmed by MediaWiki, so also treat a parameter that is
    -- empty once trimmed as "not specified".
    if not code or code == '' then
        code = frame:callParserFunction('Int', 'Lang')
    end
    -- An explicitly empty parameter is a truthy '' and therefore wins over the default.
    local rtl = trim(args.rtl or args[2] or 'rtl') or ''
    local ltr = trim(args.ltr or args[3] or 'ltr') or ''
    return select(code, rtl, ltr)
end

-- Exported functions.
-- The metatable carries `quickTests`, a self-check meant to be run from the Lua console.
-- It validates the override lists, logs codes whose direction is missing or that MediaWiki
-- does not know, sorts and deduplicates the two lists in place and logs them.
-- Returns true on success, or false plus a message when the data is malformed or a code
-- appears in both lists.
return setmetatable({
    isRTL = isRTL,
    select = select,
    main = main
}, {
    quickTests = function()
        local ltrLangs, rtlLangs = overrides[false], overrides[true]

        -- Basic check of data format: integer keys, values that are 2 to 16 characters long
        -- and look like a lowercase language code or a '-xxxx' script subtag.
        local function checkLangs(name, langs)
            for k, lang in pairs(langs) do
                assert(type(k) == 'number' and k == math.floor(k)
                    and type(lang) == 'string' and #lang >= 2 and #lang <= 16
                    and (lang:find('^[a-z][%-0-9a-z]*[0-9a-z]$') == 1 or lang:find('^%-[a-z][a-z][a-z][a-z]$') == 1),
                    ": invalid sequence of lowercase language codes, " .. tostring(name) .. "['" .. tostring(k) .. "'] = '" .. tostring(lang) .. "'")
            end
            return true
        end

        local ok, msg = pcall(checkLangs, 'rtlLangs', rtlLangs)
        if not ok then return false, msg end
        ok, msg = pcall(checkLangs, 'ltrLangs', ltrLangs)
        if not ok then return false, msg end

        -- Check conflicts: a code must not be listed with both directions.
        -- One pass over the RTL list against the set of LTR codes finds every common code,
        -- so no second pass in the opposite direction is needed.
        local ltrSet = {}
        for _, lang in ipairs(ltrLangs) do
            ltrSet[lang] = true
        end
        for _, lang in ipairs(rtlLangs) do
            if ltrSet[lang] then
                return false, ": direction conflict for language '" .. lang .. "'"
            end
        end

        -- Log missing languages (allows filling the override tables) according to MediaWiki internal data.
        local knownLangs, isKnownLang = language.fetchLanguageNames(), {}
        for lang, name in pairs(knownLangs) do
            isKnownLang[lang] = true
            if overrides[lang] == nil then -- Only if we still don't have local data for this language.
                -- Check if it has a 4-letter script variant subtag (from ISO 15924) after a hyphen.
                local pos = lang:find('%-[a-z][a-z][a-z][a-z]$') or lang:find('%-[a-z][a-z][a-z][a-z]%-')
                -- NOTE(review): as written, only codes that carry an unmapped script subtag are
                -- reported; unmapped codes without any script subtag are silently skipped.
                -- Confirm that this is intended.
                if pos and overrides[lang:sub(pos, pos + 4)] == nil then -- Only if we still don't have local data for the script variant.
                    -- Note: we cannot check more than 20 languages at once, then MediaWiki raises an error.
                    -- So this test only runs on the Lua console, where you can update the override tables.
                    -- This also means we cannot compare what MediaWiki returns with the direction we map here for all languages.
                    -- `value` is a local: the module runs under require('strict'), where assigning
                    -- an undeclared global raises an error. On failure it holds the error message.
                    local _, value = pcall(function()
                        return tostring(languageNew(lang):isRTL())
                    end)
                    log("Warning: missing direction for language '" .. lang .. "' (" .. name .."), MediaWiki returns '" .. tostring(value) .. "'")
                end
            end
        end

        -- Sort and deduplicate language code values (by scanning backward, so that removing
        -- entry i does not disturb the entries still to be visited) for data cleanup.
        -- Also log languages having a direction mapping in this module but still not known by MediaWiki.
        local function sortAndReport(langs, label)
            sort(langs)
            for i = #langs, 1, -1 do
                local lang = langs[i]
                if langs[i - 1] == lang then
                    table.remove(langs, i)
                end
                -- Entries starting with '-' are script subtags, not language codes.
                if lang:sub(1, 1) ~= '-' and not isKnownLang[lang] then
                    log(label .. " language '" .. lang .. "' not known by Mediawiki (possibly missing alias or variant)")
                end
            end
        end
        sortAndReport(ltrLangs, 'LTR')
        sortAndReport(rtlLangs, 'RTL')

        -- Final presentation of current lists, sorted and deduplicated.
        log('')
        log('')
        log("local rtlLangs = { '" .. table.concat(rtlLangs, "', '") .. "' }")
        log('')
        log("local ltrLangs = { '" .. table.concat(ltrLangs, "', '") .. "' }")
        log('')
        return true
    end
})