Modul:Text: Unterschied zwischen den Versionen
te>PerfektesChaos (update) |
Admin (Diskussion | Beiträge) K (30 Versionen importiert) |
||
(28 dazwischenliegende Versionen von 6 Benutzern werden nicht angezeigt) | |||
Zeile 1: | Zeile 1: | ||
− | --[=[ | + | local Text = { serial = "2018-11-14", |
+ | suite = "Text" } | ||
+ | --[=[ | ||
Text utilities | Text utilities | ||
]=] | ]=] | ||
Zeile 5: | Zeile 7: | ||
− | local Text = { } | + | -- local globals |
− | local | + | local PatternCJK = false |
− | local | + | local PatternCombined = false |
− | local | + | local PatternLatin = false |
+ | local PatternTerminated = false | ||
+ | local RangesLatin = false | ||
+ | local SeekQuote = false | ||
+ | |||
+ | |||
+ | |||
+ | local function factoryQuote() | ||
+ | -- Create quote definitions | ||
+ | if not Text.quoteLang then | ||
+ | local lucky, quoting = pcall( mw.loadData, | ||
+ | "Module:Text/quoting" ) | ||
+ | if type( quoting ) == "table" then | ||
+ | Text.quoteLang = quoting.langs | ||
+ | Text.quoteType = quoting.types | ||
+ | end | ||
+ | if type( Text.quoteLang ) ~= "table" then | ||
+ | Text.quoteLang = { } | ||
+ | end | ||
+ | if type( Text.quoteType ) ~= "table" then | ||
+ | Text.quoteType = { } | ||
+ | end | ||
+ | if type( Text.quoteLang.en ) ~= "string" then | ||
+ | Text.quoteLang.en = "ld" | ||
+ | end | ||
+ | if type( Text.quoteType[ Text.quoteLang.en ] ) ~= "table" then | ||
+ | Text.quoteType[ Text.quoteLang.en ] = { { 8220, 8221 }, | ||
+ | { 8216, 8217 } } | ||
+ | end | ||
+ | end | ||
+ | end -- factoryQuote() | ||
+ | |||
+ | |||
+ | |||
+ | local function fiatQuote( apply, alien, advance ) | ||
+ | -- Quote text | ||
+ | -- Parameter: | ||
+ | -- apply -- string, with text | ||
+ | -- alien -- string, with language code | ||
+ | -- advance -- number, with level 1 or 2 | ||
+ | local r = apply | ||
+ | local suite | ||
+ | factoryQuote() | ||
+ | suite = Text.quoteLang[ alien ] | ||
+ | if not suite then | ||
+ | local slang = alien:match( "^(%l+)-" ) | ||
+ | if slang then | ||
+ | suite = Text.quoteLang[ slang ] | ||
+ | end | ||
+ | if not suite then | ||
+ | suite = Text.quoteLang.en | ||
+ | end | ||
+ | end | ||
+ | if suite then | ||
+ | local quotes = Text.quoteType[ suite ] | ||
+ | if quotes then | ||
+ | local space | ||
+ | if quotes[ 3 ] then | ||
+ | space = " " | ||
+ | else | ||
+ | space = "" | ||
+ | end | ||
+ | quotes = quotes[ advance ] | ||
+ | if quotes then | ||
+ | r = mw.ustring.format( "%s%s%s%s%s", | ||
+ | mw.ustring.char( quotes[ 1 ] ), | ||
+ | space, | ||
+ | apply, | ||
+ | space, | ||
+ | mw.ustring.char( quotes[ 2 ] ) ) | ||
+ | end | ||
+ | else | ||
+ | mw.log( "fiatQuote() " .. suite ) | ||
+ | end | ||
+ | end | ||
+ | return r | ||
+ | end -- fiatQuote() | ||
+ | |||
+ | |||
+ | |||
+ | Text.char = function ( apply, again, accept ) | ||
+ | -- Create string from codepoints | ||
+ | -- Parameter: | ||
+ | -- apply -- table (sequence) with numerical codepoints, or nil | ||
+ | -- again -- number of repetitions, or nil | ||
+ | -- accept -- true, if no error messages to be appended | ||
+ | -- Returns: string | ||
+ | local r | ||
+ | if type( apply ) == "table" then | ||
+ | local bad = { } | ||
+ | local codes = { } | ||
+ | local s | ||
+ | for k, v in pairs( apply ) do | ||
+ | s = type( v ) | ||
+ | if s == "number" then | ||
+ | if v < 32 and v ~= 9 and v ~= 10 then | ||
+ | v = tostring( v ) | ||
+ | else | ||
+ | v = math.floor( v ) | ||
+ | s = false | ||
+ | end | ||
+ | elseif s ~= "string" then | ||
+ | v = tostring( v ) | ||
+ | end | ||
+ | if s then | ||
+ | table.insert( bad, v ) | ||
+ | else | ||
+ | table.insert( codes, v ) | ||
+ | end | ||
+ | end -- for k, v | ||
+ | if #bad == 0 then | ||
+ | if #codes > 0 then | ||
+ | r = mw.ustring.char( unpack( codes ) ) | ||
+ | if again then | ||
+ | if type( again ) == "number" then | ||
+ | local n = math.floor( again ) | ||
+ | if n > 1 then | ||
+ | r = r:rep( n ) | ||
+ | elseif n < 1 then | ||
+ | r = "" | ||
+ | end | ||
+ | else | ||
+ | s = "bad repetitions: " .. tostring( again ) | ||
+ | end | ||
+ | end | ||
+ | end | ||
+ | else | ||
+ | s = "bad codepoints: " .. table.concat( bad, " " ) | ||
+ | end | ||
+ | if s and not accept then | ||
+ | r = tostring( mw.html.create( "span" ) | ||
+ | :addClass( "error" ) | ||
+ | :wikitext( s ) ) | ||
+ | end | ||
+ | end | ||
+ | return r or "" | ||
+ | end -- Text.char() | ||
+ | |||
+ | |||
+ | |||
+ | Text.concatParams = function ( args, apply, adapt ) | ||
+ | -- Concat list items into one string | ||
+ | -- Parameter: | ||
+ | -- args -- table (sequence) with numKey=string | ||
+ | -- apply -- string (optional); separator (default: "|") | ||
+ | -- adapt -- string (optional); format including "%s" | ||
+ | -- Returns: string | ||
+ | local collect = { } | ||
+ | for k, v in pairs( args ) do | ||
+ | if type( k ) == "number" then | ||
+ | v = mw.text.trim( v ) | ||
+ | if v ~= "" then | ||
+ | if adapt then | ||
+ | v = mw.ustring.format( adapt, v ) | ||
+ | end | ||
+ | table.insert( collect, v ) | ||
+ | end | ||
+ | end | ||
+ | end -- for k, v | ||
+ | return table.concat( collect, apply or "|" ) | ||
+ | end -- Text.concatParams() | ||
Zeile 17: | Zeile 179: | ||
-- analyse -- string | -- analyse -- string | ||
-- Returns: true, if CJK detected | -- Returns: true, if CJK detected | ||
+ | local r | ||
if not patternCJK then | if not patternCJK then | ||
patternCJK = mw.ustring.char( 91, | patternCJK = mw.ustring.char( 91, | ||
Zeile 30: | Zeile 193: | ||
return r | return r | ||
end -- Text.containsCJK() | end -- Text.containsCJK() | ||
+ | |||
+ | |||
+ | |||
+ | Text.getPlain = function ( adjust ) | ||
+ | -- Remove wikisyntax from string, except templates | ||
+ | -- Parameter: | ||
+ | -- adjust -- string | ||
+ | -- Returns: string | ||
+ | local i = adjust:find( "<!--", 1, true ) | ||
+ | local r = adjust | ||
+ | local j | ||
+ | while i do | ||
+ | j = r:find( "-->", i + 3, true ) | ||
+ | if j then | ||
+ | r = r:sub( 1, i ) .. r:sub( j + 3 ) | ||
+ | else | ||
+ | r = r:sub( 1, i ) | ||
+ | end | ||
+ | i = r:find( "<!--", i, true ) | ||
+ | end -- "<!--" | ||
+ | r = r:gsub( "(</?%l[^>]*>)", "" ) | ||
+ | :gsub( "'''(.+)'''", "%1" ) | ||
+ | :gsub( "''(.+)''", "%1" ) | ||
+ | :gsub( " ", " " ) | ||
+ | return r | ||
+ | end -- Text.getPlain() | ||
+ | |||
+ | |||
+ | |||
+ | Text.isLatinRange = function ( adjust ) | ||
+ | -- Are characters expected to be latin or symbols within latin texts? | ||
+ | -- Precondition: | ||
+ | -- adjust -- string, or nil for initialization | ||
+ | -- Returns: true, if valid for latin only | ||
+ | local r | ||
+ | if not RangesLatin then | ||
+ | RangesLatin = { { 7, 687 }, | ||
+ | { 7531, 7578 }, | ||
+ | { 7680, 7935 }, | ||
+ | { 8194, 8250 } } | ||
+ | end | ||
+ | if not PatternLatin then | ||
+ | local range | ||
+ | PatternLatin = "^[" | ||
+ | for i = 1, #RangesLatin do | ||
+ | range = RangesLatin[ i ] | ||
+ | PatternLatin = PatternLatin .. | ||
+ | mw.ustring.char( range[ 1 ], 45, range[ 2 ] ) | ||
+ | end -- for i | ||
+ | PatternLatin = PatternLatin .. "]*$" | ||
+ | end | ||
+ | if adjust then | ||
+ | if mw.ustring.match( adjust, PatternLatin ) then | ||
+ | r = true | ||
+ | else | ||
+ | r = false | ||
+ | end | ||
+ | end | ||
+ | return r | ||
+ | end -- Text.isLatinRange() | ||
+ | |||
+ | |||
+ | |||
+ | Text.isQuote = function ( ask ) | ||
+ | -- Is this character any quotation mark? | ||
+ | -- Parameter: | ||
+ | -- ask -- string, with single character | ||
+ | -- Returns: true, if ask is quotation mark | ||
+ | local r | ||
+ | if not SeekQuote then | ||
+ | SeekQuote = mw.ustring.char( 34, -- " | ||
+ | 39, -- ' | ||
+ | 171, -- laquo | ||
+ | 187, -- raquo | ||
+ | 8216, -- lsquo | ||
+ | 8217, -- rsquo | ||
+ | 8218, -- sbquo | ||
+ | 8220, -- ldquo | ||
+ | 8221, -- rdquo | ||
+ | 8222, -- bdquo | ||
+ | 8249, -- lsaquo | ||
+ | 8250, -- rsaquo | ||
+ | 0x300C, -- CJK | ||
+ | 0x300D, -- CJK | ||
+ | 0x300E, -- CJK | ||
+ | 0x300F ) -- CJK | ||
+ | end | ||
+ | if ask == "" then | ||
+ | r = false | ||
+ | elseif mw.ustring.find( SeekQuote, ask, 1, true ) then | ||
+ | r = true | ||
+ | else | ||
+ | r = false | ||
+ | end | ||
+ | return r | ||
+ | end -- Text.isQuote() | ||
+ | |||
+ | |||
+ | |||
+ | Text.listToText = function ( args, adapt ) | ||
+ | -- Format list items similar to mw.text.listToText() | ||
+ | -- Parameter: | ||
+ | -- args -- table (sequence) with numKey=string | ||
+ | -- adapt -- string (optional); format including "%s" | ||
+ | -- Returns: string | ||
+ | local collect = { } | ||
+ | for k, v in pairs( args ) do | ||
+ | if type( k ) == "number" then | ||
+ | v = mw.text.trim( v ) | ||
+ | if v ~= "" then | ||
+ | if adapt then | ||
+ | v = mw.ustring.format( adapt, v ) | ||
+ | end | ||
+ | table.insert( collect, v ) | ||
+ | end | ||
+ | end | ||
+ | end -- for k, v | ||
+ | return mw.text.listToText( collect ) | ||
+ | end -- Text.listToText() | ||
+ | |||
+ | |||
+ | |||
+ | Text.quote = function ( apply, alien, advance ) | ||
+ | -- Quote text | ||
+ | -- Parameter: | ||
+ | -- apply -- string, with text | ||
+ | -- alien -- string, with language code, or nil | ||
+ | -- advance -- number, with level 1 or 2, or nil | ||
+ | -- Returns: quoted string | ||
+ | local mode, slang | ||
+ | if type( alien ) == "string" then | ||
+ | slang = mw.text.trim( alien ):lower() | ||
+ | else | ||
+ | slang = mw.title.getCurrentTitle().pageLanguage | ||
+ | if not slang then | ||
+ | -- TODO FIXME: Introduction expected 2017-04 | ||
+ | slang = mw.language.getContentLanguage():getCode() | ||
+ | end | ||
+ | end | ||
+ | if advance == 2 then | ||
+ | mode = 2 | ||
+ | else | ||
+ | mode = 1 | ||
+ | end | ||
+ | return fiatQuote( mw.text.trim( apply ), slang, mode ) | ||
+ | end -- Text.quote() | ||
+ | |||
+ | |||
+ | |||
+ | Text.quoteUnquoted = function ( apply, alien, advance ) | ||
+ | -- Quote text, if not yet quoted and not empty | ||
+ | -- Parameter: | ||
+ | -- apply -- string, with text | ||
+ | -- alien -- string, with language code, or nil | ||
+ | -- advance -- number, with level 1 or 2, or nil | ||
+ | -- Returns: string; possibly quoted | ||
+ | local r = mw.text.trim( apply ) | ||
+ | local s = mw.ustring.sub( r, 1, 1 ) | ||
+ | if s ~= "" and not Text.isQuote( s, advance ) then | ||
+ | s = mw.ustring.sub( r, -1, 1 ) | ||
+ | if not Text.isQuote( s ) then | ||
+ | r = Text.quote( r, alien, advance ) | ||
+ | end | ||
+ | end | ||
+ | return r | ||
+ | end -- Text.quoteUnquoted() | ||
+ | |||
+ | |||
+ | |||
+ | Text.removeDiacritics = function ( adjust ) | ||
+ | -- Remove all diacritics | ||
+ | -- Parameter: | ||
+ | -- adjust -- string | ||
+ | -- Returns: string; all latin letters should be ASCII | ||
+ | -- or basic greek or cyrillic or symbols etc. | ||
+ | local cleanup, decomposed | ||
+ | if not PatternCombined then | ||
+ | PatternCombined = mw.ustring.char( 91, | ||
+ | 0x0300, 45, 0x036F, | ||
+ | 0x1AB0, 45, 0x1AFF, | ||
+ | 0x1DC0, 45, 0x1DFF, | ||
+ | 0xFE20, 45, 0xFE2F, | ||
+ | 93 ) | ||
+ | end | ||
+ | decomposed = mw.ustring.toNFD( adjust ) | ||
+ | cleanup = mw.ustring.gsub( decomposed, PatternCombined, "" ) | ||
+ | return mw.ustring.toNFC( cleanup ) | ||
+ | end -- Text.removeDiacritics() | ||
Zeile 40: | Zeile 391: | ||
-- Returns: true, if sentence terminated | -- Returns: true, if sentence terminated | ||
local r | local r | ||
− | if not | + | if not PatternTerminated then |
− | + | PatternTerminated = mw.ustring.char( 91, | |
12290, | 12290, | ||
65281, | 65281, | ||
Zeile 48: | Zeile 399: | ||
.. "!%.%?…][\"'%]‹›«»‘’“”]*$" | .. "!%.%?…][\"'%]‹›«»‘’“”]*$" | ||
end | end | ||
− | if mw.ustring.find( analyse, | + | if mw.ustring.find( analyse, PatternTerminated ) then |
r = true | r = true | ||
else | else | ||
Zeile 55: | Zeile 406: | ||
return r | return r | ||
end -- Text.sentenceTerminated() | end -- Text.sentenceTerminated() | ||
+ | |||
+ | |||
+ | |||
+ | Text.ucfirstAll = function ( adjust ) | ||
+ | -- Capitalize all words | ||
+ | -- Precondition: | ||
+ | -- adjust -- string | ||
+ | -- Returns: string with all first letters in upper case | ||
+ | local r = " " .. adjust | ||
+ | local i = 1 | ||
+ | local c, j, m | ||
+ | if adjust:find( "&" ) then | ||
+ | r = r:gsub( "&", "&" ) | ||
+ | :gsub( "<", "<" ) | ||
+ | :gsub( ">", ">" ) | ||
+ | :gsub( " ", " " ) | ||
+ | :gsub( " ", " " ) | ||
+ | :gsub( "‌", "‌" ) | ||
+ | :gsub( "‍", "‍" ) | ||
+ | :gsub( "‎", "‎" ) | ||
+ | :gsub( "‏", "‏" ) | ||
+ | m = true | ||
+ | end | ||
+ | while i do | ||
+ | i = mw.ustring.find( r, "%W%l", i ) | ||
+ | if i then | ||
+ | j = i + 1 | ||
+ | c = mw.ustring.upper( mw.ustring.sub( r, j, j ) ) | ||
+ | r = string.format( "%s%s%s", | ||
+ | mw.ustring.sub( r, 1, i ), | ||
+ | c, | ||
+ | mw.ustring.sub( r, i + 2 ) ) | ||
+ | i = j | ||
+ | end | ||
+ | end -- while i | ||
+ | r = r:sub( 2 ) | ||
+ | if m then | ||
+ | r = r:gsub( "&", "&" ) | ||
+ | :gsub( "<", "<" ) | ||
+ | :gsub( ">", ">" ) | ||
+ | :gsub( " ", " " ) | ||
+ | :gsub( " ", " " ) | ||
+ | :gsub( "‌", "‌" ) | ||
+ | :gsub( "‍", "‍" ) | ||
+ | :gsub( "‎", "‎" ) | ||
+ | :gsub( "‏", "‏" ) | ||
+ | :gsub( "&#X(%x+);", "&#x%1;" ) | ||
+ | end | ||
+ | return r | ||
+ | end -- Text.ucfirstAll() | ||
Zeile 65: | Zeile 466: | ||
-- Returns: string with non-latin parts enclosed in <span> | -- Returns: string with non-latin parts enclosed in <span> | ||
local r | local r | ||
− | + | Text.isLatinRange() | |
− | + | if mw.ustring.match( adjust, PatternLatin ) then | |
− | |||
− | |||
− | |||
− | |||
− | if mw.ustring.match( adjust, | ||
-- latin only, horizontal dashes, quotes | -- latin only, horizontal dashes, quotes | ||
r = adjust | r = adjust | ||
Zeile 80: | Zeile 476: | ||
local m = false | local m = false | ||
local n = mw.ustring.len( adjust ) | local n = mw.ustring.len( adjust ) | ||
− | local span = "%s%s<span style='font-style:normal'>%s</span>" | + | local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>" |
local flat = function ( a ) | local flat = function ( a ) | ||
− | + | -- isLatin | |
− | + | local range | |
− | + | for i = 1, #RangesLatin do | |
+ | range = RangesLatin[ i ] | ||
+ | if a >= range[ 1 ] and a <= range[ 2 ] then | ||
+ | return true | ||
+ | end | ||
+ | end -- for i | ||
+ | end -- flat() | ||
+ | local focus = function ( a ) | ||
+ | -- char is not ambivalent | ||
+ | local r = ( a > 64 ) | ||
+ | if r then | ||
+ | r = ( a < 8192 or a > 8212 ) | ||
+ | else | ||
+ | r = ( a == 38 or a == 60 ) -- '&' '<' | ||
+ | end | ||
+ | return r | ||
+ | end -- focus() | ||
local form = function ( a ) | local form = function ( a ) | ||
return string.format( span, | return string.format( span, | ||
Zeile 90: | Zeile 502: | ||
mw.ustring.sub( adjust, k, j - 1 ), | mw.ustring.sub( adjust, k, j - 1 ), | ||
mw.ustring.sub( adjust, j, a ) ) | mw.ustring.sub( adjust, j, a ) ) | ||
− | + | end -- form() | |
r = "" | r = "" | ||
for i = 1, n do | for i = 1, n do | ||
c = mw.ustring.codepoint( adjust, i, i ) | c = mw.ustring.codepoint( adjust, i, i ) | ||
− | if c | + | if focus( c ) then |
if flat( c ) then | if flat( c ) then | ||
if j then | if j then | ||
Zeile 105: | Zeile 517: | ||
end | end | ||
if j then | if j then | ||
− | r = form( | + | local nx = i - 1 |
+ | local s = "" | ||
+ | for ix = nx, 1, -1 do | ||
+ | c = mw.ustring.sub( adjust, ix, ix ) | ||
+ | if c == " " or c == "(" then | ||
+ | nx = nx - 1 | ||
+ | s = c .. s | ||
+ | else | ||
+ | break -- for ix | ||
+ | end | ||
+ | end -- for ix | ||
+ | r = form( nx ) .. s | ||
j = false | j = false | ||
k = i | k = i | ||
Zeile 122: | Zeile 545: | ||
m = m + 1 | m = m + 1 | ||
end | end | ||
− | end -- for i | + | end -- for i |
− | if j and m < n then | + | if j and ( not m or m < n ) then |
r = form( n ) | r = form( n ) | ||
else | else | ||
Zeile 131: | Zeile 554: | ||
return r | return r | ||
end -- Text.uprightNonlatin() | end -- Text.uprightNonlatin() | ||
+ | |||
+ | |||
+ | |||
+ | Text.test = function ( about ) | ||
+ | local r | ||
+ | if about == "quote" then | ||
+ | factoryQuote() | ||
+ | r = { QuoteLang = Text.quoteLang, | ||
+ | QuoteType = Text.quoteType } | ||
+ | end | ||
+ | return r | ||
+ | end -- Text.test() | ||
Zeile 136: | Zeile 571: | ||
-- Export | -- Export | ||
local p = { } | local p = { } | ||
+ | |||
+ | function p.char( frame ) | ||
+ | local params = frame:getParent().args | ||
+ | local story = params[ 1 ] | ||
+ | local codes, lenient, multiple | ||
+ | if not story then | ||
+ | params = frame.args | ||
+ | story = params[ 1 ] | ||
+ | end | ||
+ | if story then | ||
+ | local items = mw.text.split( story, "%s+" ) | ||
+ | if #items > 0 then | ||
+ | local j | ||
+ | lenient = ( params.errors == "0" ) | ||
+ | codes = { } | ||
+ | multiple = tonumber( params[ "*" ] ) | ||
+ | for k, v in pairs( items ) do | ||
+ | if v:sub( 1, 1 ) == "x" then | ||
+ | j = tonumber( "0" .. v ) | ||
+ | elseif v == "" then | ||
+ | v = false | ||
+ | else | ||
+ | j = tonumber( v ) | ||
+ | end | ||
+ | if v then | ||
+ | table.insert( codes, j or v ) | ||
+ | end | ||
+ | end -- for k, v | ||
+ | end | ||
+ | end | ||
+ | return Text.char( codes, multiple, lenient ) | ||
+ | end | ||
+ | |||
+ | function p.concatParams( frame ) | ||
+ | local args | ||
+ | local template = frame.args.template | ||
+ | if type( template ) == "string" then | ||
+ | template = mw.text.trim( template ) | ||
+ | template = ( template == "1" ) | ||
+ | end | ||
+ | if template then | ||
+ | args = frame:getParent().args | ||
+ | else | ||
+ | args = frame.args | ||
+ | end | ||
+ | return Text.concatParams( args, | ||
+ | frame.args.separator, | ||
+ | frame.args.format ) | ||
+ | end | ||
function p.containsCJK( frame ) | function p.containsCJK( frame ) | ||
return Text.containsCJK( frame.args[ 1 ] or "" ) and "1" or "" | return Text.containsCJK( frame.args[ 1 ] or "" ) and "1" or "" | ||
+ | end | ||
+ | |||
+ | function p.getPlain( frame ) | ||
+ | return Text.getPlain( frame.args[ 1 ] or "" ) | ||
+ | end | ||
+ | |||
+ | function p.isLatinRange( frame ) | ||
+ | return Text.isLatinRange( frame.args[ 1 ] or "" ) and "1" or "" | ||
+ | end | ||
+ | |||
+ | function p.isQuote( frame ) | ||
+ | return Text.isQuote( frame.args[ 1 ] or "" ) and "1" or "" | ||
+ | end | ||
+ | |||
+ | |||
+ | |||
+ | function p.listToFormat(frame) | ||
+ | local lists = {} | ||
+ | local pformat = frame.args["format"] | ||
+ | local sep = frame.args["sep"] or ";" | ||
+ | |||
+ | -- Parameter parsen: Listen | ||
+ | for k, v in pairs(frame.args) do | ||
+ | local knum = tonumber(k) | ||
+ | if knum then lists[knum] = v end | ||
+ | end | ||
+ | |||
+ | -- Listen splitten | ||
+ | local maxListLen = 0 | ||
+ | for i = 1, #lists do | ||
+ | lists[i] = mw.text.split(lists[i], sep) | ||
+ | if #lists[i] > maxListLen then maxListLen = #lists[i] end | ||
+ | end | ||
+ | |||
+ | -- Ergebnisstring generieren | ||
+ | local result = "" | ||
+ | local result_line = "" | ||
+ | for i = 1, maxListLen do | ||
+ | result_line = pformat | ||
+ | for j = 1, #lists do | ||
+ | result_line = mw.ustring.gsub(result_line, "%%s", lists[j][i], 1) | ||
+ | end | ||
+ | result = result .. result_line | ||
+ | end | ||
+ | |||
+ | return result | ||
+ | end | ||
+ | |||
+ | |||
+ | |||
+ | function p.listToText( frame ) | ||
+ | local args | ||
+ | local template = frame.args.template | ||
+ | if type( template ) == "string" then | ||
+ | template = mw.text.trim( template ) | ||
+ | template = ( template == "1" ) | ||
+ | end | ||
+ | if template then | ||
+ | args = frame:getParent().args | ||
+ | else | ||
+ | args = frame.args | ||
+ | end | ||
+ | return Text.listToText( args, frame.args.format ) | ||
+ | end | ||
+ | |||
+ | |||
+ | |||
+ | function p.quote( frame ) | ||
+ | local slang = frame.args[2] | ||
+ | if type( slang ) == "string" then | ||
+ | slang = mw.text.trim( slang ) | ||
+ | if slang == "" then | ||
+ | slang = false | ||
+ | end | ||
+ | end | ||
+ | return Text.quote( frame.args[ 1 ] or "", | ||
+ | slang, | ||
+ | tonumber( frame.args[3] ) ) | ||
+ | end | ||
+ | |||
+ | |||
+ | |||
+ | function p.quoteUnquoted( frame ) | ||
+ | local slang = frame.args[2] | ||
+ | if type( slang ) == "string" then | ||
+ | slang = mw.text.trim( slang ) | ||
+ | if slang == "" then | ||
+ | slang = false | ||
+ | end | ||
+ | end | ||
+ | return Text.quoteUnquoted( frame.args[ 1 ] or "", | ||
+ | slang, | ||
+ | tonumber( frame.args[3] ) ) | ||
+ | end | ||
+ | |||
+ | |||
+ | |||
+ | function p.removeDiacritics( frame ) | ||
+ | return Text.removeDiacritics( frame.args[ 1 ] or "" ) | ||
end | end | ||
function p.sentenceTerminated( frame ) | function p.sentenceTerminated( frame ) | ||
return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or "" | return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or "" | ||
+ | end | ||
+ | |||
+ | function p.ucfirstAll( frame ) | ||
+ | return Text.ucfirstAll( frame.args[ 1 ] or "" ) | ||
+ | end | ||
+ | |||
+ | function p.unstrip( frame ) | ||
+ | return mw.text.trim( mw.text.unstrip( frame.args[ 1 ] or "" ) ) | ||
end | end | ||
Zeile 148: | Zeile 739: | ||
return Text.uprightNonlatin( frame.args[ 1 ] or "" ) | return Text.uprightNonlatin( frame.args[ 1 ] or "" ) | ||
end | end | ||
+ | |||
+ | |||
+ | |||
+ | function p.zip(frame) | ||
+ | local lists = {} | ||
+ | local seps = {} | ||
+ | local defaultsep = frame.args["sep"] or "" | ||
+ | local innersep = frame.args["isep"] or "" | ||
+ | local outersep = frame.args["osep"] or "" | ||
+ | |||
+ | -- Parameter parsen | ||
+ | for k, v in pairs(frame.args) do | ||
+ | local knum = tonumber(k) | ||
+ | if knum then lists[knum] = v else | ||
+ | if string.sub(k, 1, 3) == "sep" then | ||
+ | local sepnum = tonumber(string.sub(k, 4)) | ||
+ | if sepnum then seps[sepnum] = v end | ||
+ | end | ||
+ | end | ||
+ | end | ||
+ | -- sofern keine expliziten Separatoren angegeben sind, den Standardseparator verwenden | ||
+ | for i = 1, math.max(#seps, #lists) do | ||
+ | if not seps[i] then seps[i] = defaultsep end | ||
+ | end | ||
+ | |||
+ | -- Listen splitten | ||
+ | local maxListLen = 0 | ||
+ | for i = 1, #lists do | ||
+ | lists[i] = mw.text.split(lists[i], seps[i]) | ||
+ | if #lists[i] > maxListLen then maxListLen = #lists[i] end | ||
+ | end | ||
+ | |||
+ | local result = "" | ||
+ | for i = 1, maxListLen do | ||
+ | if i ~= 1 then result = result .. outersep end | ||
+ | for j = 1, #lists do | ||
+ | if j ~= 1 then result = result .. innersep end | ||
+ | result = result .. (lists[j][i] or "") | ||
+ | end | ||
+ | end | ||
+ | return result | ||
+ | end | ||
+ | |||
+ | |||
+ | |||
+ | function p.failsafe() | ||
+ | return Text.serial | ||
+ | end | ||
+ | |||
+ | |||
p.Text = function () | p.Text = function () |
Aktuelle Version vom 6. September 2019, 12:54 Uhr
Die Dokumentation für dieses Modul kann unter Modul:Text/Doku erstellt werden
-- Library table; the Text.* functions below are attached to it.
-- serial -- version stamp of this module, as date string
-- suite -- name of this module
local Text = { serial = "2018-11-14",
suite = "Text" }
--[=[
Text utilities
]=]
-- local globals
-- Caches shared by the functions of this file; each starts as false
-- and is built on first use by the function that needs it.
-- CJK character-class pattern (see Text.containsCJK)
local PatternCJK = false
-- character class of combining marks (see Text.removeDiacritics)
local PatternCombined = false
-- anchored pattern built from RangesLatin (see Text.isLatinRange)
local PatternLatin = false
-- pattern for end of sentence (see Text.sentenceTerminated)
local PatternTerminated = false
-- sequence of { first, last } codepoint pairs (see Text.isLatinRange)
local RangesLatin = false
-- string of all known quotation mark characters (see Text.isQuote)
local SeekQuote = false
local function factoryQuote()
    -- Create quote definitions, once per module load
    -- Postcondition:
    --     Text.quoteLang  -- table; language code -> name of quote type
    --     Text.quoteType  -- table; name of quote type -> definition;
    --                        the built-in English default is
    --                        { { 8220, 8221 }, { 8216, 8217 } },
    --                        i.e. { opening, closing } codepoints
    --                        for level 1 and level 2
    -- Definitions are read from Module:Text/quoting if that can be
    -- loaded; English defaults are supplied where missing.
    if not Text.quoteLang then
        local function writable( source )
            -- Shallow copy; tables delivered by mw.loadData() are
            -- read-only and any assignment to them raises an error
            local copy = { }
            for k, v in pairs( source ) do
                copy[ k ] = v
            end -- for k, v
            return copy
        end -- writable()
        local lucky, quoting = pcall( mw.loadData,
                                      "Module:Text/quoting" )
        local langs
        local types = Text.quoteType
        if lucky  and  type( quoting ) == "table" then
            langs = quoting.langs
            types = quoting.types
        end
        if type( langs ) ~= "table" then
            langs = { }
        end
        if type( types ) ~= "table" then
            types = { }
        end
        if type( langs.en ) ~= "string" then
            -- copy first: langs might be a read-only data table
            langs    = writable( langs )
            langs.en = "ld"
        end
        if type( types[ langs.en ] ) ~= "table" then
            -- copy first: types might be a read-only data table
            types             = writable( types )
            types[ langs.en ] = { { 8220, 8221 },
                                  { 8216, 8217 } }
        end
        Text.quoteLang = langs
        Text.quoteType = types
    end
end -- factoryQuote()
local function fiatQuote( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code
    --     advance  -- number, with level 1 or 2
    -- Returns: string; apply enclosed in quotation marks,
    --          or apply unchanged if no usable definition was found
    local r = apply
    local suite
    factoryQuote()
    suite = Text.quoteLang[ alien ]
    if not suite then
        local slang
        if type( alien ) == "string" then
            -- "xx-yy" falls back to the leading subtag "xx"
            slang = alien:match( "^(%l+)-" )
        end
        if slang then
            suite = Text.quoteLang[ slang ]
        end
        if not suite then
            suite = Text.quoteLang.en
        end
    end
    if suite then
        local quotes = Text.quoteType[ suite ]
        if quotes then
            local space
            if quotes[ 3 ] then
                -- a third entry requests padding between the marks
                -- and the text; written as numeric entity so that the
                -- non-breaking space is visible in this source
                space = "&#160;"
            else
                space = ""
            end
            quotes = quotes[ advance ]
            if quotes then
                r = mw.ustring.format( "%s%s%s%s%s",
                                       mw.ustring.char( quotes[ 1 ] ),
                                       space,
                                       apply,
                                       space,
                                       mw.ustring.char( quotes[ 2 ] ) )
            end
        else
            -- language refers to a quote type without definition
            mw.log( "fiatQuote() " .. suite )
        end
    end
    return r
end -- fiatQuote()
Text.char = function ( apply, again, accept )
    -- Build a string out of codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string; empty if apply is no table or has no items,
    --          error message in <span class="error"> on bad input
    --          unless accept is set
    local r
    if type( apply ) == "table" then
        local rejected = { }
        local points   = { }
        local message
        for _, item in pairs( apply ) do
            local kind = type( item )
            if kind == "number"  and
               not ( item < 32  and  item ~= 9  and  item ~= 10 ) then
                -- anything below 32 other than 9 and 10 is refused
                points[ #points + 1 ] = math.floor( item )
            elseif kind == "string" then
                rejected[ #rejected + 1 ] = item
            else
                rejected[ #rejected + 1 ] = tostring( item )
            end
        end -- for _, item
        if #rejected > 0 then
            message = "bad codepoints: " .. table.concat( rejected, " " )
        elseif #points > 0 then
            r = mw.ustring.char( unpack( points ) )
            if again then
                if type( again ) ~= "number" then
                    message = "bad repetitions: " .. tostring( again )
                else
                    local times = math.floor( again )
                    if times < 1 then
                        r = ""
                    elseif times > 1 then
                        r = r:rep( times )
                    end
                end
            end
        end
        if message  and  not accept then
            local e = mw.html.create( "span" )
            e:addClass( "error" )
            e:wikitext( message )
            r = tostring( e )
        end
    end
    return r or ""
end -- Text.char()
Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string; trimmed non-empty items in ascending order of
    --          their numerical keys; other keys are ignored
    local collect = { }
    local order   = { }
    local v
    -- pairs() does not guarantee any order: gather and sort the keys
    for k in pairs( args ) do
        if type( k ) == "number" then
            order[ #order + 1 ] = k
        end
    end -- for k
    table.sort( order )
    for i = 1, #order do
        v = mw.text.trim( args[ order[ i ] ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            collect[ #collect + 1 ] = v
        end
    end -- for i
    return table.concat( collect, apply or "|" )
end -- Text.concatParams()
Text.containsCJK = function ( analyse )
    -- Is any CJK code within?
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if CJK detected
    local r
    if not PatternCJK then
        -- character class [U+3400-U+9FFF U+20000-U+2B81F], kept in
        -- the file-local cache rather than in a global variable
        PatternCJK = mw.ustring.char( 91,
                                      13312, 45, 40959,
                                      131072, 45, 178207,
                                      93 )
    end
    if mw.ustring.find( analyse, PatternCJK ) then
        r = true
    else
        r = false
    end
    return r
end -- Text.containsCJK()
Text.getPlain = function ( adjust )
    -- Remove wikisyntax from string, except templates
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; without <!-- comments -->, without tags,
    --          without bold/italic apostrophes, and with non-breaking
    --          spaces turned into regular spaces
    local r = adjust
    local i = r:find( "<!--", 1, true )
    local j
    while i do
        j = r:find( "-->", i + 3, true )
        if j then
            -- drop everything from "<" up to and including "-->"
            r = r:sub( 1, i - 1 ) .. r:sub( j + 3 )
        else
            -- unterminated comment swallows the remainder
            r = r:sub( 1, i - 1 )
        end
        i = r:find( "<!--", i, true )
    end -- "<!--"
    -- shortest match, so that several marked spans in one string
    -- are unwrapped one by one
    r = r:gsub( "(</?%l[^>]*>)", "" )
         :gsub( "'''(.-)'''", "%1" )
         :gsub( "''(.-)''", "%1" )
         :gsub( "&nbsp;", " " )
         :gsub( "\194\160", " " )    -- U+00A0 as UTF-8
    return r
end -- Text.getPlain()
Text.isLatinRange = function ( adjust )
    -- Does the string consist only of characters expected in latin
    -- text (letters and accompanying symbols)?
    -- Precondition:
    --     adjust  -- string, or nil for initialization
    -- Returns: true, if valid for latin only; false, if not;
    --          nil, if called for initialization only
    -- Side effect: RangesLatin and PatternLatin are built on first call
    if not RangesLatin then
        RangesLatin = { {    7,  687 },
                        { 7531, 7578 },
                        { 7680, 7935 },
                        { 8194, 8250 } }
    end
    if not PatternLatin then
        local parts = { "^[" }
        for _, span in ipairs( RangesLatin ) do
            -- 45 is the hyphen forming "first-last" within the class
            parts[ #parts + 1 ] = mw.ustring.char( span[ 1 ],
                                                   45,
                                                   span[ 2 ] )
        end -- for _, span
        parts[ #parts + 1 ] = "]*$"
        PatternLatin = table.concat( parts )
    end
    if not adjust then
        return nil
    end
    return ( mw.ustring.match( adjust, PatternLatin ) ~= nil )
end -- Text.isLatinRange()
Text.isQuote = function ( ask )
    -- Is this character any quotation mark?
    -- Parameter:
    --     ask  -- string, with single character
    -- Returns: true, if ask is quotation mark
    if not SeekQuote then
        local marks = { 34,        -- "
                        39,        -- '
                        171,       -- laquo
                        187,       -- raquo
                        8216,      -- lsquo
                        8217,      -- rsquo
                        8218,      -- sbquo
                        8220,      -- ldquo
                        8221,      -- rdquo
                        8222,      -- bdquo
                        8249,      -- lsaquo
                        8250,      -- rsaquo
                        0x300C,    -- CJK
                        0x300D,    -- CJK
                        0x300E,    -- CJK
                        0x300F }   -- CJK
        SeekQuote = mw.ustring.char( unpack( marks ) )
    end
    if ask == "" then
        -- plain search for "" would succeed on any string
        return false
    end
    if mw.ustring.find( SeekQuote, ask, 1, true ) then
        return true
    end
    return false
end -- Text.isQuote()
Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText()
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string; trimmed non-empty items in ascending order of
    --          their numerical keys, joined by mw.text.listToText()
    local collect = { }
    local order   = { }
    local v
    -- pairs() does not guarantee any order: gather and sort the keys
    for k in pairs( args ) do
        if type( k ) == "number" then
            order[ #order + 1 ] = k
        end
    end -- for k
    table.sort( order )
    for i = 1, #order do
        v = mw.text.trim( args[ order[ i ] ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            collect[ #collect + 1 ] = v
        end
    end -- for i
    return mw.text.listToText( collect )
end -- Text.listToText()
Text.quote = function ( apply, alien, advance )
    -- Enclose text in quotation marks
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- TODO FIXME: Introduction expected 2017-04
        -- as long as the title offers no pageLanguage,
        -- the content language of the wiki is used
        slang = mw.title.getCurrentTitle().pageLanguage
                or mw.language.getContentLanguage():getCode()
    end
    -- any level other than 2 counts as level 1
    return fiatQuote( mw.text.trim( apply ),
                      slang,
                      ( advance == 2 )  and  2   or   1 )
end -- Text.quote()
Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    --          trimmed text is left as it is, if empty, or if first
    --          or last character is a quotation mark already
    local r = mw.text.trim( apply )
    local s = mw.ustring.sub( r, 1, 1 )
    if s ~= ""  and  not Text.isQuote( s ) then
        -- last character; a range of ( -1, 1 ) would be empty
        -- for any text longer than one character
        s = mw.ustring.sub( r, -1 )
        if not Text.isQuote( s ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()
Text.removeDiacritics = function ( adjust )
    -- Strip combining marks off all characters
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    local stripped
    if not PatternCombined then
        -- character class of four ranges of combining marks;
        -- 91 "["   45 "-"   93 "]"
        local class = { 91,
                        0x0300, 45, 0x036F,
                        0x1AB0, 45, 0x1AFF,
                        0x1DC0, 45, 0x1DFF,
                        0xFE20, 45, 0xFE2F,
                        93 }
        PatternCombined = mw.ustring.char( unpack( class ) )
    end
    -- decompose, drop the marks, compose again
    stripped = mw.ustring.gsub( mw.ustring.toNFD( adjust ),
                                PatternCombined,
                                "" )
    return mw.ustring.toNFC( stripped )
end -- Text.removeDiacritics()
Text.sentenceTerminated = function ( analyse )
    -- Does the string end with dot, question or exclamation mark?
    --     Trailing quotation marks, closing bracket and so on granted
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated
    if not PatternTerminated then
        -- opening "[" followed by U+3002, U+FF01, U+FF0E, U+FF1F;
        -- the literal adds further terminators, closes the class and
        -- permits any number of trailing quotes etc. before the end
        local lead = mw.ustring.char( 91,
                                      12290,
                                      65281,
                                      65294,
                                      65311 )
        PatternTerminated = lead .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    if mw.ustring.find( analyse, PatternTerminated ) then
        return true
    end
    return false
end -- Text.sentenceTerminated()
Text.ucfirstAll = function ( adjust )
    -- Capitalize all words
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with all first letters in upper case
    local r = " " .. adjust    -- leading space: first word is matched
                               --                like any other word
    local i = 1
    local c, j, m
    if adjust:find( "&", 1, true ) then
        -- Named entities would be capitalized like words;
        -- turn the common ones into numerical entities meanwhile
        r = r:gsub( "&amp;",    "&#38;" )
             :gsub( "&lt;",     "&#60;" )
             :gsub( "&gt;",     "&#62;" )
             :gsub( "&nbsp;",   "&#160;" )
             :gsub( "&thinsp;", "&#8201;" )
             :gsub( "&zwnj;",   "&#8204;" )
             :gsub( "&zwj;",    "&#8205;" )
             :gsub( "&lrm;",    "&#8206;" )
             :gsub( "&rlm;",    "&#8207;" )
        m = true
    end
    while i do
        -- lower case letter preceded by a non-alphanumeric character
        i = mw.ustring.find( r, "%W%l", i )
        if i then
            j = i + 1
            c = mw.ustring.upper( mw.ustring.sub( r, j, j ) )
            r = string.format( "%s%s%s",
                               mw.ustring.sub( r, 1, i ),
                               c,
                               mw.ustring.sub( r, i + 2 ) )
            i = j
        end
    end -- while i
    r = r:sub( 2 )    -- drop the leading space again
    if m then
        -- restore named entities; repair "&#x...;" which has been
        -- capitalized as "&#X...;" by the loop above
        r = r:gsub( "&#38;",   "&amp;" )
             :gsub( "&#60;",   "&lt;" )
             :gsub( "&#62;",   "&gt;" )
             :gsub( "&#160;",  "&nbsp;" )
             :gsub( "&#8201;", "&thinsp;" )
             :gsub( "&#8204;", "&zwnj;" )
             :gsub( "&#8205;", "&zwj;" )
             :gsub( "&#8206;", "&lrm;" )
             :gsub( "&#8207;", "&rlm;" )
             :gsub( "&#X(%x+);", "&#x%1;" )
    end
    return r
end -- Text.ucfirstAll()
Text.uprightNonlatin = function ( adjust )
-- Ensure non-italics for non-latin text parts
-- One single greek letter might be granted
-- Precondition:
-- adjust -- string
-- Returns: string with non-latin parts enclosed in <span>
-- adjust itself, if entirely matched by PatternLatin
local r
-- called without argument: only makes sure that RangesLatin and
-- PatternLatin have been built
Text.isLatinRange()
if mw.ustring.match( adjust, PatternLatin ) then
-- latin only, horizontal dashes, quotes
r = adjust
else
-- State of the scan over the characters of adjust:
-- c -- codepoint of current character (reused as string below)
-- j -- index where the pending non-latin run began, or false
-- k -- index of first character not yet copied into r
-- m -- index expected for the next decisive character if the
-- pending run is one single greek letter, or false
-- n -- number of characters in adjust
local c
local j = false
local k = 1
local m = false
local n = mw.ustring.len( adjust )
local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
local flat = function ( a )
-- isLatin
-- true, if codepoint a is within any RangesLatin pair; else nil
local range
for i = 1, #RangesLatin do
range = RangesLatin[ i ]
if a >= range[ 1 ] and a <= range[ 2 ] then
return true
end
end -- for i
end -- flat()
local focus = function ( a )
-- char is not ambivalent
-- true for codepoints above 64 except 8192...8212,
-- and for 38 and 60; all other codepoints neither begin
-- nor terminate a non-latin run
-- (this local r shadows the result variable of the function)
local r = ( a > 64 )
if r then
r = ( a < 8192 or a > 8212 )
else
r = ( a == 38 or a == 60 ) -- '&' '<'
end
return r
end -- focus()
local form = function ( a )
-- result so far, followed by pending text k...j-1 as it is,
-- followed by the run j...a wrapped into <span>
return string.format( span,
r,
mw.ustring.sub( adjust, k, j - 1 ),
mw.ustring.sub( adjust, j, a ) )
end -- form()
r = ""
for i = 1, n do
c = mw.ustring.codepoint( adjust, i, i )
if focus( c ) then
if flat( c ) then
-- latin character: terminates a pending run, if any
if j then
if m then
if i == m then
-- single greek letter.
-- run is discarded and remains unwrapped
j = false
end
m = false
end
if j then
-- keep spaces and "(" directly before the current
-- character out of the <span>; they are collected
-- in s and appended after it
local nx = i - 1
local s = ""
for ix = nx, 1, -1 do
c = mw.ustring.sub( adjust, ix, ix )
if c == " " or c == "(" then
nx = nx - 1
s = c .. s
else
break -- for ix
end
end -- for ix
r = form( nx ) .. s
j = false
k = i
end
end
elseif not j then
-- first non-latin character: a run begins here
j = i
if c >= 880 and c <= 1023 then
-- single greek letter?
-- codepoints 880...1023; decided at next decisive char
m = i + 1
else
m = false
end
end
elseif m then
-- ambivalent character: move the expected index along
m = m + 1
end
end -- for i
-- A run still pending at the end is wrapped up to the last
-- character, unless m is set and not below n;
-- otherwise the remainder from k is appended as it is
if j and ( not m or m < n ) then
r = form( n )
else
r = r .. mw.ustring.sub( adjust, k )
end
end
return r
end -- Text.uprightNonlatin()
Text.test = function ( about )
    -- Provide internal data for inspection
    -- Parameter:
    --     about  -- string; "quote" is the only topic supported
    -- Returns: table with QuoteLang and QuoteType for "quote",
    --          else nil
    if about ~= "quote" then
        return nil
    end
    factoryQuote()
    return { QuoteLang = Text.quoteLang,
             QuoteType = Text.quoteType }
end -- Text.test()
-- Export
-- Table of entry points; each function below receives a frame
-- object and reads its parameters from frame.args
-- (some read those of frame:getParent() instead)
local p = { }
function p.char( frame )
    -- Export wrapper for Text.char(): build text from a list of codes
    -- Parameters are read from the parent frame (the calling template);
    -- if there is no parent frame or it has no parameter 1, the
    -- parameters of the invocation itself are used.
    --     1       -- items separated by whitespace; an item starting with
    --                "x" gets "0" prepended before number conversion
    --                (0x... notation), other items are converted as is;
    --                items that are no numbers are kept as strings
    --     *       -- converted to a number, second argument of Text.char()
    --     errors  -- "0" makes the third argument of Text.char() true
    -- Returns: whatever Text.char() returns
    local parent = frame:getParent()
    -- getParent() yields nil for a frame without parent; do not index it
    local params = parent and parent.args
    local story = params and params[ 1 ]
    local codes, lenient, multiple
    if not story then
        params = frame.args
        story = params[ 1 ]
    end
    if story then
        local items = mw.text.split( story, "%s+" )
        if #items > 0 then
            lenient = ( params.errors == "0" )
            codes = { }
            multiple = tonumber( params[ "*" ] )
            -- ipairs: the codes must keep the order in which they were
            -- written; pairs() does not guarantee any order
            for _, v in ipairs( items ) do
                -- Empty items arise from leading or trailing whitespace
                if v ~= "" then
                    local j
                    if v:sub( 1, 1 ) == "x" then
                        j = tonumber( "0" .. v )
                    else
                        j = tonumber( v )
                    end
                    table.insert( codes, j or v )
                end
            end -- for _, v
        end
    end
    return Text.char( codes, multiple, lenient )
end
function p.concatParams( frame )
    -- Export wrapper for Text.concatParams()
    --     template   -- "1" (after trimming): take the values from the
    --                   parent frame instead of the invocation
    --     separator  -- passed on as second argument
    --     format     -- passed on as third argument
    -- Returns: whatever Text.concatParams() returns
    local own = frame.args
    local viaTemplate = own.template
    local source
    if type( viaTemplate ) == "string" then
        viaTemplate = ( mw.text.trim( viaTemplate ) == "1" )
    end
    if viaTemplate then
        source = frame:getParent().args
    else
        source = own
    end
    return Text.concatParams( source, own.separator, own.format )
end
function p.containsCJK( frame )
    -- Export wrapper for Text.containsCJK()
    --     1  -- text to be checked; empty string if missing
    -- Returns: "1" if Text.containsCJK() yields a true value, else ""
    local story = frame.args[ 1 ] or ""
    if Text.containsCJK( story ) then
        return "1"
    end
    return ""
end
function p.getPlain( frame )
    -- Export wrapper for Text.getPlain()
    --     1  -- text to be processed; empty string if missing
    -- Returns: whatever Text.getPlain() returns
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return Text.getPlain( story )
end
function p.isLatinRange( frame )
    -- Export wrapper for Text.isLatinRange()
    --     1  -- text to be checked; empty string if missing
    -- Returns: "1" if Text.isLatinRange() yields a true value, else ""
    local story = frame.args[ 1 ] or ""
    local r = ""
    if Text.isLatinRange( story ) then
        r = "1"
    end
    return r
end
function p.isQuote( frame )
    -- Export wrapper for Text.isQuote()
    --     1  -- text to be checked; empty string if missing
    -- Returns: "1" if Text.isQuote() yields a true value, else ""
    local story = frame.args[ 1 ] or ""
    if not Text.isQuote( story ) then
        return ""
    end
    return "1"
end
function p.listToFormat(frame)
    -- Split several lists and feed their items row by row into a format
    --     1, 2, ...  -- lists, each one split at sep
    --     format     -- row template; the n-th "%s" receives the item
    --                   of the n-th list; a missing format is treated
    --                   as empty string
    --     sep        -- separator handed to mw.text.split(); default ";"
    -- Returns: concatenation of one formatted row per item position,
    --          as many rows as the longest list has items
    local lists = {}
    local pformat = frame.args["format"] or ""
    local sep = frame.args["sep"] or ";"
    -- Parse parameters: collect the numbered ones as lists
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then lists[knum] = v end
    end
    -- Split the lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
    end
    -- Build the result string
    local listCount = #lists
    local rows = {}
    for i = 1, maxListLen do
        local j = 0
        -- One pass over the format with a replacement function:
        -- items are inserted literally ("%" in an item is no capture
        -- reference), inserted items are not scanned again, and a list
        -- that is too short contributes an empty string instead of nil.
        -- At most listCount placeholders are filled, so lists[j] exists.
        rows[i] = mw.ustring.gsub(pformat, "%%s", function ()
            j = j + 1
            return lists[j][i] or ""
        end, listCount)
    end
    return table.concat(rows)
end
function p.listToText( frame )
    -- Export wrapper for Text.listToText()
    --     template  -- "1" (after trimming): take the values from the
    --                  parent frame instead of the invocation
    --     format    -- passed on as second argument
    -- Returns: whatever Text.listToText() returns
    local own = frame.args
    local viaTemplate = own.template
    local source
    if type( viaTemplate ) == "string" then
        viaTemplate = ( mw.text.trim( viaTemplate ) == "1" )
    end
    if viaTemplate then
        source = frame:getParent().args
    else
        source = own
    end
    return Text.listToText( source, own.format )
end
function p.quote( frame )
    -- Export wrapper for Text.quote()
    --     1  -- text; empty string if missing
    --     2  -- second argument; trimmed, false if nothing is left
    --     3  -- third argument, converted to a number
    -- Returns: whatever Text.quote() returns
    local args = frame.args
    local lang = args[ 2 ]
    if type( lang ) == "string" then
        local trimmed = mw.text.trim( lang )
        if trimmed == "" then
            lang = false
        else
            lang = trimmed
        end
    end
    local story = args[ 1 ] or ""
    local level = tonumber( args[ 3 ] )
    return Text.quote( story, lang, level )
end
function p.quoteUnquoted( frame )
    -- Export wrapper for Text.quoteUnquoted()
    --     1  -- text; empty string if missing
    --     2  -- second argument; trimmed, false if nothing is left
    --     3  -- third argument, converted to a number
    -- Returns: whatever Text.quoteUnquoted() returns
    local args = frame.args
    local lang = args[ 2 ]
    if type( lang ) == "string" then
        local trimmed = mw.text.trim( lang )
        if trimmed == "" then
            lang = false
        else
            lang = trimmed
        end
    end
    local story = args[ 1 ] or ""
    local level = tonumber( args[ 3 ] )
    return Text.quoteUnquoted( story, lang, level )
end
function p.removeDiacritics( frame )
    -- Export wrapper for Text.removeDiacritics()
    --     1  -- text to be processed; empty string if missing
    -- Returns: whatever Text.removeDiacritics() returns
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return Text.removeDiacritics( story )
end
function p.sentenceTerminated( frame )
    -- Export wrapper for Text.sentenceTerminated()
    --     1  -- text to be checked; empty string if missing
    -- Returns: "1" if Text.sentenceTerminated() yields a true value,
    --          else ""
    local story = frame.args[ 1 ] or ""
    local r = ""
    if Text.sentenceTerminated( story ) then
        r = "1"
    end
    return r
end
function p.ucfirstAll( frame )
    -- Export wrapper for Text.ucfirstAll()
    --     1  -- text to be processed; empty string if missing
    -- Returns: whatever Text.ucfirstAll() returns
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return Text.ucfirstAll( story )
end
function p.unstrip( frame )
    -- Apply mw.text.unstrip() to the text, then trim the outcome
    --     1  -- text to be processed; empty string if missing
    -- Returns: result of mw.text.trim()
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return mw.text.trim( mw.text.unstrip( story ) )
end
function p.uprightNonlatin( frame )
    -- Export wrapper for Text.uprightNonlatin()
    --     1  -- text to be processed; empty string if missing
    -- Returns: whatever Text.uprightNonlatin() returns
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return Text.uprightNonlatin( story )
end
function p.zip(frame)
    -- Split several lists and interleave their items
    --     1, 2, ...  -- lists
    --     sep        -- default separator for splitting; default ""
    --     sep1, ...  -- separator for splitting the list of that number
    --     isep       -- inserted between the items of one position
    --     osep       -- inserted between consecutive positions
    -- Returns: for every item position the items of all lists joined
    --          by isep, the positions joined by osep; a list that is
    --          too short contributes empty strings
    local args = frame.args
    local columns = {}
    local splitters = {}
    local fallback = args["sep"] or ""
    local glueInner = args["isep"] or ""
    local glueOuter = args["osep"] or ""
    -- Parse parameters: numbered ones are lists, sepN are separators
    for key, value in pairs(args) do
        local index = tonumber(key)
        if index then
            columns[index] = value
        elseif string.sub(key, 1, 3) == "sep" then
            local which = tonumber(string.sub(key, 4))
            if which then
                splitters[which] = value
            end
        end
    end
    -- Where no explicit separator was given, use the default separator
    for i = 1, math.max(#splitters, #columns) do
        splitters[i] = splitters[i] or fallback
    end
    -- Split the lists
    local longest = 0
    for i = 1, #columns do
        local cells = mw.text.split(columns[i], splitters[i])
        columns[i] = cells
        if #cells > longest then
            longest = #cells
        end
    end
    -- Collect the pieces in order and join them once at the end
    local count = #columns
    local pieces = {}
    for row = 1, longest do
        if row ~= 1 then
            pieces[#pieces + 1] = glueOuter
        end
        for col = 1, count do
            if col ~= 1 then
                pieces[#pieces + 1] = glueInner
            end
            pieces[#pieces + 1] = columns[col][row] or ""
        end
    end
    return table.concat(pieces)
end
function p.failsafe()
    -- Report the version identification of this module
    -- Returns: Text.serial
    local version = Text.serial
    return version
end
p.Text = function ()
    -- Provide access to the library table itself
    -- Returns: table Text
    local library = Text
    return library
end -- p.Text
-- Module result: the export table
return p