Module:Peter Bowman/ortografieBE

There are no reviewed versions of this page, so it may not have been checked for adherence to standards.

Documentation for this module may be created at Module:Peter Bowman/ortografieBE/doc

-- Table of functions exported by this module.
local p = {}

-- Local aliases for the library functions used throughout the module.
local insert, lower = table.insert, mw.ustring.lower

-- Wikitext appended to every error message returned by the module.
local errCategory = '[[:Kategoria:test]]'

-- Error message templates. Callers substitute the "$1" placeholder through
-- string.gsub before returning the message.
local errStrings = {}
errStrings.unrecognizedChar = 'Nierozpoznany znak $1'
errStrings.conversionError  = 'Błąd w konwersji znaku $1'

-- Cyrillic -> Latin correspondence tables, see [[w:pl:Łacinka białoruska]].
-- A nil entry marks a combination that has no conversion; it is reported as
-- a conversion error when encountered.
local equiv = {
	vowels = {
		-- Columns, as indexed by Romanizator:checkVowel:
		-- { at the start / after a vowel, after a consonant,
		--   after an apostrophe or "ь", after "л" }
		-- Uppercase vowels keep their case in every column, so that all-caps
		-- input such as "ЛА" does not come out as "Ła".
		['а'] = { 'a',  'a',  nil,  'a' }, ['А'] = { 'A',  'A',  nil,  'A' },
		['э'] = { 'e',  'e',  nil,  'e' }, ['Э'] = { 'E',  'E',  nil,  'E' },
		['і'] = { 'i',  'i',  'ji', 'i' }, ['І'] = { 'I',  'I',  'Ji', 'I' },
		['о'] = { 'o',  'o',  nil,  'o' }, ['О'] = { 'O',  'O',  nil,  'O' },
		['у'] = { 'u',  'u',  nil,  'u' }, ['У'] = { 'U',  'U',  nil,  'U' },
		['ы'] = { nil,  'y',  nil,  'y' }, ['Ы'] = { nil,  'Y',  nil,  'Y' },
		['я'] = { 'ja', 'ia', 'ja', 'a' }, ['Я'] = { 'Ja', 'Ia', 'Ja', 'A' },
		['е'] = { 'je', 'ie', 'je', 'e' }, ['Е'] = { 'Je', 'Ie', 'Je', 'E' },
		['ё'] = { 'jo', 'io', 'jo', 'o' }, ['Ё'] = { 'Jo', 'Io', 'Jo', 'O' },
		['ю'] = { 'ju', 'iu', 'ju', 'u' }, ['Ю'] = { 'Ju', 'Iu', 'Ju', 'U' }
	},
	consonants = {
		-- Columns, as indexed by Romanizator:checkConsonant:
		-- { default form,
		--   form before "ь" (and, for consonants other than "з", "н", "с",
		--   "ц", before "я", "е", "ё", "ю", "і") }
		['б'] = { 'b',  'b' },  ['Б'] = { 'B',  'B' },
		['в'] = { 'v',  'v' },  ['В'] = { 'V',  'V' },
		['г'] = { 'h',  'h' },  ['Г'] = { 'H',  'H' },
		['д'] = { 'd',  nil },  ['Д'] = { 'D',  nil },
		['ж'] = { 'ž',  nil },  ['Ж'] = { 'Ž',  nil },
		['з'] = { 'z',  'ź' },  ['З'] = { 'Z',  'Ź' },
		['й'] = { 'j',  'j' },  ['Й'] = { 'J',  'J' },
		['к'] = { 'k',  'k' },  ['К'] = { 'K',  'K' },
		['л'] = { 'ł',  'l' },  ['Л'] = { 'Ł',  'L' },
		['м'] = { 'm',  'm' },  ['М'] = { 'M',  'M' },
		['н'] = { 'n',  'ń' },  ['Н'] = { 'N',  'Ń' },
		['п'] = { 'p',  'p' },  ['П'] = { 'P',  'P' },
		-- Uppercase "Р" and "С" previously mapped to lowercase Latin letters.
		['р'] = { 'r',  nil },  ['Р'] = { 'R',  nil },
		['с'] = { 's',  'ś' },  ['С'] = { 'S',  'Ś' },
		['т'] = { 't',  nil },  ['Т'] = { 'T',  nil },
		['ў'] = { 'ŭ',  'ŭ' },  ['Ў'] = { 'Ŭ',  'Ŭ' },
		['ф'] = { 'f',  'f' },  ['Ф'] = { 'F',  'F' },
		['х'] = { 'ch', 'ch' }, ['Х'] = { 'Ch', 'Ch' },
		['ц'] = { 'c',  'ć' },  ['Ц'] = { 'C',  'Ć' },
		['ч'] = { 'č',  nil },  ['Ч'] = { 'Č',  nil },
		['ш'] = { 'š',  nil },  ['Ш'] = { 'Š',  nil },
		-- The soft sign and the apostrophe produce no output of their own;
		-- they only influence the neighbouring characters.
		['ь'] = { '',   nil },
		["'"] = { '',   nil }
	},
	-- Characters copied to the output unchanged.
	common = {
		[' '] = true,
		['.'] = true,
		[','] = true,
		['–'] = true,
		['—'] = true
	}
}

-- Prototype for romanizer objects. Romanizator:create() installs this table
-- as the __index fallback of the objects it returns, so the fields below act
-- as defaults for any object that does not define its own.
local Romanizator = {}
Romanizator.chars = {}         -- descriptors of the input characters, in order
Romanizator.output = {}        -- Latin fragments collected during romanize()
Romanizator.currentPos = nil   -- index into chars of the character being converted

-- Classifies one input character and appends its descriptor to self.chars.
-- @param char  a single character of the input text (string)
-- @return true when the character is listed in one of the equiv tables;
--         false otherwise, in which case nothing is appended
function Romanizator:addChar( char )
	local folded = lower( char )
	
	-- Lowercase letters that set the "special" flag of a descriptor.
	local flaggedVowels = {
		['я'] = true, ['е'] = true, ['ё'] = true, ['ю'] = true, ['і'] = true
	}
	local flaggedConsonants = {
		['з'] = true, ['н'] = true, ['с'] = true, ['ц'] = true
	}
	
	local descriptor
	
	if equiv.vowels[ char ] then
		descriptor = {
			value      = char,
			type       = 'vowel',
			special    = flaggedVowels[ folded ] == true,
			conversion = equiv.vowels[ char ]
		}
	elseif equiv.consonants[ char ] then
		descriptor = {
			value      = char,
			type       = 'consonant',
			special    = flaggedConsonants[ folded ] == true,
			conversion = equiv.consonants[ char ]
		}
	elseif equiv.common[ char ] then
		descriptor = { value = char, type = 'common' }
	end
	
	if descriptor == nil then
		return false
	end
	
	insert( self.chars, descriptor )
	return true
end

-- Appends a converted fragment to the output, or records a conversion error.
-- @param char        the character being converted; the callers in this
--                    module pass the descriptor table built by addChar, a
--                    plain string is accepted as well
-- @param conversion  the Latin fragment to append, or nil when the
--                    conversion table has no entry for this context
function Romanizator:saveLatinChar( char, conversion )
	if conversion then
		-- Note that the empty string is truthy in Lua, so "ь" and the
		-- apostrophe (converted to '') are appended here, not reported.
		insert( self.output, conversion )
		return
	end
	
	-- Only the first failure is kept. Store the character itself rather
	-- than its descriptor table: romanize() substitutes self.error into the
	-- error message, and a table there leaves the "$1" placeholder
	-- unreplaced.
	if not self.error then
		if type( char ) == 'table' then
			self.error = char.value
		else
			self.error = char
		end
	end
end

-- Converts the vowel at self.currentPos, choosing the column of its
-- conversion table from the character that precedes it.
-- @param char  descriptor of the vowel (as built by addChar)
function Romanizator:checkVowel( char )
	local prev = self.chars[ self.currentPos - 1 ]
	
	if
		-- Start of the input, or start of a later word: a preceding space or
		-- punctuation mark ('common') is a word boundary too. Previously
		-- only position 1 was treated as word-initial, so "я" after a space
		-- became "ia" instead of "ja".
		prev == nil or
		prev.type == 'common' or
		prev.type == 'vowel' or
		-- "ў" is a semivowel; in Łacinka a vowel after it takes the same
		-- form as after a vowel (e.g. "здароўе" -> "zdaroŭje").
		lower( prev.value ) == 'ў'
	then
		self:saveLatinChar( char, char.conversion[ 1 ] )
	elseif
		lower( prev.value ) == 'л'
	then
		self:saveLatinChar( char, char.conversion[ 4 ] )
	elseif
		prev.value == 'ь' or
		prev.value == '\''
	then
		self:saveLatinChar( char, char.conversion[ 3 ] )
	else
		-- After any other consonant.
		self:saveLatinChar( char, char.conversion[ 2 ] )
	end
end

-- Converts the consonant at self.currentPos, choosing between its two forms
-- from the character that follows it.
-- @param char  descriptor of the consonant (as built by addChar)
function Romanizator:checkConsonant( char )
	local nextChar = self.chars[ self.currentPos + 1 ]
	local useSecondForm = false
	
	-- nextChar is nil for the last character of the input; indexing it
	-- unconditionally raised an error for any text ending in a consonant,
	-- "ь" or an apostrophe. At the end of input the default form is used.
	if nextChar then
		useSecondForm =
			nextChar.value == 'ь' or
			( not char.special and nextChar.type == 'vowel' and nextChar.special )
	end
	
	if useSecondForm then
		self:saveLatinChar( char, char.conversion[ 2 ] )
	else
		self:saveLatinChar( char, char.conversion[ 1 ] )
	end
end

-- Converts every character collected by addChar and assembles the result.
-- @return the romanized text, or, when some character could not be
--         converted, the conversionError message naming the first such
--         character followed by errCategory
function Romanizator:romanize()
	for i, char in ipairs( self.chars ) do
		self.currentPos = i
		
		if char.type == 'vowel' then
			self:checkVowel( char )
		elseif char.type == 'consonant' then
			self:checkConsonant( char )
		elseif char.type == 'common' then
			insert( self.output, char.value )
		end
	end
	
	if self.error then
		-- self.error may hold a character descriptor (a table) rather than
		-- a string. Given a table, string.gsub looks the match up as a key
		-- and leaves "$1" in place, so extract the character first.
		local failed = self.error
		if type( failed ) == 'table' then
			failed = failed.value
		end
		
		-- A function replacement inserts the text verbatim, without
		-- interpreting "%" sequences. Assigning to a single local drops the
		-- substitution count returned by gsub.
		local message = string.gsub(
			errStrings.conversionError, '$1', function() return failed end
		)
		return message .. errCategory
	end
	
	return table.concat( self.output, '' )
end

-- Creates a romanizer object that inherits its methods from Romanizator.
-- @param o  optional table to turn into the object; a new one is created
--           when omitted
-- @return the object
function Romanizator:create( o )
	o = o or {}
	
	-- Give every object its own buffers. Without this, all objects fall
	-- through __index to the prototype's output/chars tables and append to
	-- the same shared lists, so characters from one conversion leak into
	-- the next one. Buffers supplied by the caller are kept.
	o.output = o.output or {}
	o.chars  = o.chars or {}
	
	setmetatable( o, self )
	self.__index = self
	return o
end

-- Entry point: romanizes the first argument passed to the module call.
-- @param frame  frame object; only frame.args[ 1 ] is read
-- @return the romanized text, or an error message followed by errCategory
--         when the text contains an unrecognized character or a character
--         that cannot be converted in its context; an empty string when no
--         text was given
p.main = function( frame )
	-- A missing argument is treated as empty input instead of raising an
	-- error inside gcodepoint.
	local text = frame.args[ 1 ] or ''
	
	local lacinka = Romanizator:create()
	
	for codepoint in mw.ustring.gcodepoint( text ) do
		local char = mw.ustring.char( codepoint )
		
		if not lacinka:addChar( char ) then
			-- The offending character is arbitrary user input. Used as a
			-- replacement *string*, a "%" would be interpreted as an escape;
			-- a function replacement inserts it verbatim.
			local message = string.gsub(
				errStrings.unrecognizedChar, '$1', function() return char end
			)
			return message .. errCategory
		end
	end
	
	return lacinka:romanize()
end

-- Hand the table of exported functions (p.main) back to the loader.
return p