-- Module:Sandbox/Santi2222/eu-pronunc-ok
--
-- From Wiktionary
--
-- "Documentation for this module may be created at Module:Sandbox/Santi2222/eu-pronunc-ok/doc"

-- Based on [[Module:es-pronunc]] by: Benwing
-- Adapted by Santi2222

local export = {}

local m_IPA = require("Module:IPA")

local lang = require("Module:languages").getByCode("eu")

local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
local ulen = mw.ustring.len

-- Combining diacritics. export.IPA decomposes its input to NFD, so these
-- occur in the working text as separate code points.
local AC = u(0x0301) -- acute =  ́
local GR = u(0x0300) -- grave =  ̀
local TILDE = u(0x0303) -- tilde =  ̃

-- Building blocks for the Lua patterns used in export.IPA.
local vowel = "aeiou" -- vowel letters; note that y is NOT included (export.IPA maps <y> to /j/)
local V = "[" .. vowel .. "]" -- any single vowel
local W = "[jw]" -- glide
local stress = AC .. GR -- the two stress diacritics concatenated
local separator = "# ." -- word boundary marker, space, syllable dot
local separator_c = "[" .. separator .. "]" -- character class matching one separator
local C = "[^" .. vowel .. separator .. "]" -- consonant (anything that is neither a vowel nor a separator)
local T = "[^" .. vowel .. "lrɾjw" .. separator .. "]" -- obstruent or nasal (consonant other than liquids/glides)

-- Wrapper around rsubn() that yields only the substituted string; the
-- substitution count is dropped by the parenthesised call.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end

-- Wrapper around rsubn() that returns the substituted string followed by a
-- boolean telling whether at least one substitution took place.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end

-- Keep applying rsub() with the same pattern and replacement until the
-- string reaches a fixed point, then return it.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

--- Generate the IPA transcription of a Basque term.
--
-- style == one of the following:
-- "northern_style": lack of palatalization, /h/, <j> = /j/
-- "southern_style": palatalization, lack of /h/, <j> = /x/
-- (any value other than "northern_style" is handled as southern)
--
-- @param text      respelling to transcribe; when nil the current page title is used
-- @param style     dialect style, see above
-- @param phonetic  truthy -> phonetic output (nasalization, allophones, syllable dots kept);
--                  falsy -> phonemic output (syllable dots removed)
-- @return table with fields:
--           text               = the transcription (NFC-normalized, without delimiters)
--           northern_different = true when the input contains a conditioned
--                                palatalization site, <h> or <j>, i.e. something the
--                                two styles treat differently
function export.IPA(text, style, phonetic)

	local northern = style == "northern_style"
	local n_diff = false

	text = ulower(text or mw.title.getCurrentTitle().text)
	-- decompose everything but ñ
	text = mw.ustring.toNFD(text)
	text = rsub(text, ".[" .. TILDE .. "]", {
		["n" .. TILDE] = "ñ",
	})
	-- convert commas and en/em dashes to IPA foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")

	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(str)
		str = rsub(str, "%s+", " ")
		str = rsub(str, "^ ", "")
		str = rsub(str, " $", "")
		return str
	end

	text = canon_spaces(text)

	-- Convert hyphens to spaces
	text = rsub(text, "%-", " ")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = canon_spaces(text)
	-- now eliminate punctuation
	text = rsub(text, "[!?']", "")
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"

	--cases in which <j> is always /x/ (requires respelling)
	-- X is a placeholder that is turned into "x" by final_conversions below
	text = rsub(text, "kh", "X")

	--determining whether "h" denotes a phoneme, or the lack of palatalization, palatalization when needed
	-- N and L are placeholders for n/l that must never palatalize
	text = rsub(text, "nh", "N")
	text = rsub(text, "lh", "L")

	-- ł/ň mark sites left unpalatalized (northern), ĺ/ń mark palatalized ones (southern)
	text = rsub(text, "([aeo])il([# .h]*[aeiou])", (northern and "%1ił%2" or "%1ĺ%2"))
	text = rsub(text, "([aeo])in([# .h]*[aeiou])", (northern and "%1iň%2" or "%1ń%2"))

	text = rsub(text, "uin([# .h]*[aeiou])", (northern and "uiň%1" or "uiń%1"))
	text = rsub(text, "uil([# .h]*[aeiou])", (northern and "uił%1" or "uiĺ%1"))

	text = rsub(text, "il([# .h]*[aeiou])", (northern and "ił%1" or "iĺ%1"))
	text = rsub(text, "in([# .h]*[aeiou])", (northern and "iň%1" or "iń%1"))

	-- Must be a Unicode-aware search: with the byte-based string.find the
	-- multi-byte members of this class are split into single bytes, so
	-- unrelated characters (e.g. the combining diaeresis of a decomposed ü)
	-- would be reported as a match.
	if rfind(text, "[łňńĺh]") then
		n_diff = true
	end

	text = rsub(text, "ĺ", "ʎ")
	text = rsub(text, "ń", "ɲ")
	text = rsub(text, "N", "n")
	text = rsub(text, "L", "l")
	text = rsub(text, "ň", "n")
	text = rsub(text, "ł", "l")

	--c, g, q
	text = rsub(text, "ng([^aeiouüwhlr])", "n%1") -- [[Bangkok]], [[angstrom]], etc.
	text = rsub(text, "q", "k") -- [[quark]], [[Qatar]], [[burqa]], [[Iraq]], etc.
	text = rsub(text, "c", "k") -- [[campus]], etc.

	--alphabet-to-phoneme
	text = rsub(text, "tx", "C") --not the real sound
	text = rsub(text, "tz", "Ś") --not the real sound
	text = rsub(text, "ts", "S") --not the real sound
	text = rsub(text, "ll", "ʎ")
	text = rsub(text, "dd", "ɟ")
	text = rsub(text, "tt", "c")
	text = rsub(text, "x", "ʃ")
	text = rsub(text, "#p([st])", "#%1") -- [[psicologia]], [[pterodaktilo]]
	text = rsub(text, "[gñrvz]",
		{["g"]="ɡ",["ñ"]="ɲ", ["r"]="ɾ", ["v"]="b", ["z"]="ś"})

	--/x/ sound
	-- J is a temporary marker so that a northern <j> is still detectable below
	text = rsub(text, "j", (northern and "J" or "x"))
	if rfind(text, "[Jx]") then
		n_diff = true
	end
	text = rsub(text, "J", "j")

	--cases in which <j> is always /j/ (requires respelling)
	text = rsub(text, "y", "j")

	-- trilled r (in all cases except between vowels)
	text = rsub(text, "ɾɾ", "r")
	text = rsub(text, "([aeiou])ɾ([aeiou])", "%1ŕ%2") -- protect intervocalic taps
	text = rsub(text, "ɾ", "r")
	text = rsub(text, "ŕ", "ɾ")

	-- n becomes m before a labial, also across separators
	text = rsub(text, "n([# .]*[bpm])", "m%1")

	--syllable division
	local vowel_to_glide = { ["i"] = "i.", ["u"] = "u." }

	-- i and u directly followed by a vowel -> i. and u. (syllable break after them)
	text = rsub_repeatedly(text, "(" .. V .. "*)([iu])(" .. V .. ")",
		function (v1, iu, v2) return v1 .. vowel_to_glide[iu] .. v2 end
	)

	text = rsub_repeatedly(text, "(" .. V .. ")(" .. C .. W .. "?" .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. C .. ")(" .. C .. C .. V .. ")", "%1.%2")
	-- stop + liquid clusters belong to the following syllable
	text = rsub(text, "([pbktdɡ])%.([lɾ])", ".%1%2")
	text = rsub_repeatedly(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
	text = rsub_repeatedly(text, "([aeo])([aeo])", "%1.%2")
	text = rsub(text, "([iu])([aeo])", "%1.%2")
	text = rsub_repeatedly(text, "ii", "i.i")
	text = rsub_repeatedly(text, "uu", "u.u")

	--diphthongs
	text = rsub(text, "ih?([aeou])", "j%1")
	text = rsub(text, "uh?([aeio])", "w%1")

	local words = rsplit(text, " ")
	for j, word in ipairs(words) do

		local syllables = rsplit(word, "%.")

		-- Vowels are nasalized if followed by nasal in same syllable.
		if phonetic then
			for i = 1, #syllables do
				-- first check for two vowels (veinte)
				syllables[i] = rsub(syllables[i], "(" .. V .. ")(" .. V .. ")([mnɲ])",
					"%1" .. TILDE .. "%2" .. TILDE .. "%3")
				-- then for one vowel
				syllables[i] = rsub(syllables[i], "(" .. V .. ")([mnɲ])", "%1" .. TILDE .. "%2")
			end
		end

		-- Reconstruct the word; syllable dots are kept only in phonetic output.
		words[j] = table.concat(syllables, phonetic and "." or "")
	end

	text = table.concat(words, " ")

	--phonetic transcription
	if phonetic then
		-- s, z, f before voiced consonants
		local voiced = "mnɲbdɟɡʎl"
		local r = "ɾr"
		local tovoiced = {
			["s"] = "z̺",
			["ś"] = "z̻",
			["f"] = "v",
		}
		local function voice(sound, following)
			return tovoiced[sound] .. following
		end
		text = rsub(text, "([sś])(" .. separator_c .. "*[" .. voiced .. r .. "])", voice)
		text = rsub(text, "(f)(" .. separator_c .. "*[" .. voiced .. "])", voice)

		-- fricative vs. stop allophones; first convert stops to fricatives, then back to stops
		-- after nasals and sometimes after l
		local stop_to_fricative = {["b"] = "β", ["d"] = "ð", ["ɡ"] = "ɣ"}
		local fricative_to_stop = {["β"] = "b", ["ð"] = "d", ["ɣ"] = "ɡ"}
		text = rsub(text, "[bdɡ]", stop_to_fricative)
		text = rsub(text, "([mnɲ]" .. separator_c .. "*)([βɣ])",
			function(nasal, fricative) return nasal .. fricative_to_stop[fricative] end
		)
		-- stops are kept at the very beginning of the text / after a foot boundary (##)
		text = rsub(text, "##β","##b")
		text = rsub(text, "##ð","##d")
		-- restore the IPA letter ɡ (U+0261), not ASCII g, to stay consistent
		-- with fricative_to_stop and the rest of the transcription
		text = rsub(text, "##ɣ","##ɡ")

		text = rsub(text, "([lʎmnɲ]" .. separator_c .. "*)([ð])",
			function(nasal_l, fricative) return nasal_l .. fricative_to_stop[fricative] end
		)
		text = rsub(text, "[td]", {["t"] = "t̪", ["d"] = "d̪"})

		-- nasal assimilation before consonants
		local labiodental, dentialveolar, alveolopalatal, palatal, velar =
			"ɱ", "n̪", "nʲ", "ɲ", "ŋ"
		local nasal_assimilation = {
			["f"] = labiodental,
			["t"] = dentialveolar, ["d"] = dentialveolar,
			["C"] = alveolopalatal,
			["ʃ"] = alveolopalatal,
			["ɟ"] = palatal, ["c"] = palatal, ["ʎ"] = palatal, ["j"] = palatal,
			["k"] = velar, ["x"] = velar, ["ɡ"] = velar,
		}
		text = rsub(text, "n(" .. separator_c .. "*)(.)",
			function(seps, following) return (nasal_assimilation[following] or "n") .. seps .. following end
		)

		-- lateral assimilation before consonants
		text = rsub(text, "l(" .. separator_c .. "*)(.)",
			function(seps, following)
				local l = "l"
				if following == "t" or following == "d" then -- dentialveolar
					l = "l̪"
				elseif following == "C" or following == "ʃ" then -- alveolopalatal
					l = "lʲ"
				elseif following == "ɟ" or following == "c" or following == "j" then -- palatal
					l = "ʎ"
				end
				return l .. seps .. following
			end)

		-- voiced fricatives are actually approximants
		text = rsub(text, "([βðɣ])", "%1̞")

		-- northern /h/ is usually [ɦ]
		text = rsub(text, "h", "ɦ")

	end

	-- lack of /h/ in Southern dialects
	text = rsub(text, "h", (northern and "h" or ""))
	text = rsub(text, "ɦ", (northern and "ɦ" or ""))

	--semivowels
	-- NOTE(review): the text is still NFD at this point and nasalization was
	-- added as a separate combining tilde, so confirm that the nasal vowel
	-- letters in these classes match nasalized diphthongs as intended.
	text = rsub(text, "([aeouãẽõũ][iĩ])", "%1̯")
	text = rsub(text, "([aeioãẽĩõ][uũ])", "%1̯")

	-- convert fake symbols to real ones
	local final_conversions =  {
		["S"] = "t͡s̺",
		["Ś"] = "t͡s̻",
		["s"] = "s̺",
		["ś"] = "s̻",
		["C"] = "t͡ʃ",
		["X"] = "x"
	}
	text = rsub(text, "[SŚsśCX]", final_conversions)

	-- remove # symbols at word and text boundaries
	text = rsub(text, "#", "")
	text = mw.ustring.toNFC(text)

	local ret = {
		text = text,
		northern_different = n_diff,
	}

	return ret
end

-- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-- Entry point for direct #invoke calls: reads the arguments from frame.args
-- (1 = term, style = required dialect style, phonetic = boolean) and returns
-- only the transcription string produced by export.IPA.
function export.IPA_string(frame)
	local spec = {}
	spec[1] = {}
	spec["style"] = {required = true}
	spec["phonetic"] = {type = "boolean"}

	local iargs = require("Module:parameters").process(frame.args, spec)
	return export.IPA(iargs[1], iargs.style, iargs.phonetic).text
end


-- Template entry point. Reads the arguments of the parent (template) frame,
-- transcribes the term (argument 1 or the page title) and returns wikitext
-- with one bulleted line per style group, each giving the phonemic and the
-- phonetic transcription. Northern and Southern lines are shown separately
-- only when export.IPA reports that the styles differ.
function export.show(frame)
	local param_specs = {
		[1] = {},
		["pre"] = {},
		["post"] = {},
		["ref"] = {},
		["style"] = {},
		["bullets"] = {type = "number", default = 1},
	}
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, param_specs)

	local phonemic_by_style, phonetic_by_style = {}, {}
	local groups = {}
	local term = args[1] or mw.title.getCurrentTitle().text

	-- Compute and cache both transcriptions for one style.
	local function compute(style)
		phonemic_by_style[style] = export.IPA(term, style, false)
		phonetic_by_style[style] = export.IPA(term, style, true)
	end

	-- Register a style under the group identified by group_tag, creating the
	-- group when it does not exist yet.
	local function add_style(group_tag, tag, style)
		if not phonemic_by_style[style] then
			compute(style)
		end
		local entry = {
			tag = tag,
			phonemic = phonemic_by_style[style],
			phonetic = phonetic_by_style[style],
		}
		for _, group in ipairs(groups) do
			if group.tag == group_tag then
				table.insert(group.styles, entry)
				return
			end
		end
		groups[#groups + 1] = {
			tag = group_tag,
			styles = {entry},
		}
	end

	compute("northern_style")
	if phonemic_by_style["northern_style"].northern_different then
		add_style("Northern", "Northern", "northern_style")
		add_style("Southern", "Southern", "southern_style")
	else
		add_style(false, false, "southern_style")
	end

	-- If only one style group, don't indicate the style.
	if #groups == 1 then
		local only = groups[1]
		only.tag = false
		if #only.styles == 1 then
			only.styles[1].tag = false
		end
	end

	-- Build the displayed line for one style plus a plain-text approximation
	-- of it that is only used to estimate the rendered width.
	local function render(tag, entry, is_first)
		local slashed = "/" .. entry.phonemic.text .. "/"
		local bracketed = "[" .. entry.phonetic.text .. "]"
		local prons = {
			{
				pron = slashed,
				qualifiers = tag and {tag} or nil,
			},
			{
				pron = bracketed,
			},
		}
		local bullet = string.rep("*", args.bullets) .. " "
		-- pre/ref/post are attached to the first line only
		local pre, post = "", ""
		if is_first then
			if args.pre then
				pre = args.pre .. " "
			end
			post = (args.ref or "") .. (args.post and " " .. args.post or "")
		end
		local formatted = bullet .. pre .. m_IPA.format_IPA_full(lang, prons) .. post
		local qualifier = tag and "(" .. tag .. ") " or ""
		local formatted_for_len = bullet .. pre .. "IPA(key): " .. qualifier ..
			slashed .. ", " .. bracketed .. post
		return formatted, formatted_for_len
	end

	-- Format every group (and every member of multi-style groups) and track
	-- the longest plain-text line.
	local maxlen = 0
	for i = 1, #groups do
		local group = groups[i]
		local first_group = i == 1
		if #group.styles == 1 then
			local single = group.styles[1]
			group.formatted, group.formatted_for_len = render(single.tag, single, first_group)
		else
			group.formatted, group.formatted_for_len = render(group.tag, group.styles[1], first_group)
			for j, member in ipairs(group.styles) do
				member.formatted, member.formatted_for_len =
					render(member.tag, member, first_group and j == 1)
			end
		end

		local widest = ulen(group.formatted_for_len)
		if #group.styles > 1 then
			for _, member in ipairs(group.styles) do
				widest = math.max(widest, ulen(member.formatted_for_len))
			end
		end
		maxlen = math.max(maxlen, widest)
	end

	-- Single-style groups are emitted as-is; multi-style groups are wrapped in
	-- a collapsible vsSwitcher block.
	local lines = {}
	for i = 1, #groups do
		local group = groups[i]
		if #group.styles == 1 then
			lines[#lines + 1] = group.formatted
		else
			local inline = '\n<div class="vsShow" style="display:none">\n' .. group.formatted .. "</div>"
			local expanded = {}
			for k, member in ipairs(group.styles) do
				expanded[k] = member.formatted
			end
			local full = '\n<div class="vsHide">\n' .. table.concat(expanded, "\n") .. "</div>"
			local em_length = math.floor(maxlen * 0.68) -- from [[Module:grc-pronunciation]]
			lines[#lines + 1] = '<div class="vsSwitcher" data-toggle-category="pronunciations" style="width: ' .. em_length .. 'em; max-width:100%;"><span class="vsToggleElement" style="float: right;">&nbsp;</span>' .. inline .. full .. "</div>"
		end
	end

	return table.concat(lines, "\n")
end

return export