local export = {}
local m_str_utils = require("Module:string utilities")
local m_utilities = require("Module:utilities")
local m_ja = require("Module:ja")
local show_labels = require("Module:labels").show_labels
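--[=[
Other modules used (loaded on demand inside export.show):
[[Module:parameters]], [[Module:languages]], [[Module:debug/track]], [[Module:ja/data/kyu]]
]=]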
local concat = table.concat
local convert_iteration_marks = require("Module:Hani").convert_iteration_marks
local find = string.find
local gsplit = m_str_utils.gsplit
local gsub = string.gsub
local kata_to_hira = m_ja.kata_to_hira
local insert = table.insert
local match = string.match
local remove = table.remove
local split = m_str_utils.split
local sub = string.sub
local ugsub = mw.ustring.gsub
local ulen = m_str_utils.len
local umatch = mw.ustring.match
local usub = m_str_utils.sub
local PAGENAME = mw.loadData("Module:headword/data").pagename
local NAMESPACE = mw.title.getCurrentTitle().nsText
local d_range = mw.loadData("Module:ja/data/range")
local yomi_data = mw.loadData("Module:kanjitab/data")
-- Glossary links displayed beneath each kanji in the table. The table is
-- indexed with the value returned by m_ja.kanji_grade (see export.show).
local kanji_grade_links = {}
-- Indices 1-6 all point at the kyōiku kanji glossary entry and differ only in the displayed grade number.
for grade = 1, 6 do
	kanji_grade_links[grade] = "[[Appendix:Japanese_glossary#kyōiku_kanji|Grade: " .. grade .. "]]"
end
kanji_grade_links[7] = "[[Appendix:Japanese_glossary#jōyō_kanji|Grade: S]]"
kanji_grade_links[8] = "[[Appendix:Japanese_glossary#jinmeiyō_kanji|Jinmeiyō]]"
kanji_grade_links[9] = "[[Appendix:Japanese_glossary#hyōgai_kanji|Hyōgai]]"
-- Entry point called from templates. Builds the "Kanji in this term" table,
-- the optional "Alternative spelling(s)" and "Variant form(s)" tables, and the
-- categories for the current page.
--
-- Invocation arguments (frame.args):
--   1            language code; used for lang="" attributes and to fetch the language object.
--
-- Template arguments (frame:getParent().args):
--   1=, 2=, ...  readings, one per kanji or kanji group; trailing digits give the number of
--                kanji the reading spans (e.g. "きょう2").
--   k=           per-reading "actual" reading, displayed after " > " and used in the sort key.
--   o=           per-reading okurigana, displayed in parentheses and appended in the sort key.
--   r=           if given, adds the "terms with rendaku" category.
--   sort=        explicit sort key; overrides the one built from the readings.
--   yomi= (y=)   comma-separated yomi types, each optionally followed by a kanji count.
--   ateji=       "y" to mark every kanji, or ";"-separated positions ("2") / ranges ("1,3").
--   alt=         comma-separated alternative spellings, each optionally "form:label label ...".
--   alt2=        comma-separated variant forms.
--   kyu=         kyūjitai form(s); "-" suppresses the automatically derived form.
--   clearright=  boolean; adds "clear:right" to the table style.
--   pagename=    overrides the page name (tracked when used in mainspace).
--
-- Returns the wikitext of the table(s) followed by the formatted categories.
-- Raises an error for unrecognised yomi types, invalid yomi lengths, yomi types
-- applied to the wrong number of kanji, too many readings / k= / o= values, and
-- non-kana characters in a reading.
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = { list = true, allow_holes = true },
		k = { list = true, allow_holes = true },
		o = { list = true, allow_holes = true },
		r = {},
		sort = {},
		yomi = {},
		ateji = {},
		alt = {},
		alt2 = {},
		kyu = { list = true },
		y = {alias_of = "yomi"},
		clearright = {type = "boolean"},
		pagename = {},
	})
	local lang_code = frame.args[1]
	local lang = require("Module:languages").getByCode(lang_code)
	local lang_name = lang:getCanonicalName()
	if args.pagename and NAMESPACE == "" then
		require("Module:debug/track")("kanjitab/pagename param in mainspace")
	end
	local pagename = args.pagename or PAGENAME
	local categories = {}
	local cells = {}

	-- Extract kanji and non-kanji. non_kanji[i] is the text before kanji[i];
	-- the last element of non_kanji is whatever follows the final kanji.
	local kanji = {}
	local non_kanji = {}
	-- 々 and 〻 (handled by Module:Hani's convert_iteration_marks; presumably replaced
	-- by the kanji they repeat — TODO confirm)
	pagename = convert_iteration_marks(pagename)
	local kanji_border = 1
	-- ugsub is used purely as an iterator here; its return value is discarded.
	ugsub(pagename, "()([" .. d_range.kanji .. "々〻])()", function(p1, w1, p2)
		insert(non_kanji, usub(pagename, kanji_border, p1 - 1))
		kanji_border = p2
		insert(kanji, w1)
	end)
	insert(non_kanji, usub(pagename, kanji_border))

	-- kyujitai
	local kyu = args.kyu
	if kyu[1] == "-" then
		kyu = {}
	elseif kyu[1] == nil then
		-- No kyu= given: try to derive the kyūjitai form from the data module.
		local form_kyu = {non_kanji[1]}
		local kyu_data = mw.loadData("Module:ja/data/kyu")
		local has_kyu, has_kyu_nonsupple, has_shin = false, false, false
		for i, v in ipairs(kanji) do
			-- kyu_data[1] is searched as one string: the capture is whatever non-space
			-- text directly follows the kanji.
			local v_kyu = match(kyu_data[1], v .. "(%S*)%s")
			if v_kyu == nil then
				-- Kanji not listed: keep it unchanged.
				insert(form_kyu, v)
			elseif v_kyu == "" then
				-- Listed with nothing after it: abandon the automatic form entirely.
				has_shin = true
				break
			elseif v_kyu:sub(1, 1) == "&" then
				-- Replacement starting with "&" (presumably an HTML entity for a
				-- character outside the BMP — TODO confirm): the form is then linked
				-- to the current page name rather than to itself (see below).
				has_kyu = true
				insert(form_kyu, v_kyu)
			else
				has_kyu, has_kyu_nonsupple = true, true
				insert(form_kyu, v_kyu)
			end
			insert(form_kyu, non_kanji[i + 1])
		end
		if not has_shin and has_kyu then
			kyu[1] = (has_kyu_nonsupple and "" or pagename .. "|") .. concat(form_kyu)
		end
		if find(pagename, "弁") then
			require("Module:debug/track")("kanjitab/ambiguous kyujitai for 弁")
			-- Sentinel value; replaced by an error message when the forms table is built.
			kyu[1] = "which 弁?"
		end
	end

	-- Parse yomi=. all_yomi gets one entry per kanji; entries produced by the same
	-- input item are the *same* table, which is how ranges are detected later.
	local all_yomi, missing_yomi
	if args.yomi then
		all_yomi = {}
		local keys = split(args.yomi, ",")
		for i, key in ipairs(keys) do
			local yomi_key, len = match(key, "^(%l*)(%d*)$")
			-- yomi_key is nil when the item is malformed (e.g. contains uppercase or
			-- punctuation); fall back to the raw item so the error message can still be built.
			local yomi = yomi_key and yomi_data[yomi_key] or error("The yomi type \"" .. (yomi_key or key) .. "\" in the input \"" .. args.yomi .. "\" is not recognized.")
			if len ~= "" then
				-- Disallow length 0 or leading zeroes, as a sanity check.
				len = match(len, "^[1-9]%d*$") and tonumber(len) or error("Cannot specify a length of " .. len .. " kanji.")
			-- Only one yomi with no length given: apply to all kanji.
			elseif i == 1 and #keys == 1 then
				len = #kanji
			else
				len = 1
			end
			local yomi_type = yomi.type
			-- If the on'yomi is not specified as goon/kanon/toon/soon, only "on".
			if yomi_type == "on'yomi" then
				require("Module:debug/track")("kanjitab/unspecified on")
			elseif yomi_type == "jūbakoyomi" then
				require("Module:debug/track")("kanjitab/jubakoyomi")
			elseif yomi_type == "yutōyomi" then
				require("Module:debug/track")("kanjitab/yutoyomi")
			end
			-- If the yomi requires a specific number of kanji (e.g. jūbakoyomi, yutōyomi).
			local req_kanji = yomi.required_kanji
			if req_kanji and #kanji ~= req_kanji then
				error("The yomi type \"" .. yomi_type .. "\" is only applicable to terms with " .. req_kanji .. " kanji.")
			elseif yomi_type == "none" then
				missing_yomi = true
			end
			-- Insert yomi data for each applicable kanji. Wrap in a table first, as the range for this input yomi is determined by its identity, so that (e.g.) "kun,kun" is still treated as two separate inputs.
			local wrapped = {data = yomi}
			for _ = 1, len do
				insert(all_yomi, wrapped)
			end
		end
		-- If there are any yomi slots left, handle them as empty.
		if #all_yomi < #kanji then
			missing_yomi = true
			for _ = #all_yomi + 1, #kanji do
				insert(all_yomi, {data = yomi_data.none})
			end
		end
	elseif #kanji > 0 then
		missing_yomi = true
	end
	if missing_yomi then
		insert(categories, lang_name .. " terms with missing yomi")
	end

	-- process readings
	-- Each entry of readings / readings_actual is {kana or nil, number of kanji covered}.
	local readings = {}
	local readings_actual = {}
	local reading_length_total = 0
	for i = 1, args[1].maxindex do
		local reading_kana, reading_length = match(args[1][i] or "", "^(%D*)(%d*)$")
		reading_kana = reading_kana ~= "" and reading_kana or nil
		-- A missing reading, or one without a trailing number, covers one kanji.
		reading_length = reading_kana and tonumber(reading_length) or 1
		insert(readings, {reading_kana, reading_length})
		reading_length_total = reading_length_total + reading_length
	end
	if reading_length_total > #kanji then
		error("Readings for " .. reading_length_total .. " kanji are given, but this word has only " .. #kanji .. " kanji.")
	else
		-- Pad with empty readings so that every kanji gets a cell.
		for _ = reading_length_total + 1, #kanji do
			insert(readings, {nil, 1})
		end
	end

	-- Table header. The long-bracket pieces must stay unindented: their line breaks
	-- and leading characters are part of the emitted wikitext.
	local table_head = [=[
{| class="wikitable kanji-table floatright" style="text-align: center; ]=] .. (args.clearright and " clear:right;" or "") .. [=["
! ]=] .. (#kanji > 1 and "colspan=\"" .. #kanji .. "\" " or "") .. [=[style="font-weight: normal;" | [[Appendix:Japanese_glossary#kanji|Kanji]] in this term
|- lang="]=] .. lang_code .. [=[" class="Jpan" style="font-size: 2em; background: white; line-height: 1em;"
]=]

	if args.k.maxindex and args.k.maxindex > args[1].maxindex then
		error("kanjitab/too many k")
	end
	if args.o.maxindex and args.o.maxindex > args[1].maxindex then
		error("kanjitab/too many o")
	end

	-- ateji: is_ateji[n] is true when the n-th kanji is marked as ateji.
	local is_ateji = {}
	if args.ateji then
		local ateji = args.ateji
		local cat_ateji = false
		if ateji == "y" then
			for i = 1, #kanji do
				is_ateji[i] = true
			end
			cat_ateji = true
		else
			for item in gsplit(ateji, ";") do
				-- gsub is used only for its callback: at most one of the two anchored
				-- patterns can match a given item.
				-- Single position, e.g. "2".
				gsub(item, "^(%d+)$", function(a)
					is_ateji[tonumber(a)] = true
					cat_ateji = true
				end)
				-- Inclusive range, e.g. "1,3".
				gsub(item, "^(%d+),(%d+)$", function (a, b)
					for j = tonumber(a), tonumber(b) do
						is_ateji[j] = true
					end
					cat_ateji = true
				end)
			end
		end
		if cat_ateji then insert(categories, lang_name .. " terms spelled with ateji") end
	end

	-- if hiragana readings were passed,
	-- make the "spelled with ..." categories, the readings cells on the lower level and build the sort key
	-- otherwise rely on the pagename to make the original kanjitab and categories
	local cells_above = {}
	local cells_below = {}
	local kanji_pos = 1
	for i, reading in ipairs(readings) do
		local reading_kana, reading_length = reading[1], reading[2]
		local cell = {}
		-- A single-kanji cell spans both rows (reading + grade); a multi-kanji reading
		-- spans several columns and the grades go into the row below.
		if reading_length <= 1 then
			insert(cell, "| rowspan=\"2\" | ")
		else
			insert(cell, "| colspan =\"" .. reading_length .. "\" | ")
		end
		-- display reading, actual reading and okurigana
		if reading_kana then
			if reading_kana ~= "" and reading_kana ~= "-" and umatch(reading_kana, "[^" .. d_range.kana .. "]") then
				error("Please remove any non-kana characters from the reading input " .. reading_kana .. ".")
			end
			local actual_reading = args.k[i]
			local okurigana = args.o[i]
			local okurigana_text = okurigana and "(" .. okurigana .. ")" or ""
			local actual_reading_text = actual_reading and " > " .. actual_reading .. okurigana_text or ""
			local text = reading_kana .. okurigana_text .. actual_reading_text
			readings_actual[i] = {(actual_reading or reading_kana) .. (okurigana or ""), reading_length}
			insert(cell, "<span class=\"Jpan\" lang=\"" .. lang_code .. "\">" .. text .. "</span>")
			if reading_length <= 1 then insert(cell, "<br/>") end
		else
			readings_actual[i] = {nil, 1}
		end
		-- display kanji grade, categorize
		for j = kanji_pos, kanji_pos + reading_length - 1 do
			local single_kanji = kanji[j]
			local kanji_grade = m_ja.kanji_grade(single_kanji)
			local ateji_text = is_ateji[j] and "<br/><small>([[Appendix:Japanese glossary#ateji|ateji]])</small>" or ""
			local yomi_type, compound
			if all_yomi then
				local yomi = all_yomi[j].data
				yomi_type, compound = yomi.type, yomi.compound_reading
			end
			if not reading_kana then
				if yomi_type ~= "irregular" then
					require("Module:debug/track")("kanjitab/no reading")
				end
				insert(categories, lang_name .. " terms spelled with " .. single_kanji)
			elseif reading_length ~= 1 or yomi_type == "irregular" then
				insert(categories, lang_name .. " terms spelled with " .. single_kanji)
			elseif compound then
				-- Re-enable once all bad jukujikun calls are fixed.
				-- error("The yomi type \"" .. yomi_type .. "\" is only applicable to compound character readings, so cannot apply to " .. single_kanji .. " read as " .. reading_kana .. ". If this is intended as part of a " .. yomi_type .. " reading, please enter the whole reading as one, followed by the number of kanji it applies to.")
				require("Module:debug/track")("kanjitab/single kanji with jukujikun")
			else -- Subcategorize by reading.
				insert(categories, lang_name .. " terms spelled with " .. single_kanji .. " read as " .. kata_to_hira(reading_kana))
			end
			if reading_length <= 1 then
				insert(cell, "<small>" .. kanji_grade_links[kanji_grade] .. "</small>" .. ateji_text)
			else
				insert(cells_below, "| <small>" .. kanji_grade_links[kanji_grade] .. "</small>" .. ateji_text)
			end
		end
		insert(cells_above, concat(cell))
		kanji_pos = kanji_pos + reading_length
	end

	insert(cells, "|- style=\"background: white;\"")
	if #cells_below > 0 then
		insert(cells, concat(cells_above, "\n"))
		insert(cells, "|- style=\"background: white;\"")
		insert(cells, concat(cells_below, "\n"))
	else
		-- No second row is needed, so drop the rowspan attributes.
		for i, v in ipairs(cells_above) do
			cells_above[i] = (gsub(v, "| rowspan=\"2\" | ", "| "))
		end
		insert(cells, concat(cells_above, "\n"))
	end

	if args.r then
		insert(categories, lang_name .. " terms with rendaku")
	end

	-- Yomi row and "terms read with ..." category.
	if all_yomi then
		insert(cells, "|-")
		local len, all_on, yomi_cat = 1, true
		for i, yomi in ipairs(all_yomi) do
			-- If the next kanji has the same yomi table, it's part of the same range.
			if yomi == all_yomi[i + 1] then
				len = len + 1
			else
				yomi = yomi.data
				local yomi_type = yomi.type
				local display = yomi.display or yomi_type
				local appendix = yomi.appendix
				insert(cells, "| colspan=\"" .. len .. "\" |" .. (
					appendix == false and display or
					"[[Appendix:Japanese_glossary#" .. (appendix or yomi_type) .. "|" .. display .. "]]"
				))
				-- Categorise as irregular if any irregular yomi are found; otherwise, categorise if all yomi are of the same type. If yomi are of different types but are all on, on'yomi is used as a fallback.
				if yomi_cat ~= "irregular" then
					local cat_type = yomi_type
					if cat_type == "irregular" or yomi_cat == nil then
						yomi_cat = cat_type
					elseif yomi_cat ~= cat_type then
						yomi_cat = false
					end
					if not yomi.onyomi then
						all_on = false
					end
				end
				len = 1
			end
		end
		if yomi_cat then
			-- Check yomi_data first, in case cat_type is "irregular"; if no match, must be some other type, so get it from the first yomi in all_yomi, since not all yomi types are yomi_data keys.
			yomi_cat = yomi_data[yomi_cat] or all_yomi[1].data
		elseif all_on then
			yomi_cat = yomi_data.on
		elseif #all_yomi == 2 then
			local y1, y2 = all_yomi[1].data, all_yomi[2].data
			-- Only applies when the whole page name is exactly the two kanji.
			if ulen(pagename) == 2 then
				if y1.onyomi and y2.type == "kun'yomi" then
					yomi_cat = yomi_data.j -- jūbakoyomi
				elseif y1.type == "kun'yomi" and y2.onyomi then
					yomi_cat = yomi_data.y -- yutōyomi
				end
			end
		end
		if yomi_cat then
			local category = yomi_cat.reading_category
			-- reading_category == false suppresses the category; nil falls back to the type name.
			if category ~= false then
				insert(categories, lang_name .. " " .. "terms read with " .. (category or yomi_cat.type))
			end
		end
	end

	-- Assemble the kanji table (omitted entirely when the page name has no kanji).
	local kanji_table
	if #kanji > 0 then
		kanji_table = table_head
		for _, v in ipairs(kanji) do
			kanji_table = kanji_table .. "| style=\"padding: 0.5em;\" | [[" .. v .. "#" .. lang_name .. "|" .. v .. "]]\n"
		end
		kanji_table = kanji_table .. concat(cells, "\n") .. "\n|}"
	else
		kanji_table = ""
	end

	-- "Alternative spelling(s)" table: kyūjitai forms followed by |alt= forms.
	local forms_table = ""
	if args.alt == "" or args.alt == "-" then args.alt = nil end
	if kyu[1] or args.alt then
		local forms = {}
		-- |kyu=
		if kyu[1] == "which 弁?" then
			insert(forms, "<strong class=\"error\" style=\"font-size:75%;\">Please specify the correct kyujitai for 弁 with the parameter \"kyu\".</strong>[[Category:Requests for cleanup in " .. lang_name .. " entries]]")
			remove(kyu, 1)
		end
		for _, form in ipairs(kyu) do
			-- A form may be given as "link target|display text".
			local form_linkto, form_display = match(form, "^(.+)|(.+)$")
			if not form_linkto then form_linkto, form_display = form, form end
			insert(forms, concat{
				"<span class=\"Jpan\" lang=\"" .. lang_code .. "\" style=\"font-family:游ゴシック, HanaMinA, sans-serif; font-size:140%;\">[[",
				form_linkto,
				form_linkto == pagename and "|" or "#" .. lang_name .. "|",
				form_display,
				"]]</span> <small>",
				show_labels {labels = {"kyūjitai"}, lang = lang, nocat = true },
				"</small>",
			})
		end
		-- |alt=
		if args.alt then
			for form in gsplit(args.alt, ",") do
				-- "form:label1 label2" attaches space-separated labels to the form.
				local i_colon = find(form, ":")
				if i_colon then
					local altform = sub(form, 1, i_colon - 1)
					local altlabels = split(sub(form, i_colon + 1), " ")
					insert(forms, concat{
						"<span class=\"Jpan\" lang=\"" .. lang_code .. "\" style=\"font-size:140%\">[[",
						altform,
						"#" .. lang_name .. "|",
						altform,
						"]]</span> <small>",
						show_labels { labels = altlabels, lang = lang, nocat = true },
						"</small>",
					})
				else
					insert(forms, concat{
						"<span class=\"Jpan\" lang=\"" .. lang_code .. "\" style=\"font-size:140%\">[[",
						form,
						"#" .. lang_name .. "|",
						form,
						"]]</span>"
					})
				end
			end
		end
		-- Lua drops a newline that immediately follows an opening long bracket, so the
		-- line break between the header cell and the "|-" row separator must be added
		-- explicitly; otherwise the header would read "Alternative spellings|-".
		forms_table = "\n" .. [[{| class="wikitable floatright"
! style="font-weight:normal" | Alternative spelling]] .. (#forms == 1 and "" or "s") .. "\n" .. [[
|-
| style="text-align:center;font-size:108%" | ]] .. concat(forms, "<br>") .. "\n|}"
	end

	-- "Variant form(s)" table from |alt2=.
	local forms_table2 = ""
	if args.alt2 and args.alt2 ~= "" and args.alt2 ~= "-" then
		local forms2 = {}
		for form in gsplit(args.alt2, ",") do
			insert(forms2, "<span class=\"Jpan\" lang=\"" .. lang_code .. "\">[[" .. form .. "#" .. lang_name .. "|" .. form .. "]]</span>")
		end
		forms_table2 = "\n" .. [[{| class="wikitable floatright"
! style="font-weight:normal" | Variant form]] .. (#forms2 == 1 and "" or "s") .. "\n" .. [[
| style="text-align:center;font-size:140%" | ]] .. concat(forms2, "<br>") .. "\n|}"
	end

	-- use user-provided sortkey if we got one, otherwise
	-- use the sortkey we've already made by combining the
	-- readings if provided, if we have neither then
	-- default to empty string and don't sort
	local sortkey
	if args.sort then
		sortkey = args.sort
	else
		-- Interleave the non-kanji text with the (actual) readings, in page order.
		sortkey = {non_kanji[1]}
		local id = 1
		for _, v in ipairs(readings_actual) do
			-- Advance past the kanji covered by this reading to find the text that follows them.
			id = id + v[2]
			insert(sortkey, (v[1] or "") .. (non_kanji[id] or ""))
		end
		sortkey = concat(sortkey)
	end
	if sortkey == "" then
		sortkey = nil
	else
		sortkey = lang:makeSortKey(sortkey)
	end
	if sortkey ~= lang:makeSortKey(PAGENAME) then
		require("Module:debug/track"){"kanjitab/nonstandard sortkey", "kanjitab/nonstandard sortkey/" .. lang_code}
	end
	return kanji_table .. forms_table .. forms_table2 .. m_utilities.format_categories(categories, lang, sortkey)
end
return export