// User:Opencooper/showKanji.js

// This script shows, if found, the kanji and kana for an article
// It then calls another script, bindKana.js, to clean up the display of ruby
// For configuration, please see the documentation
// License: CC0

// Shared state, filled in as the asynchronous lookups complete.
// Wikidata item id of the current page (null when the page has none).
var wikidataId = mw.config.get( 'wgWikibaseItemId' );
// Japanese label being displayed; set by displayKanji().
var wikidataKanji;
// Regexes derived from the Japanese label; set by buildRegexes().
var kanjiRegexes = {};

/**
 * Entry point: bail out unless an article is being viewed, reserve a
 * placeholder under the page heading, and request the Japanese label
 * from Wikidata.
 */
function setup() {
    // If we're not reading an article, do nothing
    const isReadingArticle = mw.config.get( 'wgAction' ) === 'view'
        && mw.config.get( 'wgIsArticle' )
        && !location.search.split('oldid=')[1]
        && !mw.config.get("wgIsMainPage")
        && mw.config.get("wgContentLanguage") !== "ja";
    if (!isReadingArticle) {
        return;
    }

    // Assuming that if there's no wikidata, there're no 1:1 interlanguage links,
    // and we don't want cases where a page links to a subsection of a jawiki
    // article
    if (!wikidataId) {
        return;
    }

    // Placeholder so other elements don't push it down later
    let header;
    if ($('#firstHeading').length) { // Vector
        header = $('#firstHeading');
    } else if ($('.page-heading').length) { // Minerva
        header = $('.page-heading');
    } else {
        console.error("showKanji.js: Couldn't find a page heading. This skin ("
                      + mw.config.get( 'skin' ) + ") might not be supported.");
        return;
    }
    header.append("<div id='kanjiInfo' lang='ja' dir='ltr'></div>");

    // Get the Japanese label from wikidata
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities
    $.ajax({
        url: "https://www.wikidata.org/w/api.php",
        data: {
            action: "wbgetentities",
            ids: wikidataId,
            props: "labels",
            languages: "ja",
            format: "json",
            origin: "*"
        },
        success: parseJaLabel
    });
}

/**
 * Handle the wbgetentities response: display the Japanese label and, when
 * it is not purely kana, go on to look for a reading.
 *
 * @param {Object} response - JSON body returned by wbgetentities.
 */
function parseJaLabel(response) {
    // An error payload has no `entities`; treat it as "no label".
    const wikidataInfo = response && response.entities
        ? response.entities[wikidataId]
        : undefined;
    if (!wikidataInfo || !wikidataInfo.labels
        || jQuery.isEmptyObject(wikidataInfo.labels.ja)) {
        return;
    }

    let jaLabel = wikidataInfo.labels.ja.value;
    if (!jaLabel) {
        return;
    }

    jaLabel = jaLabel.toHalfWidth();
    buildRegexes(jaLabel);
    displayKanji(jaLabel);

    // If the japanese title is not just only kana, get the reading
    if (!kanjiRegexes.kanaOnly.test(jaLabel)) {
        requestKana();
    }
}

/**
 * Populate kanjiRegexes (latinOnly, kanaOnly, hiraganaOnly, katakanaOnly,
 * lead) for the given label.
 *
 * @param {string} kanji - Japanese label, already converted to half-width.
 */
function buildRegexes(kanji) {
    // Strip $kanji of all kanji and kana, adding whatever is left to the regex
    const reKanjiKana = /[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6Aぁ-ゔァ-ヴー-]/g;
    // The leftovers are spliced into character classes below, so the
    // characters that are special inside [...] must be escaped first
    // (hyphens were already removed by reKanjiKana).
    let kanjiStripped = kanji.replace(reKanjiKana, "").replace(/[\\\]^]/g, "\\$&");
    kanjiStripped += " ";
    // Need to add hyphen escaped since it has special behavior in regex classes
    kanjiStripped += "\\-";
    const kanjiAuxiliary = kanjiStripped.replace(/\w/g, "");

    kanjiRegexes.latinOnly = /^[A-Za-z0-9\-.?!/,:;@#$%&+=*'"・ ]+$/;
    kanjiRegexes.kanaOnly = new RegExp("^[ぁ-ゔァ-ヴー" + kanjiAuxiliary + "]+$");
    kanjiRegexes.hiraganaOnly = new RegExp("^[ぁ-ゔーA-Za-z" + kanjiAuxiliary + "]+$");
    kanjiRegexes.katakanaOnly = new RegExp("^[ァ-ヴーA-Za-z" + kanjiAuxiliary + "]+$");

    // Add midpoint for Latin in titles
    if (/\w/.test(kanji)) {
        kanjiStripped += "・";
    }
    const leadReBase = "([ぁ-ゔァ-ヴー" + kanjiStripped + "]+)";

    const kanjiEscaped = mw.util.escapeRegExp(kanji);
    // Account for spaces, but ignore backslash and other misc characters
    const reKanjiKanaLatin = /([\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6Aぁ-ゔァ-ヴーA-Za-z0-9])/g;
    let kanjiSpaced = kanjiEscaped.replace(/ /g, " ?");
    kanjiSpaced = kanjiSpaced.replace(reKanjiKanaLatin, "$1 ?");

    // Add kanji to regex to make sure we're not getting the reading of some
    // other term
    kanjiRegexes.lead = new RegExp(kanjiSpaced + "[^(\n)]*?\\(" + leadReBase, "i"); // brittle
}

/**
 * Render the label inside #kanjiInfo and tag the container with a class and
 * tooltip describing the script the label is written in.
 *
 * @param {string} kanji - Japanese label to display.
 */
function displayKanji(kanji) {
    wikidataKanji = kanji;
    const info = $("#kanjiInfo");
    // The label comes from a remote API, so insert it as text, not as markup.
    info.append($("<ruby>").text(kanji));

    // Add some classes so users can choose to not display for example
    // katakana-only kanji in their CSS
    if (kanjiRegexes.latinOnly.test(kanji)) {
        info.addClass("kanjiInfo-latin-only");
        info.prop("title", "Japanese title in Latin script");
        info.css("display", "none");
    } else if (kanjiRegexes.hiraganaOnly.test(kanji)) {
        info.addClass("kanjiInfo-hiragana-only");
        info.prop("title", "Japanese title in hiragana");
    } else if (kanjiRegexes.katakanaOnly.test(kanji)) {
        info.addClass("kanjiInfo-katakana-only");
        info.prop("title", "Japanese title in katakana");
    } else {
        info.prop("title", "Japanese title in kanji");
    }
}

/**
 * Request all Wikidata claims for the current item; parseKanaClaim() then
 * looks for a kana reading among them.
 */
function requestKana() {
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetclaims
    // We have to wholesale get all the claims instead of just one because the
    // kana might be present as a qualifier to another claim
    $.ajax({
        url: "https://www.wikidata.org/w/api.php",
        data: {
            action: "wbgetclaims",
            entity: wikidataId,
            format: "json",
            origin: "*"
        },
        success: parseKanaClaim
    });
}

/**
 * Look for a kana reading in the claims, first as a P1814 qualifier on one
 * of the name-like properties, then as a standalone P1814 claim. Falls back
 * to getInterlanguage() when none is found.
 *
 * @param {Object} response - JSON body returned by wbgetclaims.
 */
function parseKanaClaim(response) {
    const properties = {
        title: "P1476",
        nativeLabel: "P1705",
        officialName: "P1448",
        nameInNativeLanguage: "P1559"
    };
    const nameInKana = "P1814";
    // An error payload has no `claims`; treat it as "nothing found".
    const claims = (response && response.claims) || {};
    let kana;
    let source;

    // Try getting nameInKana as a qualifier to some properties
    for (const prop of Object.keys(properties)) {
        const claim = claims[properties[prop]] ? claims[properties[prop]][0] : undefined;
        // Snaks without a value (novalue/somevalue) carry no datavalue.
        const datavalue = claim && claim.mainsnak ? claim.mainsnak.datavalue : undefined;
        const kanji = datavalue && datavalue.value ? datavalue.value.text : undefined;
        if (typeof kanji !== "string") {
            continue;
        }

        const qualifier = claim.qualifiers && claim.qualifiers[nameInKana]
            ? claim.qualifiers[nameInKana][0]
            : undefined;
        if (kanji.replace(/ /g, "") === wikidataKanji.replace(/ /g, "")
            && qualifier && qualifier.datavalue) {
            kana = qualifier.datavalue.value;
            source = prop;
            break;
        }
    }

    // Try getting nameInKana as a general claim
    if (!kana && claims[nameInKana] && claims[nameInKana][0]) {
        const snak = claims[nameInKana][0].mainsnak;
        if (snak && snak.datavalue) {
            kana = snak.datavalue.value;
            source = "nameInKana";
        }
    }

    // We couldn't find nameInKana
    if (!kana) {
        getInterlanguage();
        return;
    }

    kana = kana.toHalfWidth();
    displayKana(kana);
    $("#kanjiInfo").addClass("kanjiInfo-wikidata");
    $("#kanjiInfo").addClass("kanjiInfo-wikidata-" + source);
}

/**
 * Ask the local wiki for the Japanese interlanguage link of the current
 * page and scrape the reading from that article; falls back to
 * getWiktionary() when there is no such link.
 */
function getInterlanguage() {
    const apiUrl = location.origin + "/w/api.php";
    // Documentation: https://www.search.com.vn/wiki/api.php?lang=en&action=help&modules=query%2Blanglinks
    $.ajax({
        url: apiUrl,
        data: {
            action: "query",
            format: "json",
            prop: "langlinks",
            lllang: "ja",
            titles: mw.config.get( 'wgTitle' )
        },
        success: function(response) {
            const pageId = mw.config.get( 'wgArticleId' );
            const pages = response && response.query ? response.query.pages : undefined;
            const page = pages ? pages[pageId] : undefined;
            const langlinks = page ? page.langlinks : undefined;
            if (!langlinks || !langlinks.length) {
                getWiktionary();
                return;
            }

            const jaLabel = langlinks[0]["*"].replace(/(.*)#.*/, "$1"); // rm anchors
            scrapeKana(jaLabel);
        }
    });
}

/**
 * Fetch the plain-text lead of a Japanese Wikipedia article.
 *
 * @param {string} jaLabel - Title of the jawiki article.
 */
function scrapeKana(jaLabel) {
    // Get jawiki article's lead wikitext
    // API docs: https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bextracts
    $.ajax({
        url: "https://ja.wikipedia.org/w/api.php",
        data: {
            action: "query",
            prop: "extracts",
            format: "json",
            redirects: true,
            exintro: true,
            exsentences: 2,
            exlimit: 1,
            explaintext: true,
            titles: jaLabel,
            origin: "*"
        },
        success: getFirstSentence
    });
}

/**
 * Extract the reading from the jawiki lead using kanjiRegexes.lead; falls
 * back to getWiktionary() when nothing usable is found.
 *
 * @param {Object} response - JSON body returned by the extracts query.
 */
function getFirstSentence(response) {
    const pages = (response && response.query && response.query.pages) || {};
    // Have to split parsing into two parts since jawiki pageid is unknown
    const pageId = Object.keys(pages)[0];
    const introText = pageId !== undefined ? pages[pageId].extract : undefined;
    if (!introText) {
        console.error("showKanji.js: TextExtracts failed to get a lead for the Japanese article.");
        getWiktionary();
        return;
    }

    const wikitext = introText.toHalfWidth();
    const kanaSearch = wikitext.match(kanjiRegexes.lead);
    if (!kanaSearch || kanaSearch.length !== 2) {
        getWiktionary();
        return;
    }

    // Rm trailing characters
    const kana = kanaSearch[1].replace(/[・、 ]$/, "");

    // Abort if our reading is only katakana (for non-Latin) or Latin
    if ((!kanjiRegexes.latinOnly.test(wikidataKanji) && kanjiRegexes.katakanaOnly.test(kana))
        || kanjiRegexes.latinOnly.test(kana)) {
        getWiktionary();
        return;
    }

    displayKana(kana);
    $("#kanjiInfo").addClass("kanjiInfo-jawiki");
}

// Adapted from:
//     http://ilog4.blogspot.com/2015/09/javascript-convert-full-width-and-half.html
//     https://stackoverflow.com/a/20488304/1995949
//     https://www.search.com.vn/wiki/en/Halfwidth_and_fullwidth_forms
/**
 * Convert full-width ASCII variants (U+FF01..U+FF5E) and the ideographic
 * space (U+3000) to their half-width equivalents.
 *
 * @returns {string} The converted string.
 */
String.prototype.toHalfWidth = function() {
    const halfWidth = this.replace(/[\uff01-\uff5e]/g, function(s) {
        return String.fromCharCode(s.charCodeAt(0) - 0xFEE0);
    });
    // The full-width space lies outside the range above, so map it separately.
    return halfWidth.replace(/\u3000/g, " ");
};

// We use the English Wiktionary because it has more terms and better structure
/**
 * Request the section list of the English Wiktionary entry for the label.
 */
function getWiktionary() {
    // API docs: https://www.search.com.vn/wiki/api.php?lang=en&action=help&modules=parse
    $.ajax({
        url: "https://en.wiktionary.org/w/api.php",
        data: {
            action: "parse",
            format: "json",
            page: wikidataKanji,
            prop: "sections",
            origin: "*"
        },
        success: findJapaneseSection
    });
}

/**
 * Locate the "Japanese" section of the Wiktionary entry and request its
 * rendered HTML.
 *
 * @param {Object} response - JSON body returned by action=parse (sections).
 */
function findJapaneseSection(response) {
    if (!response || response.error || !response.parse || !response.parse.sections) {
        return;
    }

    const sections = response.parse.sections;
    let sectionIndex;
    for (let i = 0; i < sections.length; i++) {
        if (sections[i].line === "Japanese") {
            sectionIndex = sections[i].index;
            break;
        }
    }

    if (sectionIndex == null) {
        return;
    }

    // API docs: https://www.search.com.vn/wiki/api.php?lang=en&action=help&modules=parse
    $.ajax({
        url: "https://en.wiktionary.org/w/api.php",
        data: {
            action: "parse",
            format: "json",
            page: wikidataKanji,
            prop: "text",
            section: sectionIndex,
            origin: "*"
        },
        success: parseWiktionary
    });
}

/**
 * Pull the reading out of the rendered Japanese section of a Wiktionary
 * entry and display it.
 *
 * @param {Object} response - JSON body returned by action=parse (text).
 */
function parseWiktionary(response) {
    if (!response || response.error || !response.parse || !response.parse.text) {
        return;
    }

    const html = response.parse.text["*"];
    const parsed = $($.parseHTML(html));

    // Wiktionary adds readings as furigana
    const headword = parsed.find(".headword:lang(ja)").first();
    const seeTable = parsed.find(".Jpan ruby").first();
    let kanji = "";
    let kana = "";

    if (headword.length) {
        // Wiktionary already binds their kana, so we have to undo the process to get
        // the constituent parts, at least with the current markup
        const childNodes = headword[0].childNodes;
        for (let i = 0; i < childNodes.length; i++) {
            const node = childNodes[i];
            if (node.nodeName === "RUBY") {
                const ruby = $(node); // convert back to JQuery for convenience
                ruby.children("rp").remove();
                kana += ruby.children("rt").detach().text();
                kanji += ruby.text();
            } else if (node.nodeType === 3) { // "#text"
                kanji += node.nodeValue;
                kana += node.nodeValue;
            }
        }

        if (kanji !== wikidataKanji) {
            return;
        }
    } else if (seeTable.length) {
        kana = seeTable.children("rt").text();
    } else {
        return;
    }

    if (kana) {
        displayKana(kana);
        $("#kanjiInfo").addClass("kanjiInfo-wiktionary");
    }
}

/**
 * Attach the reading to the displayed label as ruby text and, unless the
 * label is purely kanji, load bindKana.js to tidy up the furigana.
 *
 * @param {string} kana - Reading to display.
 */
function displayKana(kana) {
    // The reading comes from a remote source, so insert it as text, not as markup.
    $("#kanjiInfo ruby").append($("<rt>").text(kana));

    // Cleanup redundant furigana with another script
    const kanjiOnlyRe = /^[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6A]+$/;
    if (!kanjiOnlyRe.test(wikidataKanji)) {
        // NOTE(review): this URL points at a mirror host rather than the wiki
        // itself - confirm it is the intended source for bindKana.js.
        mw.loader.load( 'https://www.search.com.vn/wiki/index.php?lang=en&q=User:Opencooper/bindKana.js&action=raw&ctype=text/javascript' );
    }
}

$(setup);