User:Connel MacKenzie/custom.js/ajaxtranslinks.js

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Note – after saving, you may have to bypass your browser’s cache to see the changes.

  • Mozilla / Firefox / Safari: hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Command-R on a Macintosh);
  • Konqueror and Chrome: click Reload or press F5;
  • Opera: clear the cache in Tools → Preferences;
  • Internet Explorer: hold Ctrl while clicking Refresh, or press Ctrl-F5.

/*
<pre>
*/
 
// TODO doesn't handle interwikis
// TODO handles self links but not unlinked words
// TODO selflinked language name causes breakage
// TODO Serbian Latin subentries may wrongly match Latin!
// TODO handle cases like: *Hungarian: [[tönköly]] [[búza]]
// TODO missing : after language name can cause breakage with subentries
// TODO handle: Finnish: jargon (1, 2), ammattikieli (1, 2), slangi (2), mongerrus (3)
// TODO handle subentries which are not sublanguages: Chinese and Japanese entries at "Mongolian"
// TODO handle ''trad.'' and ''simpl.'' in some Chinese entries
// TODO Cantonese and Mandarin as sublangs can match each other
// TODO handle wikified sublanguages
// TODO handle translations that have both a main entry and subentries ([[corn]] German)
 
/**
 * Fire an asynchronous GET request and dispatch on its outcome.
 *
 * @param url    address to fetch
 * @param on200  called with the request object when the server answers 200
 *               with a non-empty body
 * @param on404  called with no arguments when the server answers 404, or
 *               answers 200 with an empty body
 *
 * Any other status is ignored without invoking either callback.
 */
function ajax(url, on200, on404) {
  var xhr;
  if (window.XMLHttpRequest) {
    xhr = new XMLHttpRequest();
  } else {
    // fallback for browsers that only expose the ActiveX control
    xhr = new ActiveXObject('Microsoft.XMLHTTP');
  }

  xhr.open('GET', url, true);

  xhr.onreadystatechange = function() {
    // readyState 4 means the request has completed
    if (xhr.readyState != 4) {
      return;
    }

    var status = xhr.status;
    if (status == 404) {
      on404();
      return;
    }
    if (status != 200) {
      //debugPrint('bad rsc status ' + req.status + '\n');
      return;
    }

    // TODO handle xmlhttprequest cache bug in some msie versions
    // An empty body on a 200 is treated the same way as a missing page.
    if (xhr.responseText == '') {
      on404();
      return;
    }
    on200(xhr);
  };

  xhr.send(null);
}
 
/**
 * Inspect the raw wikitext of a page and restyle a translation link
 * according to whether the page has a level-2 heading for the wanted
 * language.
 *
 * Results are written to the anchor:
 *   - heading found:  className '', title '<term> exists in <heading>', and
 *                     '#<heading>' appended to href when it has no fragment
 *   - page redirects: className 'redirect'
 *   - otherwise:      className 'new partlynew'
 *
 * @param list_item  not used by this function
 * @param anchor     link object whose className, title and href are updated
 * @param term       page title, used only in the tooltip text
 * @param lang       language name to look for
 * @param sublang    optional sublanguage (e.g. 'Ancient' for Greek), or null
 * @param page       raw wikitext of the page
 */
function parse(list_item, anchor, term, lang, sublang, page) {
  // Language names are spliced into a RegExp below, so their metacharacters
  // must be neutralised: "Norwegian (Nynorsk)" has to match literally and
  // names such as "C++" must not make the RegExp constructor throw.
  function quote(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  var state = 0;
  var arr = page.split("\n");
  var line;
  var qlang = quote(lang);
  var langs = qlang;

  // Ancient Greek: accept the common ways of combining language and sublanguage
  if (sublang) {
    var qsub = quote(sublang);
    langs += '|' + qsub + '|' + qsub + ' ' + qlang + '|' + qlang + ' \\(' + qsub + '\\)|' + qlang + ', ' + qsub;
  }

  // Chinese
  if (lang == 'Chinese')
    langs += '|Mandarin|Cantonese';
  else if (lang == 'Mandarin' || lang == 'Cantonese')
    langs += '|Chinese';

  // CJKV/Han characters
  else if (lang.match(/(CJKV?|Chinese) [cC]haracters/))
    langs += '|Translingual';

  // Other synonyms
  else if (lang == 'Guaraní') langs += '|Guarani';
  else if (lang == 'Guarani') langs += '|Guaraní';
  else if (lang == 'Scots Gaelic') langs += '|Scottish Gaelic';
  else if (lang == 'Scottish Gaelic') langs += '|Scots Gaelic';
  else if (lang == 'Slovak') langs += '|Slovakian';
  else if (lang == 'Slovakian') langs += '|Slovak';
  else if (lang == 'Slovene') langs += '|Slovenian';
  else if (lang == 'Slovenian') langs += '|Slovene';
  else if (lang == 'Tupinambá') langs += '|Tupinamba';
  else if (lang == 'Tupinamba') langs += '|Tupinambá';

  // plain heading (==Lang==) and wikified heading (==[[Lang]]==)
  var rx1 = new RegExp('^==\\s*(' + langs + ')\\s*==');
  var rx2 = new RegExp('^==\\s*\\[\\[(' + langs + ')]]\\s*==');

  anchor.className = 'dunno';
  anchor.title = 'Looking...';

  // A redirect directive only counts at the very start of the page text;
  // matching it on any line would misread a definition line such as
  // "# Redirect the flow" as a redirect.
  if (/^\s*#\s*redirect/i.test(page)) {
    state = -1;
  } else {
    for (var i = 0; i < arr.length; i++) {
      var foundlang = null;
      var r;

      line = arr[i];

      r = line.match(rx1);
      if (r != null)
        foundlang = r[1];
      r = line.match(rx2);
      if (r != null)
        foundlang = r[1];

      if (foundlang != null) {
        // page exists and has an entry for this language
        anchor.className = '';
        anchor.title = term + ' exists in ' + foundlang;
        if (anchor.href.indexOf('#') == -1) {
          // TODO unicode in lang names breaks: Guaraní -> #Guaran%C3%AD but #Guaran.C3.AD
          anchor.href += '#' + foundlang;
        }
        state = 1;
        break;
      }
    }
  }

  if (state == 0) {
    // page exists but has no entry for this language
    anchor.className = 'new partlynew';
    anchor.title = term + ' exists but not in ' + (sublang ? lang + ' (' + sublang + ')' : lang);
  } else if (state == -1) {
    // page exists but is a redirect
    anchor.className = 'redirect';
    anchor.title = term + ' exists but is a redirect';
  }
}
 
/**
 * Fetch the raw wikitext of a page and hand it to parse() so the link can
 * be restyled according to whether the page covers the wanted language.
 *
 * @param list_item  passed through to parse()
 * @param anchor     link to restyle; passed through to parse()
 * @param term       title of the page to fetch
 * @param lang       language name; passed through to parse()
 * @param sublang    sublanguage or null; passed through to parse()
 */
function lookup_langs(list_item, anchor, term, lang, sublang) {
  function on200(req) {
    parse( list_item, anchor, term, lang, sublang, req.responseText );
  }
  // Missing pages are deliberately left untouched.
  function on404() {
  }
  // Replace every space (not just the first) with an underscore, then
  // percent-encode so characters such as '&', '#' or '+' in a title cannot
  // break out of the title= parameter.
  var title = encodeURIComponent(term.replace(/ /g, '_'));
  var url = wgScript + '?title=' + title + '&action=raw';
  ajax(url, on200, on404);
}
 
/**
 * Walk the translation list of the current entry and, for every linked
 * term, look up whether the target page has a section for that language
 * (via lookup_langs). Links are annotated in place.
 *
 * Reads the page globals wgNamespaceNumber, wgServer, wgTitle and
 * wgArticlePath. Does nothing outside namespace 0, on servers other than
 * the two recognised below, or when no translation list can be located.
 */
function ajaxtranslinks() {

  // Strip surrounding whitespace and the colon from a label such as " Greek: ".
  function label(text) {
    return text.replace(/\s*(.*?)\s*:\s*/, '$1');
  }

  // Return the sibling two positions after node, or null when there is none.
  function secondSibling(node) {
    var next = node.nextSibling;
    return next ? next.nextSibling : null;
  }

  // Return the <li> elements of the translations section, or null if not found.
  function findItems() {
    var t;

    if (wgServer == 'http://en.wiktionary.org') {
      var bc = document.getElementById('bodyContent');
      if (bc == null) return null;

      // TODO on en.wikt it has no id so we must check all h4 and h5 tags
      var trans_h = null;
      outer : for (var hl = 4; hl <= 5; hl++) {
        var hs = bc.getElementsByTagName('h' + hl.toString());
        for (var k = 0; k < hs.length; k++) {
          var spans = hs[k].getElementsByTagName('span');
          // 1st or 2nd span depending on whether edit link exists;
          // headings without spans or with empty spans are skipped
          var last = spans[spans.length - 1];
          if (last && last.firstChild && last.firstChild.nodeValue == 'Translations') {
            trans_h = hs[k];
            break outer;
          }
        }
      }
      if (trans_h == null) return null;

      t = secondSibling(trans_h);
      if (t == null) return null;

      if ((t.tagName == 'DIV' && t.className == 'rfc-trans') ||               // {{rfc-trans}}
          (t.tagName == 'P' && t.firstChild && t.firstChild.tagName == 'B') || // '''subheading'''
          (t.tagName == 'DL')) {                                               // ;subheading
        t = secondSibling(t);
        if (t == null) return null;
      }

      if (t.tagName == 'TABLE' || t.tagName == 'UL')
        return t.getElementsByTagName('li');

      if (t.childNodes && t.childNodes[3]) {
        t = t.childNodes[3].childNodes[1];
        if (t && t.getElementsByTagName)
          return t.getElementsByTagName('li');
      }
      return null;
    }

    if (wgServer == 'http://wiktionarydev.leuksman.com') {
      t = document.getElementById('Translations');
      if (t != null) {
        t = t.parentNode.parentNode;
        return t.getElementsByTagName('li');
      }
    }

    return null;
  }

  if (wgNamespaceNumber != 0) return;

  var lis = findItems();
  if (lis == null) return;

  // each language in the translation table
  for (var i = 0; i < lis.length; i++) {
    var li = lis[i];
    var first = li.firstChild;
    if (!first) continue; // empty list item

    var subentries = li.getElementsByTagName('dl'); // Chinese & Serbian mostly
    var links = li.getElementsByTagName('a');
    var strongs = li.getElementsByTagName('strong');

    // Reset per item so a label from the previous item can never leak in.
    var lang = null;
    var offset = 0;

    // plain lang label
    if (first.nodeType == 3) {
      // TODO handle missing colon especially with sublanguages following
      lang = label(first.nodeValue);

    // ttbc
    } else if (first.tagName == 'SPAN' && first.className == 'ttbc') {
      lang = first.firstChild ? first.firstChild.nodeValue : null;

    // wikified lang label: the first link is the label, not a translation
    } else if (links[0]) {
      lang = links[0].firstChild ? links[0].firstChild.nodeValue : null;
      offset = 1;
    }

    // Without a readable language label nothing can be looked up.
    if (typeof lang != 'string') continue;

    // Inline sublanguage ("Greek, Ancient"): split once, before the loop,
    // so that every link of this item gets the sublanguage, not only the first.
    var inlineSublang = null;
    if (lang.indexOf(', ') != -1) {
      var parts = lang.match(/(.*?), (.*)/);
      lang = parts[1];
      inlineSublang = parts[2];
    }

    // each red or blue link for this language
    for (var j = offset; j < links.length; j++) {
      var anchor = links[j];
      var term = anchor.title ? anchor.title
               : (anchor.firstChild ? anchor.firstChild.nodeValue : null);
      if (typeof term != 'string') continue; // e.g. a link wrapping only an element

      var sublang = inlineSublang;

      // multiline sublanguages: the label is the leading text of the link's container
      if (sublang == null && subentries.length != 0) {
        var p = anchor.parentNode;
        if (p && p.tagName == 'SPAN')  // KUchar etc template
          p = p.parentNode;
        if (p && p.firstChild && p.firstChild.nodeValue)
          sublang = label(p.firstChild.nodeValue);
      }

      if (term.indexOf(':') != -1) {
        anchor.title = term + ' is on another wiki';
      } else if (anchor.className.match(/\bnew\b/)) {
        anchor.title = term + ' does not exist at all';
      } else {
        lookup_langs(li, anchor, term, lang, sublang);
      }
    }

    // each self link for this language (should never be more than one)
    if (strongs.length != 0) {
      var strong = strongs[0];

      // Swap the bold self link for a real anchor so it can be styled like the others.
      var selfLink = document.createElement('a');
      selfLink.appendChild(document.createTextNode(wgTitle));
      selfLink.href = wgArticlePath.replace('$1', wgTitle);
      strong.parentNode.replaceChild(selfLink, strong);

      // An inline sublanguage applies to the whole item, self link included.
      lookup_langs(li, selfLink, wgTitle, lang, inlineSublang);
    }
  }
}

// Hand ajaxtranslinks to `$` — presumably jQuery's document-ready shorthand,
// so the function runs once the DOM is available (TODO confirm `$` is jQuery here).
$( ajaxtranslinks );
 
/*
</pre>
*/