mediawiki-extensions-Cite/js/lib/ext.Cite.js
Subramanya Sastry fb27cd0b4a Handle multiply-referenced ref-tags without duplicate ref-content.
* When named-ref-tags are used multiple times in a document, the
  Cite.onReferences function was outputting the ref-content once
  for each use in the references section. While this is "harmless"
  for regular ref-content, when the ref-content has templates
  (ex: {{cite .. }}), that template content is duplicated several
  times -- this also means that template expansions seen in the
  output no longer have unique ids. Occasionally, this trips up
  DOM passes (template encapsulation) which make the reasonable
  assumption that template ids are unique. It introduced an
  infinite loop in the code that detects template range nestings
  and overlaps. This is not always triggered, but only in certain
  scenarios. en:Euphoria (Enrique Iglesias album) was a page that
  sent the parser into an infinite loop.

* Now fixed by outputting ref-content once.

* In addition, added an error-check in the DOM post-pass that might
  get stuck in an infinite loop to output an error message.

* No change in parser test results.

* en:Euphoria (Enrique Iglesias album) now parses and round-trips (rts)
  without getting stuck in an infinite loop.  Possibly other pages as well.

Change-Id: I81a055999eefe3ca36dfb8a9cda3fee538ae6c65
2013-02-20 17:50:52 -06:00

287 lines
7.3 KiB
JavaScript

"use strict";
var TokenCollector = require( './ext.util.TokenCollector.js' ).TokenCollector,
Util = require( './mediawiki.Util.js' ).Util,
$ = require( './fakejquery' );
/**
 * Token-transform implementation of the Cite extension.
 *
 * Stores the manager and options on the instance, clears the collected
 * ref groups, and registers three handlers: a TokenCollector that gathers
 * ref tag sections for handleRef, a transform for references tags, and an
 * 'end' transform that resets the collected state.
 *
 * @class
 * @constructor
 * @param {Object} manager Transform manager the handlers are registered with.
 * @param {Object} options Kept on the instance as `options`.
 */
function Cite ( manager, options ) {
	var cite = this;

	cite.manager = manager;
	cite.options = options;
	// Start out without any collected ref groups
	cite.reset();

	// The handlers are invoked by the manager, so pin them to this instance
	var onRef = cite.handleRef.bind( cite ),
		onReferencesTag = cite.onReferences.bind( cite ),
		onEnd = cite.reset.bind( cite ),
		// match the end-of-input if </ref> is missing
		matchEndOfInput = true;

	// Set up the collector for ref sections
	new TokenCollector( manager, onRef, matchEndOfInput, cite.rank, 'tag', 'ref' );

	// And register for references tags
	manager.addTransform(
		onReferencesTag, "Cite:onReferences", cite.referencesRank, 'tag', 'references'
	);

	// And register for cleanup
	manager.addTransform( onEnd, "Cite:reset", cite.referencesRank, 'end' );
}
/**
 * Discard all collected ref groups.
 *
 * Called by the constructor (without a token) and registered as the 'end'
 * transform, where the token is passed through unchanged.
 *
 * The group map is created without a prototype: group names come from
 * wikitext, and on a plain object a name such as "constructor" or
 * "toString" would be mistaken for an already existing group whenever the
 * map is queried with the `in` operator.
 *
 * @method
 * @param {Object} [token] Token to pass through.
 * @returns {Object} `{ token: token }`
 */
Cite.prototype.reset = function ( token ) {
	this.refGroups = Object.create( null );
	return { token: token };
};
// Cite should be the first thing to run in phase 3 so the <ref>-</ref>
// content tokens are pulled out of the token stream and don't pollute
// the main token stream with any unbalanced tags/pres and the like.
// `rank` is the rank handed to the TokenCollector that gathers ref sections.
Cite.prototype.rank = 2.01;
// `referencesRank` is used for both the references-tag transform and the
// 'end' transform that resets the collected refs.
Cite.prototype.referencesRank = 2.6;
/**
 * Handle ref section tokens collected by the TokenCollector.
 *
 * Registers the ref content with its group and replaces the collected
 * section by a linkback: a span wrapping a link to the note, followed by a
 * meta token that marks the end of the generated output.
 *
 * @method
 * @param {Array} tokens Collected tokens, starting with the opening ref tag.
 *   The closing ref tag may be missing. NOTE: the array is modified in
 *   place -- the ref tags are removed and what remains is handed to the
 *   group as the ref content.
 * @returns {Object} `{ tokens: [...] }` holding the replacement tokens.
 */
Cite.prototype.handleRef = function ( tokens ) {
	// remove the first ref tag
	var startTag = tokens.shift(),
		startTsr = startTag.dataAttribs.tsr,
		endTsr;

	// Remove the closing ref tag, if there is one. Nothing may be left
	// after the opening tag (e.g. an unterminated ref at the very end),
	// so guard the lookup instead of dereferencing undefined.
	var lastToken = tokens[tokens.length - 1];
	if ( lastToken && lastToken.name === 'ref' ) {
		endTsr = tokens.pop().dataAttribs.tsr;
	}

	var options = $.extend( {
			name: null,
			group: null
		}, Util.KVtoHash( startTag.attribs ) ),
		group = this.getRefGroup( options.group ),
		ref = group.add( tokens, options ),
		// The linkback for this use is the one that add() just appended
		linkback = ref.linkbacks[ref.linkbacks.length - 1];

	// Link text: the group name (if any) followed by the 1-based ref number
	var bits = [];
	if ( options.group ) {
		bits.push( options.group );
	}
	//bits.push(Util.formatNum( ref.groupIndex + 1 ));
	bits.push( ref.groupIndex + 1 );

	var about = "#" + this.manager.env.newObjectId(),
		span = new TagTk( 'span', [
			new KV( 'id', linkback ),
			new KV( 'class', 'reference' ),
			new KV( 'about', about ),
			new KV( 'typeof', 'mw:Object/Ext/Cite' )
		] );

	if ( startTsr ) {
		// For template ref tokens, both start and end tsr's are stripped.
		// So, if there is a start-tsr, there will also be an end-tsr.
		// And, if absent, it is safe to go to end-of-text.
		span.dataAttribs = {
			tsr: [
				startTsr[0],
				endTsr ? endTsr[1] : this.manager.env.page.src.length
			]
		};
	}

	return {
		tokens: [
			span,
			new TagTk( 'a', [
				new KV( 'href', '#' + ref.target )
			] ),
			'[' + bits.join( ' ' ) + ']',
			new EndTagTk( 'a' ),
			new EndTagTk( 'span' ),
			// NOTE: endTsr can be undefined here when it has been
			// stripped from ref-tags coming from template/extension content.
			new SelfclosingTagTk( 'meta', [
				new KV( 'typeof', 'mw:Object/Ext/Cite/End' ),
				new KV( 'about', about )
			], { tsr: endTsr } )
		]
	};
};
/**
 * Build the mw:Placeholder meta token that stands in for a token which is
 * not rendered.
 *
 * @param {Object} env Environment; `env.page.src` is read when the token has a tsr.
 * @param {Object} token Token being replaced; its `dataAttribs.tsr` is consulted.
 * @param {string} src Source text to record when the token has no tsr.
 * @returns {Array} One-element array holding the placeholder meta token.
 */
function genPlaceholderTokens(env, token, src) {
	var range = token.dataAttribs.tsr,
		// With a tsr, record the exact slice of the original source;
		// otherwise fall back to the default string that was passed in.
		placeholderData = range ?
			{ tsr: range, src: env.page.src.substring( range[0], range[1] ) } :
			{ src: src },
		typeAttr = new KV( 'typeof', 'mw:Placeholder' );

	return [ new SelfclosingTagTk( 'meta', [ typeAttr ], placeholderData ) ];
}
/**
 * Handle references tag tokens.
 *
 * An end tag is replaced by a placeholder. Any other references tag is
 * replaced by an ordered list with one item per ref collected so far for
 * the requested group, or by a placeholder when nothing has been collected
 * for that group.
 *
 * @method
 * @param {Object} token The references tag token.
 * @param {Object} manager Manager whose pipeFactory supplies the pipeline
 *   used to process the ref content.
 * @returns {Object} `{ tokens: [...] }` holding the replacement tokens.
 */
Cite.prototype.onReferences = function ( token, manager ) {
	var env = this.manager.env,
		refGroups = this.refGroups,
		arrow = '↑';

	// Run the stored content of a ref through a post-expansion pipeline
	function processRefTokens( ref ) {
		var out;
		// NOTE: This is a synchronous pipeline
		var pipeline = manager.pipeFactory.getPipeline(
			'tokens/x-mediawiki/post-expansion',
			{ wrapTemplates: false, inBlockToken: true }
		);
		pipeline.addListener( 'chunk', function ( toks ) {
			out = Util.stripEOFTkfromTokens( toks );
		} );
		pipeline.addListener( 'end', function () {} );
		// Feed a copy: appending the EOF to ref.tokens itself would leave
		// one more EOF in the stored content for every references tag that
		// renders this ref.
		pipeline.process( ref.tokens.concat( [ new EOFTk() ] ) );
		return out;
	}

	// Render the list item for one ref: backlink(s) followed by the content
	function renderLine( ref ) {
		var out = [ new TagTk( 'li', [ new KV( 'id', ref.target ) ] ) ];
		if ( ref.linkbacks.length === 1 ) {
			out.push(
				new TagTk( 'a', [ new KV( 'href', '#' + ref.linkbacks[0] ) ] ),
				arrow,
				new EndTagTk( 'a' )
			);
		} else {
			out.push( arrow );
			ref.linkbacks.forEach( function ( linkback, i ) {
				out.push(
					new TagTk( 'a', [
						new KV( 'data-type', 'hashlink' ),
						// Every backlink targets its own use of the ref
						new KV( 'href', '#' + linkback )
					] ),
					// XXX: make formatNum available!
					ref.groupIndex + '.' + i,
					new EndTagTk( 'a' ),
					" "
				);
			} );
		}
		// Output ref tokens once!
		return out.concat( processRefTokens( ref ) );
	}

	if ( token.constructor === EndTagTk ) {
		return { tokens: genPlaceholderTokens( env, token, "</references>" ) };
	}

	var groupName = Util.KVtoHash( token.attribs ).group;
	if ( groupName && groupName.constructor === Array ) {
		// Array of tokens, convert to string.
		groupName = Util.tokensToString( groupName );
	}
	if ( groupName ) {
		// have a String, strip surrounding whitespace
		groupName = groupName.trim();
	}
	// Point invalid / empty groups to null
	if ( !groupName ) {
		groupName = null;
	}

	var res;
	// Only groups that were really registered count; a name like
	// "toString" must not match an inherited member of the map.
	if ( Object.prototype.hasOwnProperty.call( refGroups, groupName ) ) {
		var listItems = $.map( refGroups[groupName].refs, renderLine ),
			dataAttribs = Util.clone( token.dataAttribs );
		dataAttribs.src = token.getWTSource( env );
		res = [
			new TagTk( 'ol', [
				new KV( 'class', 'references' ),
				new KV( 'typeof', 'mw:Object/References' )
			], dataAttribs )
		].concat( listItems, [ new EndTagTk( 'ol' ) ] );
	} else {
		res = genPlaceholderTokens( env, token, "<references />" );
	}

	return { tokens: res };
};
/**
 * Look up the ref group with the given name, creating it on first use.
 *
 * A group object has `refs` (the refs in order of first use), `byName`
 * (named refs keyed by name) and `add( tokens, options )`. `add` records
 * one use of a ref: an unnamed use, or the first use of a name, creates a
 * new ref that holds `tokens`; a later use of a known name reuses the
 * existing ref and ignores `tokens`. Every use appends a new linkback id
 * to the ref, and the ref is returned.
 *
 * Group and ref names come from wikitext, so only own properties are
 * treated as existing entries; names such as "constructor" or "toString"
 * must not resolve to inherited Object.prototype members.
 *
 * @method
 * @param {string|null} group Group name; null selects the default group.
 * @returns {Object} The group object `{ refs, byName, add }`.
 */
Cite.prototype.getRefGroup = function ( group ) {
	var refGroups = this.refGroups,
		hasOwn = Object.prototype.hasOwnProperty;

	if ( !hasOwn.call( refGroups, group ) ) {
		var refs = [],
			// No prototype, so any name (even "__proto__") is a plain key
			byName = Object.create( null );

		refGroups[group] = {
			refs: refs,
			byName: byName,
			add: function ( tokens, options ) {
				var ref;
				if ( options.name && hasOwn.call( byName, options.name ) ) {
					// Repeated use of a named ref: keep the first content
					ref = byName[options.name];
				} else {
					var n = refs.length,
						key = n + '';
					if ( options.name ) {
						key = options.name + '-' + key;
					}
					ref = {
						tokens: tokens,
						index: n,
						groupIndex: n, // @fixme
						name: options.name,
						group: options.group,
						key: key,
						target: 'cite_note-' + key,
						linkbacks: []
					};
					refs[n] = ref;
					if ( options.name ) {
						byName[options.name] = ref;
					}
				}
				// One linkback id per use, numbered from 0
				ref.linkbacks.push(
					'cite_ref-' + ref.key + '-' + ref.linkbacks.length
				);
				return ref;
			}
		};
	}
	return refGroups[group];
};
// Export the Cite constructor when a `module` object is available
// (e.g. when this file is loaded through require()).
if (typeof module === "object") {
module.exports.Cite = Cite;
}