mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-27 15:50:29 +00:00
Shorten data-mw-rt to data-mw and clean up whitelist
Instead of a proliferation of data-mw-* attributes, it should be easier to stash all private / non-semantic round-trip information in a JSON object stored in data-mw.
Change-Id: Id200a6a8789fa152f29ea530e5a24b6ee7b4b285
This commit is contained in:
parent
5ef3438ee5
commit
f662690d02
|
@ -86,7 +86,7 @@ Sanitizer.prototype._stripIDNs = function ( host ) {
|
|||
* Sanitize any tag.
|
||||
*
|
||||
* XXX: Make attribute sanitization reversible by storing round-trip info in
|
||||
* token.dataAttribs object (which is serialized as JSON in a data-mw-rt
|
||||
* token.dataAttribs object (which is serialized as JSON in a data-mw
|
||||
* attribute in the DOM).
|
||||
*/
|
||||
Sanitizer.prototype.onAny = function ( token ) {
|
||||
|
|
|
@ -76,8 +76,8 @@ FauxHTML5.TreeBuilder.prototype.processToken = function (token) {
|
|||
}
|
||||
token.attribs.push(
|
||||
{
|
||||
// 'Mediawiki round-trip' information
|
||||
k: 'data-mw-rt',
|
||||
// MediaWiki-specific round-trip / non-semantic information
|
||||
k: 'data-mw',
|
||||
v: JSON.stringify( token.dataAttribs )
|
||||
} );
|
||||
}
|
||||
|
|
|
@ -8,12 +8,12 @@ testWhiteList = {};
|
|||
// formatting is identical
|
||||
testWhiteList["Italics and bold"] = "<ul><li> plain</li><li> plain<i>italic</i>plain</li><li> plain<i>italic</i>plain<i>italic</i>plain</li><li> plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<b>bold</b>plain</li><li> plain<i>italic</i>plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<i>italic</i>plain</li><li> plain<i>italic<b>bold-italic</b>italic</i>plain</li><li> plain<b>bold<i>bold-italic</i>bold</b>plain</li><li> plain<i><b>bold-italic</b>italic</i>plain</li><li> plain<i><b>bold-italic</b></i><b>bold</b>plain</li><li> plain<i>italic<b>bold-italic</b></i>plain</li><li> plain<b>bold<i>bold-italic</i></b>plain</li><li> plain l'<i>italic</i>plain</li><li> plain l'<b>bold</b> plain</li></ul>";
|
||||
|
||||
testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\" data-mw-sourcePos=\"30:61\"><b>text</b></a><i data-mw-sourcePos=\"62:106\">Something <a href=\"http://example.com\">in italic</a></i><i data-mw-sourcePos=\"107:164\">Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><i data-mw-sourcePos=\"165:204\"><b data-mw-sourcePos=\"165:204\">Now <a href=\"http://example.com\">both</a></b></i></p>";
|
||||
testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\"><b>text</b></a><i>Something <a href=\"http://example.com\">in italic</a></i><i>Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><i><b>Now <a href=\"http://example.com\">both</a></b></i></p>";
|
||||
|
||||
testWhiteList["Unclosed and unmatched quotes"] = "<p data-mw-sourcePos=\"0:66\"><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b>Bold text..</b></p><p>..spanning two paragraphs (should not work).<b></b></p><p><b>Bold tag left open</b></p><p><i>Italic tag left open</i></p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>";
|
||||
testWhiteList["Unclosed and unmatched quotes"] = "<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b>Bold text..</b></p><p>..spanning two paragraphs (should not work).<b></b></p><p><b>Bold tag left open</b></p><p><i>Italic tag left open</i></p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>";
|
||||
|
||||
// The expected result for this test is really broken html.
|
||||
testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p data-mw-sourcePos=\"0:45\"><i>Some <a data-mw-type=\"internal\" href=\"/wiki/Link\">pretty </a></i><a data-mw-type=\"internal\" href=\"/wiki/Link\">italics<i> and stuff</i></a><i>!</i></p>";
|
||||
testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"/wiki/Link\">pretty </a></i><a href=\"/wiki/Link\">italics<i> and stuff</i></a><i>!</i></p>";
|
||||
|
||||
testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>";
|
||||
|
||||
|
@ -22,13 +22,13 @@ testWhiteList["5 quotes, code coverage +1 line"] = "<p>'<i></i></p>";
|
|||
|
||||
// The comment in the test already suggests this result as correct, but
|
||||
// supplies the old result without preformatting.
|
||||
testWhiteList["Bug 6200: Preformatted in <blockquote>"] = "<blockquote data-mw-sourcePos=\"0:12\"><pre>\nBlah</pre></blockquote>";
|
||||
testWhiteList["Bug 6200: Preformatted in <blockquote>"] = "<blockquote><pre>\nBlah</pre></blockquote>";
|
||||
|
||||
|
||||
// empty table tags / with only a caption are legal in HTML5.
|
||||
testWhiteList["A table with no data."] = "<table></table>";
|
||||
testWhiteList["A table with nothing but a caption"] = "<table><caption> caption</caption></table>";
|
||||
testWhiteList["Fuzz testing: Parser22"] = "<p data-mw-sourcePos=\"0:23\"><a href=\"http://===r:::https://b\">http://===r:::https://b</a></p><table></table>";
|
||||
testWhiteList["Fuzz testing: Parser22"] = "<p><a href=\"http://===r:::https://b\">http://===r:::https://b</a></p><table></table>";
|
||||
|
||||
// MediaWiki changes the order of attributes in tables, ignore that
|
||||
testWhiteList["Multiplication table"] = "<table border=\"1\" cellpadding=\"2\"><caption>Multiplication table</caption><tbody><tr><th> × </th><th> 1 </th><th> 2 </th><th> 3</th></tr><tr><th> 1</th><td> 1 </td><td> 2 </td><td> 3</td></tr><tr><th> 2</th><td> 2 </td><td> 4 </td><td> 6</td></tr><tr><th> 3</th><td> 3 </td><td> 6 </td><td> 9</td></tr><tr><th> 4</th><td> 4 </td><td> 8 </td><td> 12</td></tr><tr><th> 5</th><td> 5 </td><td> 10 </td><td> 15</td></tr></tbody></table>";
|
||||
|
@ -37,7 +37,7 @@ testWhiteList["Nested table"] = "<table border=\"1\"><tbody><tr><td> α</td><td>
|
|||
|
||||
// Very minor whitespace difference at end of cell (MediaWiki inserts a
|
||||
// newline before the close tag even if there was no trailing space in the cell)
|
||||
testWhiteList["Table rowspan"] = "<table border=\"1\" data-mw-sourcePos=\"0:121\"><tbody><tr><td> Cell 1, row 1 </td><td rowspan=\"2\"> Cell 2, row 1 (and 2) </td><td> Cell 3, row 1 </td></tr><tr><td> Cell 1, row 2 </td><td> Cell 3, row 2 </td></tr></tbody></table>";
|
||||
testWhiteList["Table rowspan"] = "<table border=\"1\"><tbody><tr><td> Cell 1, row 1 </td><td rowspan=\"2\"> Cell 2, row 1 (and 2) </td><td> Cell 3, row 1 </td></tr><tr><td> Cell 1, row 2 </td><td> Cell 3, row 2 </td></tr></tbody></table>";
|
||||
|
||||
// Inter-element whitespace only
|
||||
testWhiteList["Indented table markup mixed with indented pre content (proposed in bug 6200)"] = " \n\n<table><tbody><tr><td><pre>\nText that should be rendered preformatted\n</pre></td></tr></tbody></table>";
|
||||
|
@ -48,7 +48,7 @@ testWhiteList["Indented table markup mixed with indented pre content (proposed i
|
|||
|
||||
// Single quotes are legal in HTML5 URIs. See
|
||||
// http://www.whatwg.org/specs/web-apps/current-work/multipage/urls.html#url-manipulation-and-creation
|
||||
testWhiteList["Link containing double-single-quotes '' (bug 4598)"] = "<p><a href=\"/wiki/Lista_d''e_paise_d''o_munno\" data-mw-type=\"internal\">Lista d''e paise d''o munno</a></p>";
|
||||
testWhiteList["Link containing double-single-quotes '' (bug 4598)"] = "<p><a href=\"/wiki/Lista_d''e_paise_d''o_munno\">Lista d''e paise d''o munno</a></p>";
|
||||
|
||||
|
||||
// Sanitizer
|
||||
|
@ -56,15 +56,15 @@ testWhiteList["Invalid attributes in table cell (bug 1830)"] = "<table><tbody><t
|
|||
testWhiteList["Table security: embedded pipes (http://lists.wikimedia.org/mailman/htdig/wikitech-l/2006-April/022293.html)"] = "<table><tbody><tr><td> |<a href=\"ftp://|x||\">[1]</a>\" onmouseover=\"alert(document.cookie)\">test</td></tr></tbody></table>";
|
||||
|
||||
// Sanitizer, but UTF8 in link is ok in HTML5
|
||||
testWhiteList["External link containing double-single-quotes with no space separating the url from text in italics"] = "<p><a href=\"http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm\" data-mw-type=\"external\" data-mw-rt=\"{&quot;sourcePos&quot;:[0,146]}\"><i>La muerte de Casagemas</i> (1901) en el sitio de </a><a href=\"/wiki/Museo_Picasso_(París)\" data-mw-type=\"internal\">Museo Picasso</a>.</p>";
|
||||
testWhiteList["External link containing double-single-quotes with no space separating the url from text in italics"] = "<p><a href=\"http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm\" data-mw=\"{&quot;sourcePos&quot;:[0,146]}\"><i>La muerte de Casagemas</i> (1901) en el sitio de </a><a href=\"/wiki/Museo_Picasso_(París)\">Museo Picasso</a>.</p>";
|
||||
|
||||
testWhiteList["External links: wiki links within external link (Bug 3695)"] = "<p><a href=\"http://example.com\" data-mw-type=\"external\" data-mw-sourcePos=\"0:54\"></a><a data-mw-type=\"internal\" href=\"/wiki/Wikilink\">wikilink</a> embedded in ext link</p>";
|
||||
testWhiteList["External links: wiki links within external link (Bug 3695)"] = "<p><a href=\"http://example.com\"></a><a href=\"/wiki/Wikilink\">wikilink</a> embedded in ext link</p>";
|
||||
|
||||
testWhiteList["<pre> with forbidden attribute values (bug 3202)"] = "<pre width=\"8\" style=\"\">Narrow screen goodies</pre>";
|
||||
|
||||
// This is valid, just confusing for humans. The reason for disallowing this
|
||||
// might be history by now. XXX: Check this!
|
||||
testWhiteList["Link containing % as a double hex sequence interpreted to hex sequence"] = "<p><a href=\"/wiki/7%2525_Solution\" data-mw-type=\"internal\">7%25 Solution</a></p>";
|
||||
testWhiteList["Link containing % as a double hex sequence interpreted to hex sequence"] = "<p><a href=\"/wiki/7%2525_Solution\">7%25 Solution</a></p>";
|
||||
|
||||
if (typeof module == "object") {
|
||||
module.exports.testWhiteList = testWhiteList;
|
||||
|
|
|
@ -333,7 +333,7 @@ ParserTests.prototype.normalizeHTML = function (source) {
|
|||
// known-ok differences.
|
||||
ParserTests.prototype.normalizeOut = function ( out ) {
|
||||
// TODO: Do not strip newlines in pre and nowiki blocks!
|
||||
return out.replace(/[\r\n]| data-mw-[a-zA-Z-]+="[^">]*"/g, '')
|
||||
return out.replace(/[\r\n]| data-mw="[^">]*"/g, '')
|
||||
.replace(/<!--.*?-->\n?/gm, '')
|
||||
.replace(/<\/?meta[^>]*>/g, '');
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue