Improve preformatted text handling

* Don't escape html-syntax pre content for now. It should later be parsed with
  a new pre content production (which needs to be split out of the regular pre
  production in the tokenizer)
* Protect indent-pre content from start-of-line syntax escaping
* Preserve extra leading spaces in the tokenizer
* Two more (now 284) round-trip tests are passing

Change-Id: I199b89c0ee7fae12546df10c1b5117c97caccac5
This commit is contained in:
Gabriel Wicke 2012-06-20 19:28:34 +02:00
parent 6054a4aa14
commit 344fac19b5
2 changed files with 61 additions and 18 deletions

View file

@ -38,19 +38,32 @@ WSP.escapeWikiText = function ( state, text ) {
});
// this is synchronous for now, will still need sync version later, or
// alternatively make text processing in the serializer async
var prefixedText = text;
if ( ! state.onNewline ) {
// Prefix '_' so that no start-of-line wiki syntax matches. Strip it from
// the result.
p.process( '_' + text );
prefixedText = '_' + text;
}
if ( state.inIndentPre ) {
prefixedText = prefixedText.replace(/(\r?\n)/g, '$1_');
}
// FIXME: parse using
p.process( prefixedText );
if ( ! state.onNewline ) {
// now strip the leading underscore.
if ( tokens[0] === '_' ) {
tokens.shift();
} else {
tokens[0] = tokens[0].substr(1);
}
} else {
p.process( text );
}
// state.inIndentPre is handled on the complete output
//
// wrap any run of non-text tokens into <nowiki> tags using the source
// offsets of top-level productions
@ -133,7 +146,12 @@ WSP.escapeWikiText = function ( state, text ) {
console.warn( e );
}
//console.warn( 'escaped wikiText: ' + outTexts.join('') );
return outTexts.join('');
var res = outTexts.join('');
if ( state.inIndentPre ) {
return res.replace(/\n_/g, '\n');
} else {
return res;
}
};
var id = function(v) {
@ -203,6 +221,11 @@ WSP._serializeHTMLTag = function ( state, token ) {
close = '/';
}
if ( token.name === 'pre' ) {
// html-syntax pre is very similar to nowiki
state.inHTMLPre = true;
}
// Swallow required newline from previous token on encountering a HTML tag
//state.emitNewlineOnNextToken = false;
@ -215,6 +238,9 @@ WSP._serializeHTMLTag = function ( state, token ) {
};
WSP._serializeHTMLEndTag = function ( state, token ) {
if ( token.name === 'pre' ) {
state.inHTMLPre = false;
}
if ( ! WSP._emptyTags[ token.name ] ) {
return '</' + token.name + '>';
} else {
@ -471,6 +497,7 @@ WSP.tagHandlers = {
start: {
startsNewline: true,
handle: function( state, token ) {
state.inIndentPre = true;
state.textHandler = function( t ) {
return t.replace(/\n/g, '\n ' );
};
@ -479,7 +506,11 @@ WSP.tagHandlers = {
},
end: {
endsLine: true,
handle: function( state, token) { state.textHandler = null; return ''; }
handle: function( state, token) {
state.inIndentPre = false;
state.textHandler = null;
return '';
}
}
},
meta: {
@ -653,7 +684,8 @@ WSP._serializeToken = function ( state, token ) {
}
break;
case String:
res = state.inNoWiki? token : this.escapeWikiText( state, token );
res = ( state.inNoWiki || state.inHTMLPre ) ? token
: this.escapeWikiText( state, token );
res = state.textHandler ? state.textHandler( res ) : res;
break;
case CommentTk:

View file

@ -422,6 +422,7 @@ block_lines
// eat an empty line before the block
s2:(os:optionalSpaceToken so:sol { return os.concat(so) })?
bl:block_line {
//console.warn( pp(s));
var s2_ = (s2 !== '') ? s2 : [];
return s.concat(s2_, bl);
}
@ -453,7 +454,13 @@ paragraph
return s1.concat(s2, /* [new TagTk('p')],*/ c);
}
br = space* &newline { return new SelfclosingTagTk( 'br', [], {tsr: [pos0, pos]} ) }
br = s:optionalSpaceToken &newline {
return s.concat(
[
new SelfclosingTagTk( 'br', [], {tsr: [pos0, pos]} )
]
);
}
/*
* Syntax stops: Avoid eating significant tokens for higher-level productions
@ -494,6 +501,7 @@ inline
inlineline
= c:(urltext / !inline_breaks (inline_element / [^\r\n]))+ {
//console.warn('inlineline out:' + pp(c) + input.substr(pos0, pos));
return flatten_stringlist( c );
}
@ -830,9 +838,9 @@ template_param
// MW accepts |foo | = bar | as a single param..
('|' (space / newline)* &'=')?
val:(
s0:space*
s0:optionalSpaceToken
"="
s1:space*
s1:optionalSpaceToken
value:template_param_value? {
return { s0: s0, s1: s1, value: value };
}
@ -1133,6 +1141,7 @@ pre_indent_in_tags
/ & { return stops.dec('pre'); }
pre_indent_line = space l:inlineline {
//console.warn( JSON.stringify( [s, l] ) );
return l;
}
@ -1207,7 +1216,7 @@ nowiki
nowiki_content
= ts:( t:[^<]+ { return t.join('') }
/ "<pre" p0:space* p1:[^>]* ">" p2:nowiki_content "</pre>" {
/ "<pre" p0:optionalSpaceToken p1:[^>]* ">" p2:nowiki_content "</pre>" {
//console.warn('nested pre in nowiki');
return ["<pre"].concat(p0, p1, [">"], p2, ["</pre>"]).join('');
}
@ -1309,9 +1318,9 @@ generic_newline_attribute
// A single-line attribute.
generic_attribute
= s:space*
= s:optionalSpaceToken
name:generic_attribute_name
value:(space*
value:(optionalSpaceToken
v:generic_attribute_value { return v })?
{
//console.warn( 'generic attribute: ' + pp([name, value]));
@ -1666,9 +1675,11 @@ htmlentity = "&" c:[#0-9a-zA-Z]+ ";" {
return unentity("&" + c.join('') + ";")
}
space
spaces
= s:[ \t]+ { return s.join(''); }
space = [ \t]
optionalSpaceToken
= s:space* {
if ( s.length ) {
@ -1695,16 +1706,16 @@ sol
// Eat includeonly/noinclude at start of line, so that start-of-line
// syntax after it still matches
ni:( niStart:({return pos})
space*
s:space*
"<" c:"/"? t:("includeonly" / "noinclude")
">" {return [c, t, [niStart, pos]]} )?
">" {return [s.join(''), c, t, [niStart, pos]]} )?
{
var niToken = [];
if ( ni !== '') {
if ( ni[0] === '/' ) {
niToken = [new EndTagTk( ni[1], [], { tsr: ni[2] } )];
if ( ni[1] === '/' ) {
niToken = [new EndTagTk( ni[2], [], { tsr: ni[3] } )];
} else {
niToken = [new TagTk( ni[1], [], { tsr: ni[2] } )];
niToken = [new TagTk( ni[2], [], { tsr: ni[3] } )];
}
}