diff --git a/modules/parser/mediawiki.WikitextSerializer.js b/modules/parser/mediawiki.WikitextSerializer.js index 6c0c5b0f5f..1726b681af 100644 --- a/modules/parser/mediawiki.WikitextSerializer.js +++ b/modules/parser/mediawiki.WikitextSerializer.js @@ -38,19 +38,32 @@ WSP.escapeWikiText = function ( state, text ) { }); // this is synchronous for now, will still need sync version later, or // alternatively make text processing in the serializer async + var prefixedText = text; if ( ! state.onNewline ) { // Prefix '_' so that no start-of-line wiki syntax matches. Strip it from // the result. - p.process( '_' + text ); + prefixedText = '_' + text; + } + + if ( state.inIndentPre ) { + prefixedText = prefixedText.replace(/(\r?\n)/g, '$1_'); + } + + // FIXME: parse using + p.process( prefixedText ); + + + if ( ! state.onNewline ) { // now strip the leading underscore. if ( tokens[0] === '_' ) { tokens.shift(); } else { tokens[0] = tokens[0].substr(1); } - } else { - p.process( text ); } + + // state.inIndentPre is handled on the complete output + // // wrap any run of non-text tokens into tags using the source // offsets of top-level productions @@ -133,7 +146,12 @@ WSP.escapeWikiText = function ( state, text ) { console.warn( e ); } //console.warn( 'escaped wikiText: ' + outTexts.join('') ); - return outTexts.join(''); + var res = outTexts.join(''); + if ( state.inIndentPre ) { + return res.replace(/\n_/g, '\n'); + } else { + return res; + } }; var id = function(v) { @@ -203,6 +221,11 @@ WSP._serializeHTMLTag = function ( state, token ) { close = '/'; } + if ( token.name === 'pre' ) { + // html-syntax pre is very similar to nowiki + state.inHTMLPre = true; + } + // Swallow required newline from previous token on encountering a HTML tag //state.emitNewlineOnNextToken = false; @@ -215,6 +238,9 @@ WSP._serializeHTMLTag = function ( state, token ) { }; WSP._serializeHTMLEndTag = function ( state, token ) { + if ( token.name === 'pre' ) { + state.inHTMLPre = false; + } if ( ! WSP._emptyTags[ token.name ] ) { return ''; } else { @@ -471,6 +497,7 @@ WSP.tagHandlers = { start: { startsNewline: true, handle: function( state, token ) { + state.inIndentPre = true; state.textHandler = function( t ) { return t.replace(/\n/g, '\n ' ); }; @@ -479,7 +506,11 @@ WSP.tagHandlers = { }, end: { endsLine: true, - handle: function( state, token) { state.textHandler = null; return ''; } + handle: function( state, token) { + state.inIndentPre = false; + state.textHandler = null; + return ''; + } } }, meta: { @@ -653,7 +684,8 @@ WSP._serializeToken = function ( state, token ) { } break; case String: - res = state.inNoWiki? token : this.escapeWikiText( state, token ); + res = ( state.inNoWiki || state.inHTMLPre ) ? token + : this.escapeWikiText( state, token ); res = state.textHandler ? state.textHandler( res ) : res; break; case CommentTk: diff --git a/modules/parser/pegTokenizer.pegjs.txt b/modules/parser/pegTokenizer.pegjs.txt index cb69353390..1f93390fb4 100644 --- a/modules/parser/pegTokenizer.pegjs.txt +++ b/modules/parser/pegTokenizer.pegjs.txt @@ -422,6 +422,7 @@ block_lines // eat an empty line before the block s2:(os:optionalSpaceToken so:sol { return os.concat(so) })? bl:block_line { + //console.warn( pp(s)); var s2_ = (s2 !== '') ? s2 : []; return s.concat(s2_, bl); } @@ -453,7 +454,13 @@ paragraph return s1.concat(s2, /* [new TagTk('p')],*/ c); } -br = space* &newline { return new SelfclosingTagTk( 'br', [], {tsr: [pos0, pos]} ) } +br = s:optionalSpaceToken &newline { + return s.concat( + [ + new SelfclosingTagTk( 'br', [], {tsr: [pos0, pos]} ) + ] + ); +} /* * Syntax stops: Avoid eating significant tokens for higher-level productions @@ -494,6 +501,7 @@ inline inlineline = c:(urltext / !inline_breaks (inline_element / [^\r\n]))+ { + //console.warn('inlineline out:' + pp(c) + input.substr(pos0, pos)); return flatten_stringlist( c ); } @@ -830,9 +838,9 @@ template_param // MW accepts |foo | = bar | as a single param.. ('|' (space / newline)* &'=')? val:( - s0:space* + s0:optionalSpaceToken "=" - s1:space* + s1:optionalSpaceToken value:template_param_value? { return { s0: s0, s1: s1, value: value }; } @@ -1133,6 +1141,7 @@ pre_indent_in_tags / & { return stops.dec('pre'); } pre_indent_line = space l:inlineline { + //console.warn( JSON.stringify( [s, l] ) ); return l; } @@ -1207,7 +1216,7 @@ nowiki nowiki_content = ts:( t:[^<]+ { return t.join('') } - / "]* ">" p2:nowiki_content "" { + / "]* ">" p2:nowiki_content "" { //console.warn('nested pre in nowiki'); return [""], p2, [""]).join(''); } @@ -1309,9 +1318,9 @@ generic_newline_attribute // A single-line attribute. generic_attribute - = s:space* + = s:optionalSpaceToken name:generic_attribute_name - value:(space* + value:(optionalSpaceToken v:generic_attribute_value { return v })? { //console.warn( 'generic attribute: ' + pp([name, value])); @@ -1666,9 +1675,11 @@ htmlentity = "&" c:[#0-9a-zA-Z]+ ";" { return unentity("&" + c.join('') + ";") } -space +spaces = s:[ \t]+ { return s.join(''); } +space = [ \t] + optionalSpaceToken = s:space* { if ( s.length ) { @@ -1695,16 +1706,16 @@ sol // Eat includeonly/noinclude at start of line, so that start-of-line // syntax after it still matches ni:( niStart:({return pos}) - space* + s:space* "<" c:"/"? t:("includeonly" / "noinclude") - ">" {return [c, t, [niStart, pos]]} )? + ">" {return [s.join(''), c, t, [niStart, pos]]} )? { var niToken = []; if ( ni !== '') { - if ( ni[0] === '/' ) { - niToken = [new EndTagTk( ni[1], [], { tsr: ni[2] } )]; + if ( ni[1] === '/' ) { + niToken = [new EndTagTk( ni[2], [], { tsr: ni[3] } )]; } else { - niToken = [new TagTk( ni[1], [], { tsr: ni[2] } )]; + niToken = [new TagTk( ni[2], [], { tsr: ni[3] } )]; } }