/* Produces output more or less compatible with FakeParser; plug it into FP's output and see */

{
    /* Fixme: use static functions to separate module! Unfortunately, this
     * does not work:
     * var tu = require('./mediawiki.tokenizer.utils.js');
     * console.log(tu.flatten([]));
     * Using exports in the module gets a bit further, but accesses to
     * tu.flatten in productions still fail. Thus, I just moved the functions
     * here until a solution is found:
     */

    /* Static utilities */

    /*
     * Flatten an arbitrarily nested list of lists into one flat array.
     *
     * @param {Array} e Possibly nested array
     * @returns {Array} New array holding all non-array members in order
     */
    var flatten = function ( e ) {
        var es = [];
        // Collect into a single result array instead of concatenating a new
        // array for every nested list.
        var collect = function ( list ) {
            for (var i = 0, length = list.length; i < length; i++) {
                var ei = list[i];
                if (Array.isArray(ei)) {
                    collect(ei);
                } else {
                    es.push(ei);
                }
            }
        };
        collect(e);
        return es;
    };

    /*
     * Remove escaped quotes from attributes etc.
     *
     * @param {String} quotec The quote character that was escaped
     * @param {String} text Text containing backslash-escaped quotes
     * @returns {String} text with every '\' + quotec replaced by quotec
     */
    var unquote = function ( quotec, text ) {
        // split/join replaces all occurrences; String.replace with a string
        // pattern would only replace the first one.
        return text.split('\\' + quotec).join(quotec);
    };

    // Debug print with global switch
    var dp = function ( msg ) {
        if ( false ) {
            console.log(msg);
        }
    };

    // Pretty-print a value as indented JSON (used for debug output)
    var pp = function ( s ) {
        return JSON.stringify(s, null, 2);
    };

    /*
     * Annotate a token stream with list items with appropriate list tokens
     *
     * 'list' start tags are dropped, each 'listItem' start tag is replaced by
     * the list / list item start and end tags implied by the change between
     * the previous item's bullets and its own bullets, and a 'list' end tag
     * closes everything that is still open. All other tokens pass through.
     *
     * @static
     * @method
     * @param {[tokens]} Token stream with li tokens
     * @returns {[tokens]} Token stream, possibly with additional list tokens
     * @throws {String} "Unknown node prefix <char>" for a bullet character
     *                  other than * # ; :
     */
    var annotateList = function ( tokens ) {
        var out = [],      // List of tokens
            bstack = [],   // Bullet stack, previous element's listStyle
            bnext = [],    // Next element's listStyle
            endtags = [];  // Stack of end tags

        // Number of leading positions at which x and y agree
        var commonPrefixLength = function (x, y) {
            var minLength = Math.min(x.length, y.length);
            for (var i = 0; i < minLength; i++) {
                if (x[i] != y[i]) break;
            }
            return i;
        };

        // Open a new list plus its first item and remember the end tags
        var pushList = function ( listName, itemName ) {
            out.push({type: 'TAG', name: listName});
            out.push({type: 'TAG', name: itemName});
            endtags.push({type: 'ENDTAG', name: listName});
            endtags.push({type: 'ENDTAG', name: itemName});
        };

        // Close the n innermost lists together with their current items
        var popTags = function ( n ) {
            for (; n > 0; n--) {
                // push list item..
                out.push(endtags.pop());
                // and the list end tag
                out.push(endtags.pop());
            }
        };

        // True if a and b are ':' and ';' in either order
        var isDlDd = function (a, b) {
            var ab = [a, b].sort();
            return (ab[0] === ':' && ab[1] === ';');
        };

        // Close the current item and open a sibling item of the same kind
        var nextItem = function () {
            var itemToken = endtags.pop();
            out.push(itemToken);
            out.push({type: 'TAG', name: itemToken.name});
            endtags.push({type: 'ENDTAG', name: itemToken.name});
        };

        var doListItem = function ( bs, bn ) {
            var prefixLen = commonPrefixLength(bs, bn);
            var changeLen = bn.length - prefixLen;

            if (changeLen === 0 && bs.length === bn.length) {
                // same bullets as the previous item: sibling item.
                // Nothing is open when both bullet lists are empty.
                if (bn.length > 0) {
                    nextItem();
                }
            } else if ( bs.length == bn.length
                    && changeLen == 1
                    && isDlDd( bs[prefixLen], bn[prefixLen] ) ) {
                // handle dd/dt transitions
                out.push(endtags.pop());
                var newName = (bn[prefixLen] == ';') ? 'dt' : 'dd';
                out.push({type: 'TAG', name: newName});
                endtags.push({type: 'ENDTAG', name: newName});
            } else {
                // emit close tag tokens for closed lists
                popTags(bs.length - prefixLen);
                if (changeLen === 0 && prefixLen > 0) {
                    // Pure dedent (e.g. '**' followed by '*'): the deeper
                    // lists are closed above, now start the next item of the
                    // list we returned to.
                    nextItem();
                }
                for (var i = prefixLen; i < bn.length; i++) {
                    switch (bn[i]) {
                        case '*':
                            pushList('ul', 'li');
                            break;
                        case '#':
                            pushList('ol', 'li');
                            break;
                        case ';':
                            pushList('dl', 'dt');
                            break;
                        case ':':
                            pushList('dl', 'dd');
                            break;
                        default:
                            // report the offending bullet character
                            throw("Unknown node prefix " + bn[i]);
                    }
                }
            }
        };

        for (var i = 0, length = tokens.length; i < length; i++) {
            var token = tokens[i];
            switch ( token.type ) {
                case 'TAG':
                    switch (token.name) {
                        case 'list':
                            // ignore token
                            break;
                        case 'listItem':
                            // convert listItem to list and list item tokens
                            bnext = token.bullets;
                            doListItem( bstack, bnext );
                            bstack = bnext;
                            break;
                        default:
                            // pass through all remaining start tags
                            out.push(token);
                            break;
                    }
                    break;
                case 'ENDTAG':
                    if ( token.name == 'list' ) {
                        // pop all open list item tokens
                        popTags(bstack.length);
                        bstack = [];
                    } else {
                        out.push(token);
                    }
                    break;
                default:
                    out.push(token);
                    break;
            }
        }
        return out;
    };

    /*
     * Italic/Bold handling.
     *
     * - list of tokens
     * - NEWLINE
     * - ticks (2+) -> list with link in line token list?
* - process on newline
     * - need access to text nodes before/after for conversion back to text
     */
    /*
     * Convert QUOTE placeholder tokens into <i> / <b> start and end tags,
     * one line at a time.
     *
     * QUOTE tokens are copied to the output and their output offsets are
     * remembered in `italics` / `bolds`. On every NEWLINE token the remembered
     * offsets are turned into alternating TAG / ENDTAG tokens; an unclosed
     * tag gets an ENDTAG appended before the NEWLINE. If both lists hold an
     * odd number of entries, one bold is first re-interpreted as an
     * apostrophe followed by an italic.
     *
     * Token objects are modified in place (QUOTE tokens become tags, TEXT
     * tokens may get apostrophes appended).
     *
     * @param {[tokens]} tokens Token stream of one block
     * @returns {[tokens]} New token stream with quotes converted to tags
     */
    var doQuotes = function ( tokens ) {
        var italics = [],   // output offsets of pending italic quotes
            bolds = [],     // output offsets of pending bold quotes
            out = [],
            inserted = 0;   // tokens added to out that are not in tokens

        // Turn the bold at bolds[i] into an apostrophe (appended to the
        // preceding text token) plus an italic.
        var convertBold = function ( i ) {
            var index = bolds[i];
            var txt = out[index - 1];
            txt.value += "'";
            bolds.splice(i, 1);
            italics.push(index);
            // Offsets must stay in output order for the start/end toggling
            // in quotesToTags; the default sort compares as strings.
            italics.sort(function (a, b) { return a - b; });
        };

        // convert italics/bolds into tags
        var quotesToTags = function ( offsets, name ) {
            var toggle = true;
            for (var j = 0; j < offsets.length; j++) {
                var t = out[offsets[j]];
                if (toggle) {
                    t.type = 'TAG';
                } else {
                    t.type = 'ENDTAG';
                }
                t.name = name;
                delete t.value;
                toggle = !toggle;
            }
            if (!toggle) {
                // add end tag
                out.push({type: 'ENDTAG', name: name});
                inserted++;
            }
        };

        for (var i = 0, length = tokens.length; i < length; i++) {
            var token = tokens[i];
            switch (token.type) {
                case 'QUOTE':
                    // depending on length, add starting 's to preceding text node
                    // (if any)
                    // add token index to italic/bold lists
                    // add placeholder for token
                    var qlen = token.value.length;
                    switch (qlen) {
                        case 2:
                            italics.push(i + inserted);
                            out.push(token);
                            break;
                        case 3:
                            bolds.push(i + inserted);
                            out.push(token);
                            break;
                        case 4:
                            token.value = "'''";
                            if (i > 0 && tokens[i-1].type === 'TEXT') {
                                tokens[i-1].value += "'";
                            } else {
                                out.push({type: 'TEXT', value: "'"});
                                inserted++;
                            }
                            bolds.push(i + inserted);
                            out.push(token);
                            break;
                        case 5:
                            // order does not matter here, will be fixed
                            // by HTML parser backend
                            italics.push(i + inserted);
                            out.push({type: 'QUOTE', value: "''"});
                            inserted++;
                            bolds.push(i + inserted);
                            out.push({type: 'QUOTE', value: "'''"});
                            break;
                        default:
                            // longer than 5, only use the last 5 ticks.
                            // Take the surplus ticks before the value is
                            // overwritten, so none of them are lost.
                            var newvalue = token.value.substr(0, qlen - 5);
                            token.value = "'''''";
                            if (i > 0 && tokens[i-1].type === 'TEXT') {
                                tokens[i-1].value += newvalue;
                            } else {
                                out.push({type: 'TEXT', value: newvalue});
                                inserted++;
                            }
                            italics.push(i + inserted);
                            out.push({type: 'QUOTE', value: "''"});
                            inserted++;
                            bolds.push(i + inserted);
                            out.push({type: 'QUOTE', value: "'''"});
                            break;
                    }
                    break;
                case 'NEWLINE':
                    // balance out tokens, convert placeholders into tags
                    if (italics.length % 2 && bolds.length % 2) {
                        dp("balancing!");
                        var firstsingleletterword = -1,
                            firstmultiletterword = -1,
                            firstspace = -1;
                        for (var j = 0; j < bolds.length; j++) {
                            var ticki = bolds[j];
                            if (ticki > 0 && out[ticki - 1].type === 'TEXT') {
                                var txt = out[ticki - 1],
                                    lastchar = txt.value[txt.value.length - 1],
                                    secondtolastchar = txt.value[txt.value.length - 2];
                                dp('txt: ' + pp(txt));
                                if (lastchar === ' ' && firstspace === -1) {
                                    firstspace = j;
                                } else if (lastchar !== ' ') {
                                    if ( secondtolastchar === ' ' &&
                                            firstsingleletterword === -1) {
                                        firstsingleletterword = j;
                                    } else if ( secondtolastchar &&
                                            secondtolastchar !== ' ' &&
                                            firstmultiletterword === -1) {
                                        // keep the first candidate, like the
                                        // other two categories
                                        firstmultiletterword = j;
                                    }
                                }
                            }
                        }

                        // now see if we can convert a bold to an italic and
                        // an apostrophe
                        if (firstsingleletterword > -1) {
                            convertBold(firstsingleletterword);
                        } else if (firstmultiletterword > -1) {
                            convertBold(firstmultiletterword);
                        } else if (firstspace > -1) {
                            convertBold(firstspace);
                        }
                    }

                    quotesToTags(bolds, 'b');
                    quotesToTags(italics, 'i');
                    bolds = [];
                    italics = [];
                    out.push(token);
                    break;
                default:
                    out.push(token);
            }
        }
        return out;
    };

    /* End static utilities */

    /*
     * Flags for specific parse environments (inside tables, links etc). Flags
     * trigger syntactic stops in the inline_breaks production, which
     * terminates inline and attribute matches. Flags merely reduce the number
     * of productions needed: The grammar is still context-free as the
     * productions can just be unrolled for all combinations of environments
     * at the cost of a much larger grammar.
*/
    // Per-flag nesting counters; a missing or zero entry means "not active".
    var syntaxFlags = {};

    /*
     * Enter a parse environment: increment the counter for `flag`.
     * Always returns true so the call can be the result of a semantic
     * predicate.
     */
    var setFlag = function(flag) {
        if (syntaxFlags[flag] !== undefined) {
            syntaxFlags[flag]++;
        } else {
            syntaxFlags[flag] = 1;
        }
        return true;
    };

    /*
     * Leave a parse environment: decrement the counter for `flag`.
     * Returns nothing, so a predicate consisting only of this call fails.
     */
    var clearFlag = function(flag) {
        syntaxFlags[flag]--;
    };

    // Start position of top-level block
    // Could also provide positions for lower-level blocks using a stack.
    var blockStart = 0;

    // cache the input length
    var inputLength = input.length;

    // pseudo-production that matches at end of input
    var isEOF = function (pos) {
        return pos === inputLength;
    };

    // text start position
    var textStart = 0;

    // Counter used to number external links that have no link text
    var linkCount = 1;
}

start
  = e:toplevelblock* newline* {
        return flatten(e);
    }


/* All chars that cannot start syntactic structures in the middle of a line
 * XXX: ] and other end delimiters should probably only be activated inside
 * structures to avoid unnecessarily leaving the text production on plain
 * content. */

text_char = [^'<~[{\n\r:\]}|!=]

text = t:text_char+ { return t.join(''); }

/* Explanation of chars
 * '    quotes (italic/bold)
 * <    start of xmlish_tag
 * ~    signatures/dates
 * [    start of links
 * {    start of parser functions, transclusion and template args
 * \n   all sort of block-level markup at start of line
 * \r   ditto
 * h    http(s) urls
 * n    nntp(s) urls
 * m    mailto urls
 *
 * ! and | table cell delimiters, might be better to specialize those
 * =    headings - also specialize those!
 *
 * The following chars are also included for now, but only apply in some
 * contexts and should probably be enabled only in those:
 * :    separate definition in ; term : definition
 * ]    end of link
 * }    end of parser func/transclusion/template arg
 */
urltext = ( t:[^'<~[{\n\rfghimnstw|!:\]} &=]+ { return t.join(''); }
          // XXX: use general entity decode!
          // The entity has to be spelled out here: matching a bare "&" and
          // returning "&" would decode nothing. A bare "&" is still consumed
          // by the text_char alternative below.
          / "&amp;" { return "&"; } // decode ampersand in text
          / urllink
          // Convert trailing space into &nbsp; (a space directly followed
          // by a colon becomes U+00A0)
          // XXX: This should be moved to a serializer
          / ' ' & ':' { return "\u00a0"; }
          / t:text_char )+

/*
    '//', // for protocol-relative URLs, but not in text!
'ftp://',
    'git://',
    'gopher://',
    'http://',
    'https://',
    'irc://',
    'ircs://',  // @bug 28503
    'mailto:',
    'mms://',
    'news:',
    'nntp://', // @bug 3808 RFC 1738
    'svn://',
    'telnet://', // Well if we're going to support the above.. -ævar
    'worldwind://',
*/

// Old version
//text = t:[A-Za-z0-9,._ "?!\t-]+ { return t.join('') }

// Experimental tweaked version: avoid expensive single-char substrings
// This did not bring the expected performance boost, however.
//text = [A-Za-z0-9,._ -] {
//        textStart = pos;
//
//        var res = input.substr(textStart - 1, inputLength)
//                    .match(/[A-Za-z0-9,._ -]+/)[0];
//        pos = pos + (res.length - 1);
//        return res
//    }

// One or more blanks / tabs, returned as a string
space
  = s:[ \t]+ { return s.join(''); }

// Start of line: a newline or the start of input, optionally followed by
// comments. Returns a NEWLINE token followed by the comment tokens.
sol = (newline / & { return pos === 0; } { return true; })
      cn:(c:comment n:newline? { return [c, {type: 'TEXT', value: n}] })* {
          return [{type: 'NEWLINE'}].concat(cn);
      }

// Matches (without consuming anything) at the end of the input
eof = & { return isEOF(pos); } { return true; }

newline
  = '\n'
  / '\r\n'

// End of line or end of file
eolf = newline / eof

// A top-level block. The first token gets a data-sourcePos attribute with the
// start:end input offsets of the block, a NEWLINE token is appended and the
// quotes of the block are converted by doQuotes.
toplevelblock
  = & { blockStart = pos; return true; } b:block {
        b = flatten(b);
        var bs = b[0];
        //dp('toplevelblock:' + pp(b));
        if (bs.attribs === undefined) {
            bs.attribs = [];
        }
        bs.attribs.push(['data-sourcePos', blockStart + ':' + pos]);
        // XXX: only run this for lines that actually need it!
        b.push({type: 'NEWLINE'});
        b = doQuotes(b);
        return b;
    }

// Note: `pre` used to be listed a second time after `nowiki`; that
// alternative could only be tried where `pre` had already failed.
block
  = block_lines
  / pre
  / comment &eolf
  / nowiki
  / bt:block_tag { return [bt] } // avoid a paragraph if we know that the line starts with a block tag
  / para
  / inlineline // includes generic tags; wrapped into paragraphs in DOM postprocessor
  / s:sol {
        if (s) {
            return [s, {type: 'NEWLINE'}];
        } else {
            return [{type: 'NEWLINE'}];
        }
    }

block_lines
  = s:sol
    // eat an empty line before the block
    s2:(ss:space* so:sol { return [{type: 'TEXT', value: ss.join('')}].concat(so) })?
    bl:block_line {
        var s2_ = (s2 !== '') ? s2 : [];
        return s.concat(s2_, bl);
    }

// Block structures with start-of-line wiki syntax
block_line
  = h
  / table
  / lists
  // tag-only lines should not trigger pre
  / space* bt:block_tag space* &eolf { return bt }
  / pre_indent
  / pre

// TODO: convert inline content to annotations!
para
  = s1:sol s2:sol c:inlineline {
        return s1.concat(s2, [{type: 'TAG', name: 'p'}], c);
    }

br = space* &newline { return {type: 'SELFCLOSINGTAG', name: 'br'} }

// Syntax stops to limit inline expansion depending on syntactic context
inline_breaks
  = & {
        // Important hack: disable caching for this production, as the default
        // cache key does not take into account flag states!
        cacheKey = '';
        return true;
    }
    & { return syntaxFlags['table']; }
    a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a) + pos); return true; }
  // The colon stop is suspended inside external links and link descriptions.
  // (The flag set by link_text is 'linkdesc'; this used to test a misspelled
  // 'linkdesk', which is never set.)
  / & { return (syntaxFlags['colon'] &&
                ! syntaxFlags.extlink &&
                ! syntaxFlags.linkdesc); }
    ":" { return true; }
  / & { return syntaxFlags['extlink']; } "]" { return true; }
  / & { return syntaxFlags['linkdesc']; } link_end { return true; }
  / & { return syntaxFlags['h']; }
    ( & { return syntaxFlags['h1'] } '=' space* newline { return true; }
    / & { return syntaxFlags['h2'] } '==' space* newline { return true; }
    / & { return syntaxFlags['h3'] } '===' space* newline { return true; }
    / & { return syntaxFlags['h4'] } '====' space* newline { return true; }
    / & { return syntaxFlags['h5'] } '=====' space* newline { return true; }
    / & { return syntaxFlags['h6'] } '======' space* newline { return true; }
    )

// Inline content, possibly spanning lines. Consecutive characters are merged
// into TEXT tokens; all other results are passed through as tokens.
inline
  = c:(urltext / inline_element / (!inline_breaks ch:. { return ch; }))+ {
        var out = [];
        var text = [];
        c = flatten(c);
        for (var i = 0; i < c.length; i++) {
            if (typeof c[i] == 'string') {
                text.push(c[i]);
            } else {
                if (text.length) {
                    out.push({ type: "TEXT", value: text.join('') });
                    text = [];
                }
                // push, not concat: concat returns a new array and would
                // silently drop the token
                out.push(c[i]);
            }
        }
        if (text.length) {
            out.push({ type: 'TEXT', value: text.join('') });
        }
        return out;
    }

// Inline content of a single line; same token merging as `inline`.
inlineline
  = c:(urltext / !inline_breaks (inline_element / [^\n]))+ {
        var out = [];
        var text = [];
        c = flatten(c);
        for (var i = 0; i < c.length; i++) {
            if (typeof c[i] == 'string') {
                text.push(c[i]);
            } else {
                if (text.length) {
                    out.push({type: 'TEXT', value: text.join('')});
                    text = [];
                }
                out.push(c[i]);
            }
        }
        if (text.length) {
            out.push({type: 'TEXT', value: text.join('')});
        }
        // dp takes a single message argument
        dp('inlineline out:' + pp(out));
        return out;
    }

/* TODO: convert all these to annotations!
 * -> need (start, end) offsets within block
 */
inline_element
  = & { dp('inline_element enter' + input.substr(pos, 10)); return true; }
    comment
  // Can actually also be block-level elements, we don't really try to enforce
  // a content model in the tokenizer. The HTML tree builder and DOM
  // transformations are better equipped to deal with it.
  / xmlish_tag
  / template
  / wikilink
  / extlink
  / quote

/* Headings
 *
 * Listed in reverse order on purpose ;)
 */
h = h6 / h5 / h4 / h3 / h2 / h1

/* We might want to consider using a single rule for all headings, and
 * figuring out the level in the action. This saves quite some backtracking,
 * and the conversion of equal signs into text should not be a problem as
 * equals are not part of other syntax.
 */

/* Each heading rule sets the 'h' and 'hN' flags before matching the heading
 * content and clears them again either in the action (match) or in the
 * always-failing second alternative (no match).
 */
h1 = '=' r:(
     & { setFlag('h'); return setFlag('h1') }
     c:inlineline '=' (space / comment)* &eolf {
        clearFlag('h');
        clearFlag('h1');
        return [{type: 'TAG', name: 'h1'}]
                .concat(c, [{type: 'ENDTAG', name: 'h1'}]);
     }
     / & { dp('nomatch exit h1'); clearFlag('h'); clearFlag('h1'); return false } { return null }
     ) { return r }

h2 = '==' r:(
     & { setFlag('h'); return setFlag('h2') }
     c:inlineline '==' (space / comment)* &newline {
        clearFlag('h');
        clearFlag('h2');
        return [{type: 'TAG', name: 'h2'}]
                .concat(c, [{type: 'ENDTAG', name: 'h2'}]);
     }
     / & { clearFlag('h'); clearFlag('h2'); return false }
     ) { return r }

h3 = '===' r:(
     & { setFlag('h'); return setFlag('h3') }
     c:inlineline '===' comment? &newline {
        clearFlag('h');
        clearFlag('h3');
        return [{type: 'TAG', name: 'h3'}]
                .concat(c, [{type: 'ENDTAG', name: 'h3'}]);
     }
     / & { clearFlag('h'); clearFlag('h3'); return false }
     ) { return r }

h4 = '====' r:(
     & { setFlag('h'); return setFlag('h4') }
     c:inlineline '====' comment? &newline {
        clearFlag('h');
        clearFlag('h4');
        return [{type: 'TAG', name: 'h4'}]
                .concat(c, [{type: 'ENDTAG', name: 'h4'}]);
     }
     / & { clearFlag('h'); clearFlag('h4'); return false }
     ) { return r }

h5 = '=====' r:(
     & { setFlag('h'); return setFlag('h5') }
     c:inlineline '=====' comment? &newline {
        clearFlag('h');
        clearFlag('h5');
        return [{type: 'TAG', name: 'h5'}]
                .concat(c, [{type: 'ENDTAG', name: 'h5'}]);
     }
     / & { clearFlag('h'); clearFlag('h5'); return false }
     ) { return r }

h6 = '======' r:(
     & { setFlag('h'); return setFlag('h6') }
     c:inlineline '======' comment? &newline {
        clearFlag('h');
        clearFlag('h6');
        return [{type: 'TAG', name: 'h6'}]
                .concat(c, [{type: 'ENDTAG', name: 'h6'}]);
     }
     / & { clearFlag('h'); clearFlag('h6'); return false }
     ) { return r }

// Consecutive space-indented lines, wrapped into a pre tag
pre_indent
  = l:pre_indent_line ls:(sol pre_indent_line)* {
        return [{type: 'TAG', name: 'pre'}]
                .concat( [l], ls
                       , [{type: 'ENDTAG', name: 'pre'}]);
    }

pre_indent_line = space l:inlineline {
    return [{type: 'TEXT', value: '\n'}].concat(l);
}

// HTML comment, terminated by '-->' or the end of input, followed by any
// further comments that are separated only by whitespace and one newline.
// (The delimiters are written out to match comment_chars and the `c` label
// used in the action.)
comment
  = '<!--' c:comment_chars* ('-->' / eof)
    cs:(space* newline space* cn:comment { return cn })* {
        return [{ type: 'COMMENT', value: c.join('') }].concat(cs);
    }

// Any character that does not start the '-->' terminator
comment_chars
  = c:[^-] { return c; }
  / c:'-' !'->' { return c; }

// Bare URL in text: a link whose text is the URL itself
urllink
  = target:url {
        return [ { type: 'TAG',
                   name: 'a',
                   attribs: [['href', target]] }
               , {type: 'TEXT', value: target}
               , {type: 'ENDTAG', name: 'a'}
               ];
    }

// External link in brackets. The 'extlink' flag is active while the link is
// matched and is cleared on both the success and the failure path.
extlink
  = "["
    & { return setFlag('extlink'); }
    target:url
    space*
    text:inlineline?
    "]" {
        clearFlag('extlink');
        if ( text == '' ) {
            // XXX: Link numbering should be implemented in post-processor.
            text = [{type: 'TEXT', value: "[" + linkCount + "]"}];
            linkCount++;
        }
        return [
            { type: 'TAG',
              name: 'a',
              attribs: [['href', target]] }
        ].concat( text
                , [{type: 'ENDTAG', name: 'a'}]);
    }
  / "[" & { clearFlag('extlink'); return false; }

/* Default URL protocols in MediaWiki (see DefaultSettings). Normally these can
 * be configured dynamically. */
url_protocol
  = '//' // for protocol-relative URLs
  / 'ftp://'
  / 'git://'
  / 'gopher://'
  / 'http://'
  / 'https://'
  / 'irc://'
  / 'ircs://'  // @bug 28503
  / 'mailto:'
  / 'mms://'
  / 'news:'
  / 'nntp://' // @bug 3808 RFC 1738
  / 'svn://'
  / 'telnet://' // Well if we're going to support the above.. -ævar
  / 'worldwind://'

// javascript does not support unicode features..
unicode_separator_space = [ \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]

url
  = proto:url_protocol
    rest:( [^ :\]\[\n"'<>\x00-\x20\x7f,.&\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
         / s:[.:,] !(space / eolf) { return s }
         // XXX: use general entity decode!
         // Decode the ampersand entity; any other '&' is taken literally.
         / '&amp;' { return '&' }
         / '&' )+
    { return proto + rest.join(''); }

//[^][<>"\\x00-\\x20\\x7F\p{Zs}]

template
  = "{{" target:template_target
    params:("|" p:template_param { return p })*
    "}}" {
        var obj = { type: 'TAG', name: 'template', attribs: [['target', target]] };
        if (params && params.length) {
            obj.attribs.push(params);
        }
        // Should actually use a self-closing tag here, but the Node HTML5
        // parser only recognizes known self-closing tags for now, so use an
        // explicit end tag for now.
        return [obj, {type: 'ENDTAG', name: 'template'}];
    }

template_target
  = h:( !"}}" x:([^|\n]) { return x } )* { return h.join(''); }

// Returns a [name, value] pair; name is null for unnamed parameters
template_param
  = name:template_param_name "=" c:template_param_text {
        return [name, c];
    }
  / c:template_param_text {
        return [null, c];
    }

tplarg
  = "{{{" name:link_target
    params:("|" p:template_param { return p })*
    "}}}" {
        var obj = {
            type: 'SELFCLOSINGTAG',
            name: 'templatearg',
            attribs: [['argname', name]]
        };
        if (params && params.length) {
            obj.attribs.push(params);
        }
        return obj;
    }

template_param_name
  = h:( !"}}" x:([^=|\n]) { return x } )* { return h.join(''); }

template_param_text
  = template_param_text_chunk*
/*  = h:( !"}}" x:([^|]) { return x } )* { return h.join(''); }*/

template_param_text_chunk
  = comment
  / xmlish_tag
  / extlink
  / template
  / wikilink
  / quote
  / !"}}" x:([^|\n]) { return x }

// Internal link. Without link text, the target plus any directly following
// text (the suffix) is used as the link text; href is always the bare target.
wikilink
  = "[["
    target:link_target
    text:("|" lt:link_text { return lt })*
    "]]"
    suffix:text?
    {
        var obj = {
            type: 'TAG',
            name: 'a',
            attribs: [['data-type', 'internal']]
        };
        obj.attribs.push(['href', target]);
        var textTokens;
        if (text && text.length) {
            textTokens = text; // XXX
        } else {
            if (suffix !== '') {
                target += suffix;
            }
            textTokens = [{type: 'TEXT', value: target}];
        }
        return [obj].concat(textTokens, [{type: 'ENDTAG', name: 'a'}]);
    }

link_target
  = h:( !"]]" x:([^|\n]) { return x } )* { return h.join(''); }

// Link description. The 'linkdesc' flag is set for the duration of every
// inlineline attempt and cleared again whether the attempt matches (action)
// or not (always-failing second alternative), so the counter is unchanged
// once this rule is done. Previously the flag was set once per attempt but
// cleared only once per link.
link_text
  = h:(
        & { return setFlag('linkdesc'); }
        x:inlineline { clearFlag('linkdesc'); return x; }
      / & { clearFlag('linkdesc'); return false; }
    )* {
        return h;
    }

link_end = "]]"

/* Generic quote production for italic and bold, further processed in a token
 * stream transformation in doQuotes. Relies on NEWLINE tokens being emitted
 * for each line of text to balance quotes per line.
 *
 * We are not using a simple pair rule here as we need to support mis-nested
 * bolds/italics and MediaWiki's special heuristics for apostrophes, which are
 * all not context free. */
quote = "''" x:"'"* {
    return {
        type : 'QUOTE',
        value: "''" + x.join('')
    }
}

/* Will need to check anything xmlish against known/allowed HTML tags and
 * registered extensions, otherwise fail the match. Should ref be treated as a
 * regular extension? */
xmlish_tag = nowiki / ref / references / generic_tag

pre = "
" ts:(t1:[^<]+ { return {type:'TEXT',value:t1.join('')} } / nowiki / !"" t2:. {return {type:'TEXT',value:t2}})+ ("" / eof) { // return nowiki tags as well? //console.log('inpre'); return [ {type: 'TAG', name: 'pre', attribs: attribs} ] .concat(ts, [{type: 'ENDTAG', name: 'pre'}]); } / "" { return {type: 'TEXT', value: ""}; } nowiki = "
]* ">" p2:nowiki_content "" { //console.log('nested pre in nowiki'); return ["
"], [p2[0].value], [""]).join(''); } / (!("" / "") c:. {return c}) )* { // return nowiki tags as well? return [{type: 'TEXT', value: ts.join('')}]; } // See http://dev.w3.org/html5/spec/Overview.html#syntax-tag-name and // following paragraphs block_tag = "<" end:"/"? name:block_name attribs:generic_attribute* selfclose:"/"? ">" { var res = {name: name, attribs: attribs}; if ( end != '' ) { res.type = 'ENDTAG'; } else if ( selfclose != '' ) { res.type = 'SELFCLOSINGTAG'; } else { res.type = 'TAG'; } return res; } block_name = "p" / "table" / "td" / "tr" / "ul" / "ol" / "li" / "dl" / "dt" / "dd" / "div" / "center" / "blockquote" // See http://dev.w3.org/html5/spec/Overview.html#syntax-tag-name and // following paragraphs generic_tag = "<" end:"/"? name:[0-9a-zA-Z]+ attribs:generic_attribute* space* selfclose:"/"? ">" { var res = {name: name.join(''), attribs: attribs}; if ( end != '' ) { res.type = 'ENDTAG'; } else if ( selfclose != '' ) { res.type = 'SELFCLOSINGTAG'; } else { res.type = 'TAG'; } return res; } generic_attribute = s:space* name:generic_attribute_name value:(space* generic_attribute_value)? { if ( value !== '' ) { return [name, value[1]]; } else { return [name,'']; } } // http://dev.w3.org/html5/spec/Overview.html#attributes-0 generic_attribute_name = n:[^ \t\0/"'>=]+ { return n.join(''); } generic_attribute_value = "=" space* v:att_value {return v} att_value = t:[^ \t'"<>='\n]+ { return [null, t.join('')]; } / "'" t:[^'>]+ "'" { return unquote("'", t.join('')); } / '"' t:[^">]+ '"' { return unquote('"', t.join('')); } ref = ref_full / ref_empty /* Can we do backreferences to genericize this? 
*/ ref_full = start:ref_start ">" content:ref_content* close:ref_end { return [ { type: 'TAG', name: 'ext', attribs: [['data-extname', 'ref']] .concat(start.params, [['data-startws', start.ws]])}, content, {type: 'ENDTAG', name: 'ref'} ]; } ref_empty = start:ref_start close:(space* "/>") { return [{ type: 'SELFCLOSINGTAG', name: 'ext', attribs: [['data-extname', 'ref']] .concat(start.params ,[['data-startws', start.ws]]) }]; } ref_start = "") { return all.join(''); } ref_content = !ref_end a:inline { // XXX: ineffective syntactic stop return a; } /* fixme probably have to programatically add these */ references = references_full / references_empty references_full = start:references_start ">" content:references_content* close:references_end { return [ { type: 'TAG', name: 'ext', attribs: [['data-extname', 'references']] .concat(start.params ,[['data-startws', start.ws]]) }, content, { type: 'ENDTAG', name: 'ext' } ]; } references_empty = start:references_start close:(space* "/>") { return [{ type: 'SELFCLOSINGTAG', name: 'ext', attribs: [['data-extname', 'references']] .concat(start.params ,[['data-startws', start.ws]]) }]; } references_start = "