/* Produces output more or less compatible with FakeParser; plug it into FP's output and see */
{
    /* Fixme: use static functions to separate module! Unfortunately, this
     * does not work:
     * var tu = require('./mediawiki.tokenizer.utils.js');
     * console.log(tu.flatten([]));
     * Using exports in the module gets a bit further, but accesses to
     * tu.flatten in productions still fail. Thus, I just moved the functions
     * here until a solution is found:
     */

    /* Static utilities */

    // Flatten a list of lists.
    var flatten = function ( e ) {
        var es = [];
        // flatten sub-arrays
        for (var i = 0, length = e.length; i < length; i++) {
            var ei = e[i];
            if ($.isArray(ei)) {
                es = es.concat(flatten(ei));
            } else {
                es.push(ei);
            }
        }
        return es;
    };

    // Remove escaped quotes from attributes etc.
    var unquote = function (quotec, text) {
        // Unescape every occurrence, not just the first one: String.replace
        // with a string pattern only replaces the first match.
        return text.split('\\' + quotec).join(quotec);
    };


    // Debug print with global switch
    var debug = false;
    var dp = function ( msg ) {
        if ( debug ) {
            console.log(msg);
        }
    };

    // Pretty-print a structure as indented JSON
    var pp = function ( s ) { return JSON.stringify(s, null, 2); };

    /*
     * Annotate a token stream with list items with appropriate list tokens
     *
     * @static
     * @method
     * @param {[tokens]} Token stream with li tokens
     * @returns {[tokens]} Token stream, possibly with additional list tokens
     */
    var annotateList = function ( tokens ) {
        var out = [],     // List of tokens
            bstack = [],  // Bullet stack, previous element's listStyle
            bnext = [],   // Next element's listStyle
            endtags = []; // Stack of end tags

        var commonPrefixLength = function (x, y) {
            var minLength = Math.min(x.length, y.length);
            for (var i = 0; i < minLength; i++) {
                if (x[i] != y[i]) {
                    break;
                }
            }
            return i;
        };

        var pushList = function ( listName, itemName ) {
            out.push({type: 'TAG', name: listName});
            out.push({type: 'TAG', name: itemName});
            endtags.push({type: 'ENDTAG', name: listName});
            endtags.push({type: 'ENDTAG', name: itemName});
        };

        var popTags = function ( n ) {
            for (; n > 0; n--) {
                // push list item..
                out.push(endtags.pop());
                // and the list end tag
                out.push(endtags.pop());
            }
        };

        var isDlDd = function (a, b) {
            var ab = [a, b].sort();
            return (ab[0] === ':' && ab[1] === ';');
        };

        var doListItem = function ( bs, bn ) {
            var prefixLen = commonPrefixLength (bs, bn);
            var changeLen = bn.length - prefixLen;
            var prefix = bn.slice(0, prefixLen);
            // emit close tag tokens for closed lists
            if (changeLen === 0) {
                var itemToken = endtags.pop();
                out.push(itemToken);
                out.push({type: 'TAG', name: itemToken.name});
                endtags.push({type: 'ENDTAG', name: itemToken.name});
            } else if ( bs.length == bn.length
                        && changeLen == 1
                        && isDlDd( bs[prefixLen], bn[prefixLen] ) ) {
                // handle dd/dt transitions
                out.push(endtags.pop());
                var newName;
                if ( bn[prefixLen] == ';' ) {
                    newName = 'dt';
                } else {
                    newName = 'dd';
                }
                out.push({type: 'TAG', name: newName});
                endtags.push({type: 'ENDTAG', name: newName});
            } else {
                for (var i = prefixLen; i < bn.length; i++) {
                    switch (bn[i]) {
                        case '*':
                            pushList('ul', 'li');
                            break;
                        case '#':
                            pushList('ol', 'li');
                            break;
                        case ';':
                            pushList('dl', 'dt');
                            break;
                        case ':':
                            pushList('dl', 'dd');
                            break;
                        default:
                            // report the offending new bullet, not the common prefix
                            throw new Error("Unknown node prefix " + bn[i]);
                    }
                }
            }
        };

        for (var i = 0, length = tokens.length; i < length; i++) {
            var token = tokens[i];
            switch ( token.type ) {
                case 'TAG':
                    switch (token.name) {
                        case 'list':
                            // ignore token
                            break;
                        case 'listItem':
                            // convert listItem to list and list item tokens
                            bnext = token.bullets;
                            doListItem( bstack, bnext );
                            bstack = bnext;
                            break;
                        default:
                            // pass through all remaining start tags
                            out.push(token);
                            break;
                    }
                    break;
                case 'ENDTAG':
                    if ( token.name == 'list' ) {
                        // pop all open list item tokens
                        popTags(bstack.length);
                        bstack = [];
                    } else {
                        out.push(token);
                    }
                    break;
                default:
                    out.push(token);
                    break;
            }
        }
        return out;
    };
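
    // Illustrative example (not part of the original source): given a token
    // stream such as
    //     { type: 'TAG', name: 'list' },
    //     { type: 'TAG', name: 'listItem', bullets: ['*'] },      { type: 'TEXT', value: 'a' },
    //     { type: 'TAG', name: 'listItem', bullets: ['*', '*'] }, { type: 'TEXT', value: 'b' },
    //     { type: 'ENDTAG', name: 'list' }
    // annotateList emits nested ul/li TAG and ENDTAG tokens, roughly
    // corresponding to <ul><li>a<ul><li>b</li></ul></li></ul>.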

    /* End static utilities */

    /*
     * Flags for specific parse environments (inside tables, links etc). Flags
     * trigger syntactic stops in the inline_breaks production, which
     * terminates inline and attribute matches. Flags merely reduce the number
     * of productions needed: the grammar is still context-free, as the
     * productions can just be unrolled for all combinations of environments
     * at the cost of a much larger grammar.
     */
    var syntaxFlags = {};
    var setFlag = function(flag) {
        if (syntaxFlags[flag] !== undefined) {
            syntaxFlags[flag]++;
        } else {
            syntaxFlags[flag] = 1;
        }
        return true;
    };
    var clearFlag = function(flag) {
        syntaxFlags[flag]--;
    };
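
    // Sketch of how these flags are used (illustrative, based on the
    // productions below): table_start runs setFlag('table') before matching a
    // table body and table_end runs clearFlag('table'); while the counter is
    // non-zero, inline_breaks recognizes '||', '!!' and '|}' as syntactic
    // stops, which terminates inline matches inside table cells.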

    // Start position of top-level block
    // Could also provide positions for lower-level blocks using a stack.
    var blockStart = 0;
}

start
    = e:toplevelblock* newline* {
        return flatten(e);
    }

anyblock = block / inline
anyblockline = block / inlineline


// All chars that cannot start syntactic structures
text = t:[A-Za-z0-9,._ -]+ { return t.join('') }

space
    = s:[ \t]+ { return s.join(''); }


// Start of line
sol = (newline / & { return pos === 0; } { return true; })
    cn:(c:comment n:newline? { return [c, n] })? {
        return cn;
    }


newline
    = '\n' / '\r\n'

toplevelblock
    = & { blockStart = pos; return true; } b:block {
        b = flatten(b);
        var bs = b[0];
        dp('toplevelblock:' + pp(b));
        if (bs.attribs === undefined) {
            bs.attribs = [];
        }
        bs.attribs.push(['data-sourcePos', blockStart + ':' + pos]);
        return b;
    }

block
    = (sol space* &newline)? bl:block_lines { return bl; }
    / para
    / comment
    / (s:sol {
        if (s) {
            return [s, {type: 'NEWLINE'}];
        } else {
            return [{type: 'NEWLINE'}];
        }
    })


// Block structures with start-of-line wiki syntax
block_lines
    = h
    / table
    / lists
    / pre_indent

/* Headings */
h = h1 / h2 / h3 / h4 / h5 / h6

h1 = sol '='
    (
        & { setFlag('h'); return setFlag('h1') }
        c:inlineline '=' comment? &newline {
            clearFlag('h');
            clearFlag('h1');
            return [{type: 'TAG', name: 'h1'}]
                .concat(c, [{type: 'ENDTAG', name: 'h1'}]);
        }
        / { clearFlag('h'); clearFlag('h1'); return null }
    )

h2 = sol '=='
    (
        & { setFlag('h'); return setFlag('h2') }
        c:inlineline '==' comment? &newline {
            clearFlag('h');
            clearFlag('h2');
            return [{type: 'TAG', name: 'h2'}]
                .concat(c, [{type: 'ENDTAG', name: 'h2'}]);
        }
        / { clearFlag('h'); clearFlag('h2'); return null }
    )

h3 = sol '==='
    (
        & { setFlag('h'); return setFlag('h3') }
        c:inlineline '===' comment? &newline {
            clearFlag('h');
            clearFlag('h3');
            return [{type: 'TAG', name: 'h3'}]
                .concat(c, [{type: 'ENDTAG', name: 'h3'}]);
        }
        / { clearFlag('h'); clearFlag('h3'); return null }
    )

h4 = sol '===='
    (
        & { setFlag('h'); return setFlag('h4') }
        c:inlineline '====' comment? &newline {
            clearFlag('h');
            clearFlag('h4');
            return [{type: 'TAG', name: 'h4'}]
                .concat(c, [{type: 'ENDTAG', name: 'h4'}]);
        }
        / { clearFlag('h'); clearFlag('h4'); return null }
    )

h5 = sol '====='
    (
        & { setFlag('h'); return setFlag('h5') }
        c:inlineline '=====' comment? &newline {
            clearFlag('h');
            clearFlag('h5');
            return [{type: 'TAG', name: 'h5'}]
                .concat(c, [{type: 'ENDTAG', name: 'h5'}]);
        }
        / { clearFlag('h'); clearFlag('h5'); return null }
    )

h6 = sol '======'
    (
        & { setFlag('h'); return setFlag('h6') }
        c:inlineline '======' comment? &newline {
            clearFlag('h');
            clearFlag('h6');
            return [{type: 'TAG', name: 'h6'}]
                .concat(c, [{type: 'ENDTAG', name: 'h6'}]);
        }
        / { clearFlag('h'); clearFlag('h6'); return null }
    )

heading_marker
    = '=' '='*

heading_text
    = h:( !(heading_marker newline) x:inlineline { return x } )* { return h.join(''); }


// TODO: convert inline content to annotations!
para
    = (sol br)? pl:para_lines { return pl; }

para_lines
    = s:sol c:inlineline cs:(!block_lines para_lines)* {
        var res = [{type: 'TAG', name: 'p'}];
        if (s !== '') {
            res.push(s);
        }
        //console.log('paralines' + pp(res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}])));
        return res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]);
    }

br = space* &newline { return {type: 'SELFCLOSINGTAG', name: 'br'} }

pre_indent
    = l:pre_indent_line+ {
        return [{type: 'TAG', name: 'pre'}]
            .concat( l, [{type: 'ENDTAG', name: 'pre'}]);
    }
pre_indent_line = sol space l:inlineline { return l }

// Syntax that stops inline expansion
inline_breaks
    = //& { console.log(pp(syntaxFlags)); return true; }
    & { return syntaxFlags['table']; }
    a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a)); return true; }
    / & { return syntaxFlags['italic']; } italic_marker { return true; }
    / & { return syntaxFlags['bold']; } bold_marker { return true; }
    / & { return syntaxFlags['linkdesc']; } link_end { return true; }
    / & { return syntaxFlags['h']; }
        ( & { return syntaxFlags['h1'] } '=' newline { return true; }
        / & { return syntaxFlags['h2'] } '==' newline { return true; }
        / & { return syntaxFlags['h3'] } '===' newline { return true; }
        / & { return syntaxFlags['h4'] } '====' newline { return true; }
        / & { return syntaxFlags['h5'] } '=====' newline { return true; }
        / & { return syntaxFlags['h6'] } '======' newline { return true; }
        )


inline
    = c:(text / inline_element / (!inline_breaks ch:. { return ch; }))+ {
        var out = [];
        var text = [];
        c = flatten(c);
        for (var i = 0; i < c.length; i++) {
            if (typeof c[i] == 'string') {
                text.push(c[i]);
            } else {
                if (text.length) {
                    out.push({ type: 'TEXT', value: text.join('') });
                    text = [];
                }
                // Append the token itself; the original called out.concat(),
                // which discards its result.
                out.push(c[i]);
            }
        }
        if (text.length) {
            out.push({ type: 'TEXT', value: text.join('') });
        }
        return out;
    }

inlineline
    = c:(text / !inline_breaks (inline_element / [^\n]))+ {
        var out = [];
        var text = [];
        c = flatten(c);
        for (var i = 0; i < c.length; i++) {
            if (typeof c[i] == 'string') {
                text.push(c[i]);
            } else {
                if (text.length) {
                    out.push({type: 'TEXT', value: text.join('')});
                    text = [];
                }
                out.push(c[i]);
            }
        }
        if (text.length) {
            out.push({type: 'TEXT', value: text.join('')});
        }
        //dp('inlineline out:' + pp(out));
        return out;
    }

/* TODO: convert all these to annotations!
 * -> need (start, end) offsets within block
 */
inline_element
    = comment
    / xmlish_tag
    / extlink
    / template
    / link
    / bold
    / italic

comment
    = '<!--' c:comment_chars* '-->'
      (space* newline space* comment)* {
        return [{ type: 'COMMENT', value: c.join('') }];
    }

comment_chars
    = c:[^-] { return c; }
    / c:'-' !'->' { return c; }

extlink
    = "[" target:url " " text:extlink_text "]" {
        return [ { type: 'TAG',
                   name: 'a',
                   attribs: [['href', target]] }
               , {type: 'TEXT', value: text}
               , {type: 'ENDTAG', name: 'a'}];
    }

// = "[" target:url text:extlink_text "]" { return { type: 'extlink', target: target, text: text } }

url
    = proto:"http:" rest:([^ \]]+) { return proto + rest.join(''); }

extlink_text
    = c:[^\]]+ { return c.join(''); }

template
    = "{{" target:link_target params:("|" p:template_param { return p })* "}}" {
        var obj = { type: 'SELFCLOSINGTAG', name: 'template', attribs: [['target', target]] };
        if (params && params.length) {
            // append the [name, value] pairs to the attribute list
            obj.attribs = obj.attribs.concat(params);
        }
        return obj;
    }

template_param
    = name:template_param_name "=" c:template_param_text {
        return [name, c];
    }
    / c:template_param_text {
        return [null, c];
    }

tplarg
    = "{{{" name:link_target params:("|" p:template_param { return p })* "}}}" {
        var obj = {
            type: 'SELFCLOSINGTAG',
            name: 'templatearg',
            attribs: [['argname', name]]
        };
        if (params && params.length) {
            // append the [name, value] pairs to the attribute list
            obj.attribs = obj.attribs.concat(params);
        }
        return obj;
    }

template_param_name
    = h:( !"}}" x:([^=|]) { return x } )* { return h.join(''); }

template_param_text
    = template_param_text_chunk*
    /* = h:( !"}}" x:([^|]) { return x } )* { return h.join(''); } */

template_param_text_chunk
    = comment
    / xmlish_tag
    / extlink
    / template
    / link
    / bold
    / italic
    / !"}}" x:([^|]) { return x }

link
    = "[[" target:link_target text:("|" link_text)* "]]" {
        var obj = {
            type: 'TAG',
            name: 'a',
            attribs: [['data-type', 'internal']]
        };
        obj.attribs.push(['href', target]);
        var textTokens;
        if (text && text.length) {
            textTokens = text[0][1]; // XXX
        } else {
            textTokens = [{type: 'TEXT', value: target}];
        }
        return [obj].concat(textTokens, [{type: 'ENDTAG', name: 'a'}]);
    }

link_target
    = h:( !"]]" x:([^|]) { return x } )* { return h.join(''); }

link_text
    = h:( & { return setFlag('linkdesc'); }
          x:inlineline { return x }
        )* {
        clearFlag('linkdesc');
        return h;
    }
    / & { clearFlag('linkdesc') } { return null; }

link_end = "]]"

/* This implementation of bold and italic is very basic so far, and misses the
 * finer points of doQuotes in the parser. A rough plan to get closer:
 * - '''' -> ' '''
 * - last ''''' in a row of ' is used
 * - if *both* italics and bolds are unbalanced, check for prefix
 * - convert single-letter or multi-letter non-space prefixed tick back to
 *   text
 */
bold
    = bold_marker
      & { dp('benter:' + pos); return setFlag('bold'); }
      c:inlineline
      (bold_marker / &newline) {
        clearFlag('bold');
        return [{ type: 'TAG', name: 'b' }]
            .concat(c, [{type: 'ENDTAG', name: 'b'}]);
    }
    / bold_marker { clearFlag('bold'); return null }

bold_marker = "'''"


italic
    = italic_marker
      & { dp('ienter:' + pos); return setFlag('italic'); }
      c:inlineline
      (italic_marker / &newline) {
        clearFlag('italic');
        dp('ileave:' + pos);
        return [{ type: 'TAG', name: 'i' }]
            .concat(c, [{ type: 'ENDTAG', name: 'i'}]);
    }
    / italic_marker { clearFlag('italic'); return null }

italic_marker = "''"

/* Will need to check anything xmlish against known/allowed HTML tags and
 * registered extensions, otherwise fail the match. Should ref be treated as a
 * regular extension? */
xmlish_tag = ref / references

ref = ref_full / ref_empty

/* Can we do backreferences to genericize this? */
ref_full
    = start:ref_start ">" content:ref_content* close:ref_end {
        return [
            { type: 'TAG',
              name: 'ext',
              attribs: [['data-extname', 'ref']]
                  .concat(start.params, [['data-startws', start.ws]]) },
            content,
            {type: 'ENDTAG', name: 'ref'}
        ];
    }

ref_empty
    = start:ref_start close:(space* "/>") {
        return [{ type: 'SELFCLOSINGTAG',
                  name: 'ext',
                  attribs: [['data-extname', 'ref']]
                      .concat(start.params, [['data-startws', start.ws]])
        }];
    }

ref_start
    = "<ref" params:ext_param* ws:space* {
        return {
            params: params,
            ws: ws
        };
    }

ref_end
    = all:("</ref" space* ">") {
        return all.join('');
    }

ref_content
    = !ref_end a:inline { // XXX: ineffective syntactic stop
        return a;
    }

/* fixme: probably have to programmatically add these */
references = references_full / references_empty

references_full
    = start:references_start ">" content:references_content* close:references_end {
        return [
            { type: 'TAG',
              name: 'ext',
              attribs: [['data-extname', 'references']]
                  .concat(start.params, [['data-startws', start.ws]])
            },
            content,
            { type: 'ENDTAG', name: 'ext' }
        ];
    }

references_empty
    = start:references_start close:(space* "/>") {
        // Keep the object on the same line as 'return': a bare 'return'
        // followed by a newline returns undefined due to automatic semicolon
        // insertion.
        return [{ type: 'SELFCLOSINGTAG',
                  name: 'ext',
                  attribs: [['data-extname', 'references']]
                      .concat(start.params, [['data-startws', start.ws]])
        }];
    }

references_start
    = "<references" params:ext_param* ws:space* {
        return {
            params: params,
            ws: ws
        };
    }

references_end
    = all:("</references" space* ">") {
        return all.join('');
    }

references_content
    = !references_end a:inline {
        return a;
    }


ext_param
    = space* name:ext_param_name "=" val:ext_param_val {
        val[0] = name;
        return val;
    }

ext_param_name
    = name:[a-zA-Z0-9-]+ {
        return name.join('');
    }

ext_param_val
    = t:[0-9A-Za-z]+ { return [null, t.join('')]; }
    / "'" t:[^'>]+ "'" { return [null, unquote("'", t.join(''))]; }
    / '"' t:[^">]+ '"' { return [null, unquote('"', t.join(''))]; }

lists = es:(dtdd / li)+ {
        return annotateList( [ { type: 'TAG', name: 'list'} ]
            .concat(flatten(es), [{ type: 'ENDTAG', name: 'list' }]));
    }

li = sol
     bullets:list_char+
     c:inlineline
     &newline {
        return [ { type: 'TAG',
                   name: 'listItem',
                   bullets: bullets }
               , c ];
    }

dtdd = sol
       bullets:list_char+
       c:(inline_element / (n:[^:\n] { return {type: 'TEXT', value: n}; }))+
       ":"
       d:(inline_element / (n:[^\n] { return {type: 'TEXT', value: n}; }))+
       &newline {
        // reject the rule if the bullets do not end in a semicolon
        if (bullets[bullets.length - 1] != ';') {
            return null;
        } else {
            var dtbullets = bullets.slice(0, bullets.length - 1);
            dtbullets.push(':');
            return [ { type: 'TAG', name: 'listItem', bullets: bullets } ]
                .concat( c,
                    [{ type: 'TAG', name: 'listItem', bullets: dtbullets }],
                    d );
        }
    }


list_char = [*#:;]


/* Tables */

table
    = tas:table_start c:table_caption? b:table_body? table_end {
        var res = {type: 'TAG', name: 'table'};
        var body = b !== '' ? b : [];
        dp("body: " + pp(body));
        if (tas.length > 0) {
            // FIXME: actually parse and build structure
            res.attribs = [['data-unparsed', tas.join('')]];
        }

        var caption;
        if (c != '') {
            caption = [{type: 'TAG', name: 'caption'}]
                .concat(c, [{type: 'ENDTAG', name: 'caption'}]);
        } else {
            caption = [];
        }
        //dp(pp(res));

        return [res].concat(caption, body,
            [{type: 'ENDTAG', name: 'table'}]);
    }

table_start
    = sol
      "{|"
      & { setFlag('table'); return true; }
      ta:table_attribs*
      space* {
        //dp("table_start " + pp(ta) + ", pos:" + pos);
        return ta;
    }
    / sol "{|" { clearFlag('table'); return null; }

table_attribs
    = text / ! inline_breaks !newline .

table_caption
    = newline
      "|+" c:inline* {
        return c;
    }

table_body
    = & { dp("table_body enter"); return true; }
      firstrow:table_firstrow otherrows:table_row* {
        /* dp('table first and otherrows: '
         *    + pp([firstrow].concat(otherrows))); */
        return [firstrow].concat(otherrows);
    }
    / otherrows:table_row* {
        //dp('table otherrows: ' + pp(otherrows));
        return otherrows;
    }

table_firstrow
    = td:table_data+ {
        return [{ type: 'TAG', name: 'tr' }]
            .concat(td, [{type: 'ENDTAG', name: 'tr'}]);
    }

table_row
    = & { dp("table row enter"); return true; }
      newline
      "|-" thtd_attribs? space* td:(table_data / table_header)* {
        return [{type: 'TAG', name: 'tr'}]
            .concat(td, [{type: 'ENDTAG', name: 'tr'}]);
    }

table_data
    = & { dp("table_data enter, pos=" + pos); return true; }
      ("||" / newline "|")
      ! [}+-]
      a:thtd_attribs?
      td:(!inline_breaks anyblock)* {
        dp("table data result: " + pp(td) + ", attribs: " + pp(a));
        return [{ type: 'TAG', name: 'td', attribs: [['data-unparsed', a]]}]
            .concat(td, [{type: 'ENDTAG', name: 'td'}]);
    }

table_header
    = ("!!" / newline "!")
      a:thtd_attribs?
      c:inline {
        return [{type: 'TAG', name: 'th', attribs: [['data-unparsed', a]]}]
            .concat(c, [{type: 'ENDTAG', name: 'th'}]);
    }

thtd_attribs
    // In particular, do not match [|\n]
    = a:(text / ! inline_breaks [="':;/,.-] )+ "|" ! [|}+-] {
        return a;
    }


table_end = newline? "|}" { clearFlag('table'); }


/* Wikidom TODO:
 * - split off text into content nodes
 * - convert inlines into annotations
 * - change contents into children
 *
 * { text: text,
 *   annotations: [(normal annotations)],
 *   maybeannotations: [
 *       { type: 'something',
 *         side: MA_START,
 *         tag: { start: x, length: y }
 *       }
 *   ]
 * }
 *
 * Offsets in annotations: presume maybeannotations are actually text
 * -> need to transform annotations if a match is found
 * -> format annotations and comments can run to the end (re-opened after
 *    block-level tags); only closed on table cells, objects, ...?
 * -> other annotations (images, templates etc.) are limited by block-level
 *    elements, tightly bound
 *
 * Block-level elements
 * --------------------
 * - Need some early clean-up to provide structure and offsets
 * - Establish scope limits for some inlines
 * - Line-based, balanced by construction
 * - HTML tags need balancing / matching / implicit close
 * - Content in illegal places (e.g. between table and td tags) needs foster
 *   parenting
 * - The grammar will match the outermost pair if unmatched pairs are
 *   recognized as tokens (or as text)
 * - Post-processing needed, but has to be limited by scope
 */
/* Tabs do not mix well with the hybrid production syntax */
/* vim: et:ts=4:sw=4:cindent */