/*
 * Mirror of https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
 * (synced 2024-11-15 18:39:52 +00:00, revision b750ce38b8; 777 lines, 20 KiB,
 * plaintext).
 *
 * Builds a DOM tree (jsdom) from the tokens and then serializes that using
 * document.innerHTML. This is all very experimental, so don't be surprised by
 * rough edges.
 */
/* Produces output more or less compatible with FakeParser; plug it into FP's output and see */
{
    /**
     * Debug trace helper used by the grammar actions. Logging is switched off
     * by the constant condition below; change it to `true` to see the traces.
     */
    var dp = function ( msg ) {
        if ( false ) {
            console.log( msg );
        }
    };

    /*
     * Flags for specific parse environments (inside tables, links etc). Flags
     * trigger syntactic stops in the inline_breaks production, which
     * terminates inline and attribute matches. Flags merely reduce the number
     * of productions needed: The grammar is still context-free as the
     * productions can just be unrolled for all combinations of environments
     * at the cost of a much larger grammar.
     *
     * Each entry is a counter: setFlag increments it, clearFlag decrements it,
     * and inline_breaks treats any truthy value as "flag active".
     */
    var syntaxFlags = {};

    /**
     * Increment the counter for `flag`.
     * Always returns true so it can be used as the result of a semantic
     * predicate.
     */
    var setFlag = function ( flag ) {
        // `|| 0` also recovers from a missing or NaN counter.
        syntaxFlags[flag] = ( syntaxFlags[flag] || 0 ) + 1;
        return true;
    };

    /**
     * Decrement the counter for `flag`, never going below zero.
     * A negative (or NaN) counter would otherwise leave the flag in a bogus
     * state: negative numbers are truthy and would keep the stop active.
     */
    var clearFlag = function ( flag ) {
        var count = syntaxFlags[flag] || 0;
        syntaxFlags[flag] = count > 0 ? count - 1 : 0;
    };

    /** Pretty-print a value as indented JSON (used in debug traces). */
    var pp = function ( s ) {
        return JSON.stringify( s, null, 2 );
    };

    /**
     * Convert list prefixes to a list of WikiDom list styles.
     * `bullets` is indexed per character/element; characters other than
     * '*', '#', ';' and ':' are skipped.
     */
    var bulletsToTypes = function ( bullets ) {
        var bTypes = [];
        for ( var i = 0, blen = bullets.length; i < blen; i++ ) {
            switch ( bullets[i] ) {
                case '*':
                    bTypes.push( 'bullet' );
                    break;
                case '#':
                    bTypes.push( 'number' );
                    break;
                case ';':
                    bTypes.push( 'term' );
                    break;
                case ':':
                    bTypes.push( 'description' );
                    break;
            }
        }
        return bTypes;
    };

    // Start position of top-level block
    var blockStart = 0;

    /**
     * Replace every backslash-escaped occurrence of the quote character
     * `quotec` in `text` with the bare quote character.
     */
    var unquote = function ( quotec, text ) {
        // split/join replaces all occurrences; String.replace with a string
        // pattern would only replace the first one.
        return text.split( '\\' + quotec ).join( quotec );
    };

    /**
     * Recursively flatten nested arrays into a single flat array.
     * null/undefined input yields an empty array.
     */
    var flatten = function ( e ) {
        var es = [];
        if ( e === null || e === undefined ) {
            return es;
        }
        for ( var i = 0, length = e.length; i < length; i++ ) {
            var ei = e[i];
            if ( Array.isArray( ei ) ) {
                es = es.concat( flatten( ei ) );
            } else {
                es.push( ei );
            }
        }
        return es;
    };
}

// Entry rule: any number of top-level blocks followed by optional trailing
// newlines. Returns the block results flattened into one array.
start
  = e:toplevelblock* newline* {
        return flatten(e);
    }

anyblock = block / inline
anyblockline = block / inlineline

// All chars that cannot start syntactic structures
text = t:[A-Za-z0-9,._ -]+ { return t.join(''); }

// A run of spaces and tabs, returned as one string
space
  = s:[ \t]+ { return s.join(''); }

// Start of line: a newline, or the very beginning of the input, optionally
// followed by a comment (and the newline after it). Returns the
// [comment, newline] pair when a comment was matched.
sol = (newline / & { return pos === 0; } { return true; })
      cn:(c:comment n:newline? { return [c, n]; })? {
          return cn;
      }

newline
  = '\n' / '\r\n'

// A block at the top level. Records the input offsets of the block as
// 'startPos' / 'endPos' attributes on the first token object of the result.
toplevelblock
  = & { blockStart = pos; return true; } b:block {
        var bs = null;
        b = b ? flatten(b) : [];
        dp('toplevelblock:' + pp(b));
        // The flattened result can start with plain strings (or be empty), so
        // look for the first token object instead of assuming b[0] is one.
        for (var i = 0; i < b.length; i++) {
            if (b[i] !== null && typeof b[i] === 'object') {
                bs = b[i];
                break;
            }
        }
        if (bs !== null) {
            if (bs.attribs === undefined) {
                bs.attribs = [];
            }
            bs.attribs.push(['startPos', blockStart]);
            bs.attribs.push(['endPos', pos]);
        }
        return b;
    }

// A block: a start-of-line structure (optionally preceded by a blank line),
// a paragraph, a comment, or a bare start-of-line which yields a NEWLINE
// token (preceded by the comment matched by sol, if any).
block
  = (sol space* &newline)? bl:block_lines { return bl; }
  / para
  / comment
  / (s:sol {
        if (s) {
            return [s, {type: 'NEWLINE'}];
        } else {
            return [{type: 'NEWLINE'}];
        }
    })

// Block structures with start-of-line wiki syntax
block_lines
  = h
  / table
  / lists
  / pre_indent

/* Headings */

// Longest marker first: '=' is a prefix of every longer marker, so h1 has to
// be tried last for the other levels to be reachable.
h = h6 / h5 / h4 / h3 / h2 / h1

// Each hN rule matches `sol`, the opening marker, the heading content, the
// closing marker, an optional comment, and requires a newline to follow.
// While the content is parsed the 'h' and 'hN' flags are set so that
// inline_breaks stops at the closing marker. If the content does not match,
// the second alternative clears the flags again and fails the rule, so that
// the next alternative of the caller is tried. The result is the tag tokens,
// preceded by whatever sol returned (a comment) when that is non-empty.

h1 = s:sol '='
    r:(
        & { setFlag('h'); return setFlag('h1'); }
        c:inlineline '=' comment? &newline {
            clearFlag('h');
            clearFlag('h1');
            return [{type: 'TAG', name: 'h1'}]
                .concat(c, [{type: 'ENDTAG', name: 'h1'}]);
        }
      / & { clearFlag('h'); clearFlag('h1'); return false; } { return null; }
    ) {
        return s ? [s].concat(r) : r;
    }

h2 = s:sol '=='
    r:(
        & { setFlag('h'); return setFlag('h2'); }
        c:inlineline '==' comment? &newline {
            clearFlag('h');
            clearFlag('h2');
            return [{type: 'TAG', name: 'h2'}]
                .concat(c, [{type: 'ENDTAG', name: 'h2'}]);
        }
      / & { clearFlag('h'); clearFlag('h2'); return false; } { return null; }
    ) {
        return s ? [s].concat(r) : r;
    }

h3 = s:sol '==='
    r:(
        & { setFlag('h'); return setFlag('h3'); }
        c:inlineline '===' comment? &newline {
            clearFlag('h');
            clearFlag('h3');
            return [{type: 'TAG', name: 'h3'}]
                .concat(c, [{type: 'ENDTAG', name: 'h3'}]);
        }
      / & { clearFlag('h'); clearFlag('h3'); return false; } { return null; }
    ) {
        return s ? [s].concat(r) : r;
    }

h4 = s:sol '===='
    r:(
        & { setFlag('h'); return setFlag('h4'); }
        c:inlineline '====' comment? &newline {
            clearFlag('h');
            clearFlag('h4');
            return [{type: 'TAG', name: 'h4'}]
                .concat(c, [{type: 'ENDTAG', name: 'h4'}]);
        }
      / & { clearFlag('h'); clearFlag('h4'); return false; } { return null; }
    ) {
        return s ? [s].concat(r) : r;
    }

h5 = s:sol '====='
    r:(
        & { setFlag('h'); return setFlag('h5'); }
        c:inlineline '=====' comment? &newline {
            clearFlag('h');
            clearFlag('h5');
            return [{type: 'TAG', name: 'h5'}]
                .concat(c, [{type: 'ENDTAG', name: 'h5'}]);
        }
      / & { clearFlag('h'); clearFlag('h5'); return false; } { return null; }
    ) {
        return s ? [s].concat(r) : r;
    }

h6 = s:sol '======'
    r:(
        & { setFlag('h'); return setFlag('h6'); }
        c:inlineline '======' comment? &newline {
            clearFlag('h');
            clearFlag('h6');
            return [{type: 'TAG', name: 'h6'}]
                .concat(c, [{type: 'ENDTAG', name: 'h6'}]);
        }
      / & { clearFlag('h'); clearFlag('h6'); return false; } { return null; }
    ) {
        return s ? [s].concat(r) : r;
    }

// NOTE(review): heading_marker and heading_text are not referenced by any
// other rule visible in this grammar.
heading_marker
  = '=' '='*

heading_text
  = h:( !(heading_marker newline) x:inlineline { return x; } )* { return h.join(''); }

// TODO: convert inline content to annotations!
// A paragraph; the result of an optional leading `sol br` is not part of
// the returned tokens.
para
  = (sol br)? pl:para_lines { return pl; }

// One line of inline content wrapped in <p> tokens, followed by any further
// paragraph lines that do not start a block_lines structure. The result is
// nested; callers flatten it.
para_lines
  = s:sol c:inlineline cs:(!block_lines para_lines)* {
        var res = [{type: 'TAG', name: 'p'}];
        if (s !== '') {
            res.push(s);
        }
        //console.log('paralines' + pp(res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}])));
        return res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]);
    }

// An (otherwise empty) line, represented as a self-closing br token
br = space* &newline { return {type: 'SELFCLOSINGTAG', name: 'br'}; }

// One or more space-indented lines, wrapped in <pre> tokens
pre_indent
  = l:pre_indent_line+ {
        return [{type: 'TAG', name: 'pre'}]
            .concat(l, [{type: 'ENDTAG', name: 'pre'}]);
    }

pre_indent_line = sol space l:inlineline { return l; }

// Syntax that stops inline expansion. Each alternative is guarded by the
// syntaxFlags counter of the environment it belongs to, so a stop only
// applies while the corresponding flag is set.
inline_breaks
  = //& { console.log(pp(syntaxFlags)); return true; }
    & { return syntaxFlags['table']; }
    a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a)); return true; }
  / & { return syntaxFlags['italic']; } italic_marker { return true; }
  / & { return syntaxFlags['bold']; } bold_marker { return true; }
  / & { return syntaxFlags['linkdesc']; } link_end { return true; }
  / & { return syntaxFlags['h']; }
    ( & { return syntaxFlags['h1']; } '=' newline { return true; }
    / & { return syntaxFlags['h2']; } '==' newline { return true; }
    / & { return syntaxFlags['h3']; } '===' newline { return true; }
    / & { return syntaxFlags['h4']; } '====' newline { return true; }
    / & { return syntaxFlags['h5']; } '=====' newline { return true; }
    / & { return syntaxFlags['h6']; } '======' newline { return true; }
    )

// Inline content that may span lines. Consecutive string matches are merged
// into TEXT tokens; every other match is passed through as a token.
inline
  = c:(text / inline_element / (!inline_breaks ch:. { return ch; }))+ {
        var out = [];
        var text = [];
        c = flatten(c);
        for (var i = 0; i < c.length; i++) {
            if (typeof c[i] == 'string') {
                text.push(c[i]);
            } else {
                if (text.length) {
                    out.push({ type: "TEXT", value: text.join('') });
                    text = [];
                }
                // push, not concat: concat returns a new array and would
                // silently drop the token.
                out.push(c[i]);
            }
        }
        if (text.length) {
            out.push({ type: 'TEXT', value: text.join('') });
        }
        return out;
    }

// Inline content restricted to a single line (never matches '\n').
// Consecutive string matches are merged into TEXT tokens; every other match
// is passed through as a token.
inlineline
  = c:(text / !inline_breaks (inline_element / [^\n]))+ {
        var out = [];
        var text = [];
        c = flatten(c);
        for (var i = 0; i < c.length; i++) {
            if (typeof c[i] == 'string') {
                text.push(c[i]);
            } else {
                if (text.length) {
                    out.push({type: 'TEXT', value: text.join('')});
                    text = [];
                }
                out.push(c[i]);
            }
        }
        if (text.length) {
            out.push({type: 'TEXT', value: text.join('')});
        }
        //dp('inlineline out:', pp(out));
        return out;
    }

/* TODO: convert all these to annotations!
 *  -> need (start, end) offsets within block
 */
inline_element
  = comment
  / xmlish_tag
  / extlink
  / template
  / link
  / bold
  / italic

// An HTML-style comment. Further comments that follow on subsequent lines
// (separated only by whitespace) are consumed as well, but only the text of
// the first comment ends up in the returned COMMENT token.
comment
  = '<!--' c:comment_chars* '-->'
    (space* newline space* comment)* {
        return [{ type: 'COMMENT', value: c.join('') }];
    }

// Any character except a '-' that starts the closing '-->'
comment_chars
  = c:[^-] { return c; }
  / c:'-' !'->' { return c; }

// External link: [url text]
extlink
  = "[" target:url " " text:extlink_text "]" {
        return [ { type: 'TAG',
                   name: 'a',
                   attribs: [['href', target]] }
               , {type: 'TEXT', value: text}
               , {type: 'ENDTAG', name: 'a'}];
    }

//  = "[" target:url text:extlink_text "]" { return { type: 'extlink', target: target, text: text } }

// URL with an http: or https: protocol, up to the next space or ']'
url
  = proto:("http:" / "https:") rest:([^ \]]+) { return proto + rest.join(''); }

extlink_text
  = c:[^\]]+ { return c.join(''); }

// Template transclusion: {{target|param|name=value}}
template
  = "{{"
    // The target ends at the first '|' or at the closing '}}'. (link_target
    // cannot be used here: it only stops at ']]', so it would run past '}}'.)
    target:(t:(!"}}" ch:[^|] { return ch; })* { return t.join(''); })
    params:("|" p:template_param { return p; })*
    "}}" {
        var obj = { type: 'SELFCLOSINGTAG', name: 'template', attribs: [['target', target]] };
        if (params && params.length) {
            // NOTE(review): the whole params list is pushed as ONE attribs
            // entry rather than merged as [name, value] pairs - confirm that
            // consumers expect this shape.
            obj.attribs.push(params);
        }
        return obj;
    }

// A template parameter, returned as [name, value]; name is null for
// positional (unnamed) parameters.
template_param
  = name:template_param_name "=" c:template_param_text {
        return [name, c];
    }
  / c:template_param_text {
        return [null, c];
    }

// Template argument: {{{name|param}}}
// NOTE(review): not referenced by any other rule visible in this grammar.
tplarg
  = "{{{"
    // The name ends at the first '|' or at the closing '}}}'. (link_target
    // cannot be used here: it only stops at ']]'.)
    name:(t:(!"}}}" ch:[^|] { return ch; })* { return t.join(''); })
    params:("|" p:template_param { return p; })*
    "}}}" {
        var obj = {
            type: 'SELFCLOSINGTAG',
            name: 'templatearg',
            attribs: [['argname', name]]
        };
        if (params && params.length) {
            // NOTE(review): pushed as a single attribs entry, same as in the
            // template rule - confirm that consumers expect this shape.
            obj.attribs.push(params);
        }
        return obj;
    }

// Parameter name: everything up to '=', '|' or the closing '}}'
template_param_name
  = h:( !"}}" x:([^=|]) { return x; } )* { return h.join(''); }

template_param_text
  = template_param_text_chunk*
/*  = h:( !"}}" x:([^|]) { return x } )* { return h.join(''); }*/

// One piece of a parameter value: an inline element, or a single character
// that is neither '|' nor the start of the closing '}}'
template_param_text_chunk
  = comment
  / xmlish_tag
  / extlink
  / template
  / link
  / bold
  / italic
  / !"}}" x:([^|]) { return x; }

// Internal link: [[target]] or [[target|description]]
// Returns the <a> tokens with href set to the link target; the description
// (or, without one, the target itself) becomes the link content.
link
  = "[[" target:link_target text:("|" link_text)* "]]" {
        var obj = {
            type: 'TAG',
            name: 'a',
            attribs: [['data-type', 'internal'], ['href', target]]
        };
        // Each entry of `text` is a ["|", description] pair; only the first
        // description is used.
        var desc = (text && text.length) ? text[0][1] : null;
        var content;
        if (desc === null || desc === undefined || desc === '') {
            content = [{type: 'TEXT', value: target}];
        } else if (typeof desc === 'string') {
            content = [{type: 'TEXT', value: desc}];
        } else {
            content = flatten([desc]);
        }
        return [obj].concat(content, [{type: 'ENDTAG', name: 'a'}]);
    }

// Link target: everything up to '|' or the closing ']]'
link_target
  = h:( !"]]" x:([^|]) { return x; } )* { return h.join(''); }

// Link description. The 'linkdesc' flag makes inline_breaks stop at ']]'
// while the description is parsed. The flag is set exactly once before the
// content and cleared exactly once in the action; the repetition itself can
// never fail, so the two always balance.
// Returns the description as a string built from the TEXT tokens of the
// content (other tokens are not represented in the string).
link_text
  = & { return setFlag('linkdesc'); }
    h:inlineline* {
        var out = '';
        clearFlag('linkdesc');
        h = flatten(h);
        for (var i = 0; i < h.length; i++) {
            if (h[i] && h[i].type === 'TEXT') {
                out += h[i].value;
            }
        }
        return out;
    }

link_end = "]]"

// Bold text delimited by bold markers. The 'bold' flag makes inline_breaks
// stop at the closing marker. If no closing marker is found, the second
// alternative consumes the opening marker, clears the flag and returns null.
bold
  = bold_marker
    & { dp('benter:' + pos); return setFlag('bold'); }
    c:inlineline
    bold_marker {
        clearFlag('bold');
        return [{ type: 'TAG', name: 'b' }]
            .concat(c, [{type: 'ENDTAG', name: 'b'}]);
    }
  / bold_marker { clearFlag('bold'); return null; }

bold_marker
  = "'''"

// Italic text delimited by italic markers. The 'italic' flag makes
// inline_breaks stop at the closing marker. If no closing marker is found,
// the second alternative consumes the opening marker, clears the flag and
// returns null.
italic
  = italic_marker
    & { dp('ienter:' + pos); return setFlag('italic'); }
    c:inlineline
    italic_marker {
        clearFlag('italic');
        dp('ileave:' + pos);
        return [{ type: 'TAG', name: 'i' }]
            .concat(c, [{ type: 'ENDTAG', name: 'i'}]);
    }
  / italic_marker { clearFlag('italic'); return null; }

italic_marker
  = "''"

/* Will need to check anything xmlish against known/allowed HTML tags and
 * registered extensions, otherwise fail the match. Should ref be treated as a
 * regular extension? */
xmlish_tag = ref / references

ref = ref_full / ref_empty

/* Can we do backreferences to genericize this? */
// <ref ...>content</ref>, emitted as an 'ext' tag pair carrying the
// extension name, the parsed parameters and the whitespace before '>'.
ref_full
  = start:ref_start ">" content:ref_content* close:ref_end {
        return [
            { type: 'TAG',
              name: 'ext',
              attribs: [['data-extname', 'ref']]
                  .concat(start.params, [['data-startws', start.ws]])},
            content,
            // The end tag closes the 'ext' start tag above, so it has to
            // carry the same name (as references_full does).
            {type: 'ENDTAG', name: 'ext'}
        ];
    }

// Self-closing <ref ... />
ref_empty
  = start:ref_start close:(space* "/>") {
        return [{ type: 'SELFCLOSINGTAG',
                  name: 'ext',
                  attribs: [['data-extname', 'ref']]
                      .concat(start.params
                              ,[['data-startws', start.ws]])
                }];
    }

// Opening '<ref' with its parameters and the whitespace that follows them
ref_start
  = "<ref" params:ext_param* ws:space* {
        return {
            params: params,
            ws: ws
        };
    }

ref_end
  = all:("</ref" space* ">") {
        return all.join('');
    }

ref_content
  = !ref_end a:inline { // XXX: ineffective syntactic stop
        return a;
    }

/* fixme probably have to programmatically add these */
references = references_full / references_empty

// <references ...>content</references>, emitted as an 'ext' tag pair
references_full
  = start:references_start ">" content:references_content* close:references_end {
        return [
            { type: 'TAG',
              name: 'ext',
              attribs: [['data-extname', 'references']]
                  .concat(start.params
                          ,[['data-startws', start.ws]])
            },
            content,
            { type: 'ENDTAG', name: 'ext' }
        ];
    }

// Self-closing <references ... />
references_empty
  = start:references_start close:(space* "/>") {
        // The array literal must start on the same line as `return`;
        // otherwise automatic semicolon insertion makes this return undefined.
        return [{ type: 'SELFCLOSINGTAG',
                  name: 'ext',
                  attribs: [['data-extname', 'references']]
                      .concat(start.params
                              ,[['data-startws', start.ws]])
                }];
    }

// Opening '<references' with its parameters and the whitespace after them
references_start
  = "<references" params:ext_param* ws:space* {
        return {
            params: params,
            ws: ws
        };
    }

references_end
  = all:("</references" space* ">") {
        return all.join('');
    }

references_content
  = !references_end a:inline {
        return a;
    }

// Extension tag parameter name=value, returned as a [name, value] pair
ext_param
  = space* name:ext_param_name "=" val:ext_param_val {
        // ext_param_val returns [null, value]; fill in the name slot
        val[0] = name;
        return val;
    }

ext_param_name
  = name:[a-zA-Z0-9-]+ {
        return name.join('');
    }

// Unquoted, single-quoted or double-quoted value, returned as [null, value]
ext_param_val
  = t:[0-9A-Za-z]+ { return [null, t.join('')]; }
  / "'" t:[^'>]+ "'" { return [null, unquote("'", t.join(''))]; }
  / '"' t:[^">]+ '"' { return [null, unquote('"', t.join(''))]; }

// A run of list lines, wrapped in <ul> tokens regardless of the bullet type
lists = es:(dtdd / li)+
    {
        return [ { type: 'TAG',
                   name: 'ul'} ] // XXX!!
            .concat(flatten(es)
                    ,[{ type: 'ENDTAG', name: 'ul' }]);
    }

// A single list line; the bullet characters are kept in 'data-styles'
li = sol
    bullets:list_char+
    c:inlineline
    &newline
    {
        return [ { type: 'TAG',
                   name: 'li',
                   attribs: [['data-styles', bullets]] }
               , c
               , { type: 'ENDTAG', name: 'li' }
               ];
    }

// Definition list line of the form ";term:description".
dtdd = sol
    // The bullets must end in a semicolon. This is enforced in the grammar
    // (all list chars that are followed by another list char, then ';') so
    // that other list lines fail here and fall through to li; returning null
    // from the action would not reject the match.
    bullets:(
        lead:(b:list_char &list_char { return b; })*
        last:";" { return lead.concat([last]); }
    )
    c:(inline_element / (n:[^:\n] { return {type: 'TEXT', value: n}; }))+
    ":"
    d:(inline_element / (n:[^\n] { return {type: 'TEXT', value: n}; }))+
    &newline
    {
        return [ { type: 'TAG', name: 'dl', attribs: [['data-styles', bullets]] }
               , { type: 'TAG', name: 'dt' } ]
            .concat( c
                   , [ {type: 'ENDTAG', name: 'dt'}
                     , {type: 'TAG', name: 'dd'} ]
                   , d
                   , [ {type: 'ENDTAG', name: 'dd'}
                     , {type: 'ENDTAG', name: 'dl'} ]);
    }

list_char = [*#:;]

/* Tables */

// A complete table: start line, optional caption, optional body, end marker
table
  = tas:table_start c:table_caption? b:table_body? table_end {
        var res = {type: 'TAG', name: 'table'};
        var body = b !== '' ? b : [];
        var caption;
        dp("body: " + pp(body));
        if (tas && tas.length > 0) {
            // FIXME: actually parse and build structure
            // table_attribs matches are strings or nested match arrays, so
            // flatten them before joining to get the plain attribute text.
            res.attribs = [['data-unparsed', flatten(tas).join('')]];
        }

        if (c != '') {
            caption = [{type: 'TAG', name: 'caption'}]
                .concat(c, [{type: 'ENDTAG', name: 'caption'}]);
        } else {
            caption = [];
        }
        //dp(pp(res));

        return [res].concat(caption, body,
            [{type: 'ENDTAG', name: 'table'}]);
    }

// Table start line "{|" plus unparsed attributes. Sets the 'table' flag,
// which table_end clears again.
table_start
  = sol
    "{|"
    & { setFlag('table'); return true; }
    ta:table_attribs*
    space* {
        //dp("table_start " + pp(ta) + ", pos:" + pos);
        return ta;
    }
  / sol "{|" { clearFlag('table'); return null; }

table_attribs
  = text / ! inline_breaks !newline .

// Caption line: "|+" followed by inline content
table_caption
  = newline
    "|+" c:inline* {
        return c;
    }

// Table rows. The first row may consist of cells without a preceding "|-".
table_body
  = & { dp("table_body enter"); return true; }
    firstrow:table_firstrow otherrows:table_row* {
        /* dp('table first and otherrows: '
         * + pp([firstrow].concat(otherrows))); */
        return [firstrow].concat(otherrows);
    }
  / otherrows:table_row* {
        //dp('table otherrows: ' + pp(otherrows));
        return otherrows;
    }

table_firstrow
  = td:table_data+ {
        return [{ type: 'TAG', name: 'tr' }]
            .concat(td, [{type: 'ENDTAG', name: 'tr'}]);
    }

// A row introduced by "|-"; the row attributes are matched but not returned
table_row
  = & { dp("table row enter"); return true; }
    newline
    "|-" thtd_attribs? space* td:(table_data / table_header)* {
        return [{type: 'TAG', name: 'tr'}]
            .concat(td, [{type: 'ENDTAG', name: 'tr'}]);
    }

// A data cell, introduced by "||" or by "|" at the start of a line
table_data
  = & { dp("table_data enter, pos=" + pos); return true; }
    ("||" / newline "|")
    ! [}+-]
    a:thtd_attribs?
    td:(!inline_breaks anyblock)* {
        dp("table data result: " + pp(td) + ", attribts: " + pp(a));
        return [{ type: 'TAG', name: 'td', attribs: [['data-unparsed', a]]}]
            .concat(td, [{type: 'ENDTAG', name: 'td'}]);
    }

// A header cell, introduced by "!!" or by "!" at the start of a line
table_header
  = ("!!" / newline "!")
    a:thtd_attribs?
    c:inline {
        return [{type: 'TAG', name: 'th', attribs: [['data-unparsed', a]]}]
            .concat(c, [{type: 'ENDTAG', name: 'th'}]);
    }

// Cell or row attributes: terminated by a single "|" that does not start
// another table construct.
thtd_attribs
  // In particular, do not match [|\n]
  = a:(text / ! inline_breaks [="':;/,.-] )+ "|" ! [|}+-] {
        return a;
    }

// End of table; clears the 'table' flag set by table_start
table_end = newline? "|}" { clearFlag('table'); }

/* Wikidom TODO:
 * split off text into content nodes
 * convert inlines into annotations
 * change contents into children
 *
 *  { text: text,
 *    annotations: [(normal annotations)],
 *    maybeannotations: [
 *        { type: 'something',
 *          side: MA_START,
 *          tag: { start: x, length: y }
 *        }
 *    ]
 *  }
 * offsets in annotations: presume maybeannotations are actually text
 * -> need to transform annotations if match found
 * -> format annotations, comments can run to the end (re-opened after
 *    block-level tags); only closed on table cells, object,?
 * -> other annotations (images, templates etc) are limited by block-level
 *    elements, tightly bound
 *
 * Block-level elements
 * --------------------
 * - Need some early clean-up to provide structure and offsets
 * - Establish scope limits for some inlines
 * - Line-based balanced by construction
 * - HTML tags need balancing / matching / implicit close
 * - content in illegal places (e.g. between table and td tags) needs foster
 *   parenting
 * - grammar will match outermost pair if unmatched pairs are recognized as
 *   tokens (or as text)
 * - post-processing needed, but has to be limited by scope
 */
/* Tabs do not mix well with the hybrid production syntax */
/* vim: et:ts=4:sw=4:cindent */