mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-24 06:24:08 +00:00
775 lines
16 KiB
Plaintext
775 lines
16 KiB
Plaintext
/* Produces output more or less compatible with FakeParser; plug it into FP's output and see */
|
|
|
|
{
|
|
/* Debug print hook. Tracing is switched off by the constant below, so the
 * message is dropped; set `enabled` to true to log parser traces. */
var dp = function ( msg ) {
    var enabled = false;
    if ( !enabled ) {
        return;
    }
    console.log(msg);
};
|
|
|
|
|
|
/* Forbidden chars in anything content.
 *
 * Reference-counted set of regexp source fragments. `regexp` always holds the
 * '|'-joined alternation of every fragment whose counter is non-zero, or ''
 * when nothing is forbidden.
 *
 *   kvs     key: regexp source, value: nesting counter
 *   push    register a fragment (or bump its counter if already present)
 *   pop     release a fragment; it is removed once its counter reaches zero.
 *           Throws an Error when the fragment was never pushed.
 */
var forbiddenThingRegexp = {
    kvs: {},
    regexp: "",

    // Own-property test, so names inherited from Object.prototype
    // ('constructor', 'toString', ...) are not mistaken for registered keys.
    has: function (key) {
        return Object.prototype.hasOwnProperty.call(this.kvs, key);
    },

    // Recompute `regexp` from the currently registered fragments.
    rebuild: function () {
        var keys = [];
        for (var key in this.kvs) {
            if (this.has(key)) {
                keys.push(key);
            }
        }
        this.regexp = keys.join('|');
    },

    push: function (key) {
        if (this.has(key)) {
            this.kvs[key]++;
        } else {
            this.kvs[key] = 1;
            this.rebuild();
        }
    },

    pop: function (key) {
        if (!this.has(key)) {
            throw new Error("Trying to pop non-existing forbiddenThingRegexp");
        }
        if (this.kvs[key] === 1) {
            // Last reference: drop the fragment from the alternation.
            delete this.kvs[key];
            this.rebuild();
        } else {
            this.kvs[key]--;
        }
    }
};
|
|
|
|
|
|
|
|
/* Temporary debugging help: PHP-style print_r dump of a value.
 *
 * @param arr    value to dump; objects and arrays are walked recursively
 * @param level  current nesting level (optional, defaults to 0); controls
 *               the padding in front of entries and brackets
 * @return string dump of the value
 *
 * null is rendered as a scalar rather than as an empty "Array ( )" block
 * (typeof null is 'object'). There is no cycle detection, so a
 * self-referencing structure recurses without bound.
 */
var print_r = function (arr, level) {
    var dumped_text = "";
    if (!level) level = 0;

    // The padding given at the beginning of the line.
    var level_padding = "";
    var bracket_level_padding = "";

    for (var j = 0; j < level + 1; j++) level_padding += " ";
    for (var b = 0; b < level; b++) bracket_level_padding += " ";

    if (arr !== null && typeof arr === 'object') { // Arrays/Hashes/Objects
        dumped_text += "Array\n";
        dumped_text += bracket_level_padding + "(\n";
        for (var item in arr) {
            var value = arr[item];

            if (value !== null && typeof value === 'object') { // nested container
                dumped_text += level_padding + "[" + item + "] => ";
                dumped_text += print_r(value, level + 2);
            } else {
                dumped_text += level_padding + "[" + item + "] => '" + value + "'\n";
            }
        }
        dumped_text += bracket_level_padding + ")\n\n";
    } else { // Strings/Chars/Numbers etc.
        dumped_text = "===>" + arr + "<===(" + typeof(arr) + ")";
    }

    return dumped_text;
};
|
|
}
|
|
|
|
/* Grammar entry point: a page is a sequence of blocks.
 * Returns { type: 'page', content: [...] }. */
start
  = e:block* {
        // Flatten sub-arrays one level, as a list block can contain
        // multiple lists. Plain loop: no dependency on a global jQuery `$`.
        var es = [];
        for (var i = 0; i < e.length; i++) {
            if (Array.isArray(e[i])) {
                es = es.concat(e[i]);
            } else {
                es.push(e[i]);
            }
        }
        //dp(print_r(es));
        return {
            type: 'page',
            content: es
        };
    }
|
|
|
|
// Block-level content, falling back to inline content.
anyblock
  = block
  / inline

// Same as anyblock, but falls back to the inlineline variant.
anyblockline
  = block
  / inlineline
|
|
|
|
/* Plain text, or any run of characters cut short where
 * forbiddenThingRegexp first matches. */
anything
  = text
  / s:.+ {
        var str = s.join('');
        dp("anything: " +print_r(str));

        // Nothing forbidden right now: accept the whole run.
        if (forbiddenThingRegexp.regexp === '') {
            return str;
        }

        var m = str.search(forbiddenThingRegexp.regexp);
        if (m === 0) {
            // Forbidden right at the start: give everything back, reject.
            pos -= str.length;
            return null;
        }
        if (m < 0) {
            return str;
        }

        dp("anything reverse " + (str.length - m)
            + ", matched: " + str.substr(0,m));
        // reverse parser position to just before the forbidden match
        pos -= str.length - m;
        return str.substr(0,m);
    }
|
|
|
|
/* Plain text, or a run of non-newline characters cut short where
 * forbiddenThingRegexp first matches. */
anyline
  = text
  / s:[^\n]+ {
        var str = s.join('');
        dp("anyline: " + print_r(str) + ", pos:" + pos);

        // Nothing forbidden right now: accept the whole run.
        if (forbiddenThingRegexp.regexp === '') {
            return str;
        }

        var m = str.search(forbiddenThingRegexp.regexp);
        if (m === 0) {
            // Forbidden right at the start: give everything back, reject.
            pos -= str.length;
            return null;
        }
        if (m < 0) {
            return str;
        }

        // reverse parser position to just before the forbidden match
        pos -= str.length - m;
        dp("anyline reverse " + (str.length - m)
            + ", matched: " + str.substr(0,m));
        return str.substr(0,m);
    }
|
|
|
|
|
|
|
|
|
|
// All chars that cannot start syntactic structures
text
  = chars:[A-Za-z0-9,._ -]+ { return chars.join(''); }

// One or more spaces, returned as a single string
space
  = blanks:[ ]+ { return blanks.join(''); }

newline
  = [\n]
|
|
|
|
// Block-level constructs, tried in this order; para is the fallback.
block
  = br
  / h
  / table
  / lists
  / para
|
|
|
|
// Headings: N '=' markers on each side of the text, ended by a newline.
h
  = h1
  / h2
  / h3
  / h4
  / h5
  / h6

h1
  = '=' title:heading_text '=' newline {
        return { type: 'h', level: 1, text: title };
    }

h2
  = '==' title:heading_text '==' newline {
        return { type: 'h', level: 2, text: title };
    }

h3
  = '===' title:heading_text '===' newline {
        return { type: 'h', level: 3, text: title };
    }

h4
  = '====' title:heading_text '====' newline {
        return { type: 'h', level: 4, text: title };
    }

h5
  = '=====' title:heading_text '=====' newline {
        return { type: 'h', level: 5, text: title };
    }

h6
  = '======' title:heading_text '======' newline {
        return { type: 'h', level: 6, text: title };
    }
|
|
|
|
// One or more '=' characters
heading_marker
  = '=' '='*

// Line content up to (not including) the next heading marker, as one string
heading_text
  = parts:( !heading_marker chunk:(anyline) { return chunk } )* {
        return parts.join('');
    }
|
|
|
|
// A bare newline becomes a 'br' node
br
  = newline {
        var node = { type: 'br' };
        return node;
    }
|
|
|
|
// A line of inline content ended by a newline becomes a 'para' node;
// otherwise the raw result of anyline is passed through unchanged.
para
  = line:inlineline newline {
        return { type: 'para', content: line };
    }
  / anyline
|
|
|
|
/* Inline content: consecutive string results are merged into single
 * { type: 'text' } nodes, other results are passed through in order. */
inline
  = c:(inline_element / anything)+ {
        var out = [];
        var pending = '';

        // Emit the accumulated string (if any) as a text node.
        var flush = function () {
            if (pending.length) {
                out.push({
                    type: 'text',
                    text: pending
                });
                pending = '';
            }
        };

        for (var i = 0; i < c.length; i++) {
            if (typeof c[i] == 'string') {
                pending += c[i];
            } else {
                flush();
                out.push(c[i]);
            }
        }
        flush();
        return out;
    }
|
|
|
|
/* Inline content built from anyline instead of anything: consecutive string
 * results are merged into single { type: 'text' } nodes, other results are
 * passed through in order. */
inlineline
  = c:(inline_element / anyline)+ {
        var out = [];
        var pending = '';

        // Emit the accumulated string (if any) as a text node.
        var flush = function () {
            if (pending.length) {
                out.push({
                    type: 'text',
                    text: pending
                });
                pending = '';
            }
        };

        for (var i = 0; i < c.length; i++) {
            if (typeof c[i] == 'string') {
                pending += c[i];
            } else {
                flush();
                out.push(c[i]);
            }
        }
        flush();
        return out;
    }
|
|
|
|
// Structured inline constructs, tried in this order
inline_element
  = comment
  / xmlish_tag
  / extlink
  / template
  / link
  / bold
  / italic
|
|
|
|
/* HTML-style comment: everything between '<!--' and the first '-->'.
 * Returns { type: 'comment', text: ... }. The body may be empty
 * ('<!---->') and may contain '-' or '--' sequences. */
comment
  = '<!--' c:comment_chars* '-->' {
        return {
            type: 'comment',
            text: c.join('')
        };
    }

// Any single character that does not begin the closing '-->'
comment_chars
  = !'-->' c:. { return c; }
|
|
|
|
// One or more non-newline characters, as a single string
inline_text_run
  = run:[^\n]+ { return run.join(''); }
|
|
|
|
// External link: [url text]
extlink
  = "[" target:url " " text:extlink_text "]" {
        var node = { type: 'extlink' };
        node.target = target;
        node.text = text;
        return node;
    }

// = "[" target:url text:extlink_text "]" { return { type: 'extlink', target: target, text: text } }

// "http:" followed by everything up to the next space or ']'
url
  = scheme:"http:" tail:([^ \]]+) { return scheme + tail.join(''); }

// Everything up to the closing ']'
extlink_text
  = label:[^\]]+ { return label.join(''); }
|
|
|
|
// Template transclusion: {{target|param|...}}
template
  = "{{" target:link_target params:("|" p:template_param { return p })* "}}" {
        var node = { type: 'template', target: target };
        // params is only set when at least one parameter was given
        if (params && params.length) {
            node.params = params;
        }
        return node;
    }
|
|
|
|
// Named (name=content) or positional (content only) template parameter
template_param
  = name:template_param_name "=" value:template_param_text {
        return { name: name, content: value };
    }
  / value:template_param_text {
        return { content: value };
    }
|
|
|
|
// Template argument: {{{name|param|...}}}
tplarg
  = "{{{" name:link_target params:("|" p:template_param { return p })* "}}}" {
        var node = { type: 'tplarg', name: name };
        // params is only set when at least one parameter was given
        if (params && params.length) {
            node.params = params;
        }
        return node;
    }
|
|
|
|
// Characters other than '=' and '|', stopping before '}}'; joined to a string
template_param_name
  = chars:( !"}}" ch:([^=|]) { return ch } )* { return chars.join(''); }

// Sequence of chunks (inline elements and single characters), not joined
template_param_text
  = template_param_text_chunk*

/* = h:( !"}}" x:([^|]) { return x } )* { return h.join(''); }*/

// An inline element, or any single character except '|' that does not
// start the closing '}}'
template_param_text_chunk
  = comment
  / xmlish_tag
  / extlink
  / template
  / link
  / bold
  / italic
  / !"}}" ch:([^|]) { return ch }
|
|
|
|
// Internal link: [[target]] or [[target|text]]
link
  = "[[" target:link_target text:("|" link_text)* "]]" {
        var node = { type: 'link', target: target };
        if (text && text.length) {
            // Each entry is the pair ["|", link_text]; only the first
            // entry's text is used.
            var first = text[0];
            node.text = first[1];
        }
        return node;
    }

// Characters other than '|', stopping before ']]'; joined to a string
link_target
  = chars:( !"]]" ch:([^|]) { return ch } )* { return chars.join(''); }

// anyline chunks, stopping before ']]'; joined to a string
link_text
  = parts:( !"]]" chunk:(anyline) { return chunk } )* { return parts.join(''); }
|
|
|
|
// Bold: '''text'''
bold
  = bold_marker body:bold_text bold_marker {
        var node = { type: 'b' };
        node.text = body;
        return node;
    }

bold_marker
  = "'''"

// One or more anyline chunks, stopping before the next bold marker
bold_text
  = parts:( !bold_marker chunk:(anyline) { return chunk } )+ { return parts.join(''); }
|
|
|
|
// Italic: ''text''
italic
  = italic_marker body:italic_text italic_marker {
        var node = { type: 'i' };
        node.text = body;
        return node;
    }

italic_marker
  = "''"

// One or more anyline chunks, stopping before the next italic marker
italic_text
  = parts:( !italic_marker chunk:(anyline) { return chunk } )+ { return parts.join(''); }
|
|
|
|
/* Will need to check anything xmlish against known/allowed HTML tags and
 * registered extensions, otherwise fail the match. Should ref be treated as
 * a regular extension? */
xmlish_tag
  = ref
  / references
|
|
|
|
// <ref ...>content</ref> or self-closing <ref ... />
ref
  = ref_full
  / ref_empty

/* Can we do backreferences to genericize this? */
ref_full
  = start:ref_start ">" content:ref_content* close:ref_end {
        var node = { type: 'ext', name: 'ref' };
        node.params = start.params;
        node.ws = start.ws;
        node.content = content;
        node.close = close;
        return node;
    }

// Self-closing form; `close` is the raw match of (space* "/>"), not joined
ref_empty
  = start:ref_start close:(space* "/>") {
        var node = { type: 'ext', name: 'ref' };
        node.ws = start.ws;
        node.params = start.params;
        node.close = close;
        return node;
    }

// Opening tag up to (not including) '>' or '/>': parameters, then whitespace
ref_start
  = "<ref" params:ext_param* ws:space* {
        var head = {};
        head.params = params;
        head.ws = ws;
        return head;
    }

// Closing tag, returned as one string
ref_end
  = pieces:("</ref" space* ">") {
        return pieces.join('');
    }

// One inline element or anyline chunk that does not start at the closing tag
ref_content
  = !ref_end item:(inline_element / anyline) {
        return item;
    }
|
|
|
|
/* FIXME: probably have to add these programmatically */
references
  = references_full
  / references_empty

// <references ...>content</references>
references_full
  = start:references_start ">" content:references_content* close:references_end {
        var node = { type: 'ext', name: 'references' };
        node.params = start.params;
        node.ws = start.ws;
        node.content = content;
        node.close = close;
        return node;
    }

// Self-closing form; `close` is the raw match of (space* "/>"), not joined
references_empty
  = start:references_start close:(space* "/>") {
        var node = { type: 'ext', name: 'references' };
        node.ws = start.ws;
        node.params = start.params;
        node.close = close;
        return node;
    }

// Opening tag up to (not including) '>' or '/>': parameters, then whitespace
references_start
  = "<references" params:ext_param* ws:space* {
        var head = {};
        head.params = params;
        head.ws = ws;
        return head;
    }

// Closing tag, returned as one string
references_end
  = pieces:("</references" space* ">") {
        return pieces.join('');
    }

// One inline element or anyline chunk that does not start at the closing tag
references_content
  = !references_end item:(inline_element / anyline) {
        return item;
    }
|
|
|
|
|
|
// name=value attribute of an extension tag; the parsed value object
// ({ text } or { quote, text }) gets the attribute name attached
ext_param
  = space* name:ext_param_name "=" val:ext_param_val {
        val.name = name;
        return val;
    }

ext_param_name
  = chars:[a-zA-Z0-9-]+ {
        return chars.join('');
    }

// Unquoted alphanumeric value, or a single- or double-quoted value
ext_param_val
  = t:[0-9A-Za-z]+ { return { text: t.join('') }; }
  / "'" t:[^'>]+ "'" { return { quote: "'", text: t.join('') }; }
  / '"' t:[^">]+ '"' { return { quote: '"', text: t.join('') }; }
|
|
|
|
/* A run of consecutive list lines, turned into nested ul/ol/dl nodes.
 *
 * Each element of `es` is either an li-style item carrying `listStyle` (its
 * bullet prefix) or a 'dtdd' pair whose first content entry carries it.
 * Returns the array of top-level list nodes.
 */
lists = es:(dtdd / li)+
{
    var out = [],    // List of list nodes
        bstack = "", // Bullet stack, previous element's listStyle
        bnext = "",  // Next element's listStyle
        nodes = [];  // Stack of currently active, nested list nodes

    // Number of leading bullets shared by x and y.
    var commonPrefixLength = function (x, y) {
        var minLength = Math.min(x.length, y.length);
        for (var i = 0; i < minLength; i++) {
            if (x[i] != y[i]) {
                break;
            }
        }
        return i;
    };

    // Attach list node n to the innermost open list (or to the output when
    // none is open) and make it the new innermost list. Pushing onto `nodes`
    // in both cases is what lets deeper items land inside the nested list
    // instead of its parent.
    var pushN = function ( n ) {
        if (nodes.length > 0) {
            nodes[nodes.length - 1].content.push(n);
        } else {
            out.push(n);
        }
        nodes.push(n);
    };

    // Close the lists not shared between bs and bn, then open one list per
    // remaining bullet of bn.
    var openLists = function ( bs, bn ) {
        var prefix = commonPrefixLength(bs, bn);
        nodes = nodes.slice(0, prefix);
        for (var i = prefix; i < bn.length; i++) {
            switch (bn[i]) {
                case '*':
                    pushN({type: 'ul', content: []});
                    break;
                case '#':
                    pushN({type: 'ol', content: []});
                    break;
                case ';':
                case ':':
                    pushN({type: 'dl', content: []});
                    break;
                default:
                    throw new Error("Unknown node prefix " + bn[i]);
            }
        }
    };

    for (var k = 0; k < es.length; k++) {
        var e = es[k];
        if (e.type == 'dtdd') {
            // The dt entry's bullets decide the nesting; both dt and dd are
            // added to the same list.
            bnext = e.content[0].listStyle;
            openLists( bstack, bnext );
            nodes[nodes.length - 1].content =
                nodes[nodes.length - 1].content.concat(e.content);
        } else {
            bnext = e.listStyle;
            openLists( bstack, bnext );
            nodes[nodes.length - 1].content.push(e);
        }
        bstack = bnext;
    }
    //dp("out: " + print_r(out, 5));
    return out;
}
|
|
|
|
/* One list line: bullets, content, newline. The node type is chosen by the
 * last bullet character. */
li = bullets:list_char+
     c:(inlineline / anyline)
     newline
{
    var typeByBullet = { '#': 'li', '*': 'li', ';': 'dt', ':': 'dd' };
    var lastBullet = bullets[bullets.length - 1];
    return {
        type: typeByBullet[lastBullet],
        listStyle: bullets,
        content: c
    };
}
|
|
|
|
/* Definition term and definition on one line: ";term:definition".
 * Returns { type: 'dtdd', content: [dt, dd] }, or null (rejecting the rule)
 * when the bullets do not end in ';'. */
dtdd = bullets:list_char+
       c:(inline_element / [^:\n])+
       ":"
       d:(inline / anyline)
       newline
{
    // reject rule if bullets do not end in semicolon
    if (bullets[bullets.length - 1] != ';') {
        return null;
    }
    return {
        type: 'dtdd',
        content: [
            {
                type: 'dt',
                listStyle: bullets,
                content: c
            }, {
                type: 'dd',
                // Same prefix with the trailing ';' replaced by ':'. The
                // bullets are an array, so join them explicitly: plain
                // array + string concatenation would insert commas
                // ("*,#:") once there are three or more bullets.
                listStyle: bullets.slice(0, bullets.length - 1).join('') + ':',
                content: d
            }
        ]
    };
}
|
|
|
|
|
|
// Bullet characters that can prefix a list line
list_char
  = [*#:;]
|
|
|
|
|
|
/* Tables */
|
|
|
|
/* Whole table: start line, optional caption, optional body, end marker.
 * Unmatched optional parts are detected by comparing against ''. */
table
  = tas:table_start c:table_caption? b:table_body? table_end {
        var res = {type: 'table'};
        var body = (b === '') ? [] : b;
        // The caption, when present, comes first in the content
        res.content = (c === '') ? body : [c].concat(body);
        if (tas.length > 0) {
            // FIXME: actually parse and build structure
            res.attributes = { unparsed: tas };
        }
        dp(print_r(res));
        return res;
    }
|
|
|
|
/* Table opening "{|" plus the rest of its line. The predicate registers the
 * table delimiters with forbiddenThingRegexp before the attributes are
 * parsed; table_end pops the same key again.
 * NOTE(review): everything after the predicate is optional, so the second
 * alternative (pop + reject) looks unreachable after a successful push, and
 * nothing here pops the key if the enclosing table later fails - confirm. */
table_start
  = "{|"
    & { forbiddenThingRegexp.push('(\n|^)\\||\\|[|\x7d+]'); return true; }
    ta:table_attribs* space* newline? {
        var trace = "table_start " + print_r(ta) + ", pos:" + pos;
        dp(trace);
        return ta;
    }
  / "{|" {
        forbiddenThingRegexp.pop('(\n|^)\\||\\|[|\x7d+]');
        return null;
    }
|
|
|
|
// Unparsed attribute text on the table start line
table_attribs
  = anyline

// Caption line: "|+" followed by inline content
table_caption
  = "|+" caption:inline* newline? {
        var node = { type: 'tableCaption' };
        node.content = caption;
        return node;
    }
|
|
|
|
/* Table rows: an optional first row without a leading "|-" marker, followed
 * by any number of regular rows. Returns an array of rows. */
table_body
  = & { dp("table_body enter"); return true; }
    first:table_firstrow rest:table_row* {
        var rows = [first];
        /* dp('table first and otherrows: ' + print_r(rows.concat(rest))); */
        return rows.concat(rest);
    }
  / rows:table_row* {
        //dp('table otherrows: ' + print_r(rows));
        return rows;
    }
|
|
|
|
// First row: one or more data cells with no "|-" row marker in front
table_firstrow
  = cells:table_data+ {
        var row = { type: 'tableRow' };
        row.content = cells;
        return row;
    }

// Regular row: "|-" marker followed by data and/or header cells
table_row
  = "|-" space* newline? cells:(table_data / table_header)* {
        var row = { type: 'tableRow' };
        row.content = cells;
        return row;
    }
|
|
|
|
/* Table cell, introduced either by "||" or by a single "|" that is not
 * followed by '}', '+' or '-'. Both forms produce a tableCell node; the
 * predicates only emit debug traces. */
table_data
  = & { dp("table_data enter, pos=" + pos); return true; }
    "||" td_attr? cell:anyblock* newline? {
        //dp("table || result:" + print_r(cell));
        var node = { type: 'tableCell' };
        node.content = cell;
        return node;
    }
  / & { dp("table_data : | enter pos=" + pos); return true; }
    "|" ![}+-] td_attr? cell:anyblock* newline? {
        //dp("table | result:" + print_r(cell));
        var node = { type: 'tableCell' };
        node.content = cell;
        return node;
    }
|
|
|
|
// Cell attributes: text up to a single '|' that is not part of '||'
td_attr
  = a:[^\n|]+ "|" !"|" {
        var attrs = a.join('');
        dp("td_attr: " + attrs);
        return attrs;
    }
|
|
|
|
/* Header cell, introduced by "!!" or by a single "!". Both forms produce a
 * tableHeader node whose content is the raw list of matched pieces. */
table_header
  = "!!" cell:(inline / ("!" !"!") / [^!\n])* newline? {
        var node = { type: 'tableHeader' };
        node.content = cell;
        return node;
    }
  / "!" cell:(inline / text / '!' !'!' / [^!\n])* newline? {
        var node = { type: 'tableHeader' };
        node.content = cell;
        return node;
    }
|
|
|
|
// Table close marker; releases the delimiter key pushed by table_start
table_end
  = "|}" newline? {
        forbiddenThingRegexp.pop('(\n|^)\\||\\|[|\x7d+]');
    }
|
|
|
|
|
|
/* Wikidom TODO:
|
|
* split off text into content nodes
|
|
* convert inlines into annotations
|
|
* change contents into children
|
|
*/
|