/* Produces output more or less compatible with FakeParser; plug it into FP's output and see */

{
    /**
     * Debug logging hook. The guard is hard-wired to `false`, so this is a
     * no-op; flip the condition locally to trace the parser via console.log.
     *
     * @param {string} msg Message to log.
     */
    var dp = function ( msg ) {
        if ( false ) {
            console.log(msg);
        }
    };

    /*
     * Flags for specific parse environments (inside tables, links etc). Flags
     * trigger syntactic stops in the inline_breaks production, which
     * terminates inline and attribute matches. Flags merely reduce the number
     * of productions needed: The grammar is still context-free as the
     * productions can just be unrolled for all combinations of environments
     * at the cost of a much larger grammar.
     */
    var syntaxFlags = {};

    /**
     * Enter a parse environment: increments the nesting counter for `flag`
     * (creating it at 1 on first use).
     *
     * @param {string} flag Environment name, e.g. 'table' or 'bold'.
     * @returns {boolean} Always true, so it can be used as a semantic predicate.
     */
    var setFlag = function (flag) {
        if (syntaxFlags[flag] !== undefined) {
            syntaxFlags[flag]++;
        } else {
            syntaxFlags[flag] = 1;
        }
        return true;
    };

    /**
     * Leave a parse environment: decrements the nesting counter for `flag`.
     *
     * The counter is only decremented while it is positive. Decrementing an
     * unset flag would store NaN (which setFlag can never recover from), and a
     * negative counter is truthy and would leave the break permanently active.
     *
     * @param {string} flag Environment name previously passed to setFlag.
     */
    var clearFlag = function (flag) {
        if (syntaxFlags[flag] > 0) {
            syntaxFlags[flag]--;
        }
    };

    /**
     * Temporary debugging help. Is there anything similar in JS or a library?
     *
     * Renders a value as PHP print_r-style text. Objects and arrays are
     * expanded recursively; any other value is rendered as
     * "=>value<=(type)".
     *
     * @param {*} arr Value to dump.
     * @param {number} [level] Current nesting depth; defaults to 0.
     * @returns {string} The textual dump.
     */
    var print_r = function (arr, level) {
        var dumped_text = "";
        if (!level) {
            level = 0;
        }

        // The padding given at the beginning of the line.
        var level_padding = "";
        var bracket_level_padding = "";

        for (var j = 0; j < level + 1; j++) {
            level_padding += " ";
        }
        for (var b = 0; b < level; b++) {
            bracket_level_padding += " ";
        }

        if (typeof arr === 'object') {
            // Array/Hashes/Objects
            dumped_text += "Array\n";
            dumped_text += bracket_level_padding + "(\n";
            for (var item in arr) {
                var value = arr[item];
                if (typeof value === 'object') {
                    // Nested array/object: recurse two levels deeper
                    dumped_text += level_padding + "[" + item + "] => ";
                    dumped_text += print_r(value, level + 2);
                } else {
                    dumped_text += level_padding + "[" + item + "] => '" + value + "'\n";
                }
            }
            dumped_text += bracket_level_padding + ")\n\n";
        } else {
            // Strings/Chars/Numbers etc.
            dumped_text = "=>" + arr + "<=(" + typeof arr + ")";
        }
        return dumped_text;
    };

    /**
     * Convert list prefixes to a list of WikiDom list styles.
     *
     * Works on anything indexable with a `length` (array of characters or a
     * string); characters other than `*`, `#`, `;` and `:` are skipped.
     *
     * @param {Array|string} bullets List prefix characters.
     * @returns {Array} Style names, one per recognised bullet character.
     */
    var bulletsToTypes = function (bullets) {
        var bTypes = [];
        for (var i = 0; i < bullets.length; i++) {
            switch (bullets[i]) {
                case '*':
                    bTypes.push('bullet');
                    break;
                case '#':
                    bTypes.push('number');
                    break;
                case ';':
                    bTypes.push('definitionterm');
                    break;
                case ':':
                    bTypes.push('definitiondescription');
                    break;
            }
        }
        return bTypes;
    };
}

// Entry point: a page is a sequence of blocks.
start
  = e:block* {
        var es = [];
        // flatten sub-arrays, as a list block can contain multiple lists
        for (var i = 0, length = e.length; i < length; i++) {
            var ei = e[i];
            if ($.isArray(ei)) {
                es = es.concat(ei);
            } else {
                es.push(ei);
            }
        }
        return {
            type: 'page',
            children: es
        };
    }

anyblock = block / inline

anyblockline = block / inlineline

// All chars that cannot start syntactic structures
text = t:[A-Za-z0-9,._ -]+ { return t.join(''); }

space
  = s:[ \t]+ { return s.join(''); }

// Start of line: either just after a newline or at the very start of input,
// optionally followed by a comment.
sol = (newline / & { return pos === 0; } { return true; })
      comment?
newline
  = '\n' / '\r\n'

block
  = block_lines
  / para
  / br
  / newline &newline
  / comment

block_lines
  = h
  / table
  / lists
  / pre_indent


/* Headings */

h = h1 / h2 / h3 / h4 / h5 / h6

// All six heading levels emit the same node shape ({type, attributes, content}).
h1 = sol '=' c:heading_text '=' &newline {
    return { type: 'heading', attributes: {level: 1}, content: c };
}

h2 = sol '==' c:heading_text '==' &newline {
    return { type: 'heading', attributes: {level: 2}, content: c };
}

h3 = sol '===' c:heading_text '===' &newline {
    return { type: 'heading', attributes: {level: 3}, content: c };
}

h4 = sol '====' c:heading_text '====' &newline {
    return { type: 'heading', attributes: {level: 4}, content: c };
}

h5 = sol '=====' c:heading_text '=====' &newline {
    return { type: 'heading', attributes: {level: 5}, content: c };
}

h6 = sol '======' c:heading_text '======' &newline {
    return { type: 'heading', attributes: {level: 6}, content: c };
}

heading_marker
  = '=' '='*

heading_text
  = h:( !heading_marker x:(inlineline) { return x } )* { return h.join(''); }

br
  = newline !newline { return {type: 'br'}; }

// TODO: convert inline content to annotations!
para
  = sol c:inlineline cs:(!block_lines para)* {
        return {type: 'paragraph', content: c[0] };
    }

pre_indent
  = l:pre_indent_line+ {
        return { type: 'pre', children: l };
    }

pre_indent_line = sol space l:inlineline { return l; }


// Syntax that stops inline expansion. Each alternative is only active while
// the corresponding syntaxFlags entry is truthy.
inline_breaks
  = //& { console.log(print_r(syntaxFlags)); return true; }
    & { return syntaxFlags['table']; }
    a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + print_r(a)); return true; }
  / & { return syntaxFlags['italic']; } italic_marker { return true; }
  / & { return syntaxFlags['bold']; } bold_marker { return true; }
  / & { return syntaxFlags['linkdesc']; } link_end { return true; }

// Inline content that may span newlines. Adjacent string matches are merged
// into {type: 'text'} nodes; everything else is passed through in order.
inline
  = c:(text / inline_element / (!inline_breaks ch:. { return ch; }))+ {
        var out = [];
        var text = '';
        for (var i = 0; i < c.length; i++) {
            if (typeof c[i] == 'string') {
                text += c[i];
            } else {
                if (text.length) {
                    out.push({ type: 'text', text: text });
                    text = '';
                }
                out.push(c[i]);
            }
        }
        if (text.length) {
            out.push({ type: 'text', text: text });
        }
        return out;
    }

// Inline content restricted to a single line.
//
// The second alternative is labelled and given an action so that it yields
// the matched element/character itself. An unlabelled sequence yields an
// array (['', x]), which would never be merged into the text run below.
inlineline
  = c:(text / !inline_breaks x:(inline_element / [^\n]) { return x; })+ {
        var out = [];
        var text = '';
        dp("inlineline: " + print_r(c));
        for (var i = 0; i < c.length; i++) {
            if (typeof c[i] == 'string') {
                text += c[i];
            } else {
                if (text.length) {
                    out.push({ type: 'text', text: text });
                    text = '';
                }
                out.push(c[i]);
            }
        }
        if (text.length) {
            // NOTE(review): unlike the push above, the trailing text run has
            // no `type` key; para and li read `c[0]` as content, so this may
            // be deliberate -- confirm before changing.
            out.push({
                text: text
                //annotations: []
            });
        }
        return out;
    }

/* TODO: convert all these to annotations!
 * -> need (start, end) offsets within block
 */
inline_element
  = comment
  / xmlish_tag
  / extlink
  / template
  / link
  / bold
  / italic

// HTML-style comment. The delimiters were lost from this rule; they are
// restored to match comment_chars, which stops at the closing "-->".
comment
  = '<!--' c:comment_chars* '-->' {
        return { type: 'comment', text: c.join('') };
    }

comment_chars
  = c:[^-] { return c; }
  / c:'-' !'->' { return c; }

extlink
  = "[" target:url " " text:extlink_text "]" {
        return { type: 'extlink', target: target, text: text };
    }

//  = "[" target:url text:extlink_text "]" { return { type: 'extlink', target: target, text: text } }

url
  = proto:"http:" rest:([^ \]]+) { return proto + rest.join(''); }

extlink_text
  = c:[^\]]+ { return c.join(''); }

template
  = "{{" target:link_target params:("|" p:template_param { return p })* "}}" {
        var obj = { type: 'template', target: target };
        if (params && params.length) {
            obj.params = params;
        }
        return obj;
    }

template_param
  = name:template_param_name "=" c:template_param_text {
        return { name: name, content: c };
    }
  / c:template_param_text {
        return { content: c };
    }

tplarg
  = "{{{" name:link_target params:("|" p:template_param { return p })* "}}}" {
        var obj = { type: 'tplarg', name: name };
        if (params && params.length) {
            obj.params = params;
        }
        return obj;
    }

template_param_name
  = h:( !"}}" x:([^=|]) { return x } )* { return h.join(''); }

template_param_text
  = template_param_text_chunk*
/*  = h:( !"}}" x:([^|]) { return x } )* { return h.join(''); }*/

template_param_text_chunk
  = comment
  / xmlish_tag
  / extlink
  / template
  / link
  / bold
  / italic
  / !"}}" x:([^|]) { return x }

link
  = "[[" target:link_target text:("|" link_text)* "]]" {
        var obj = { type: 'link', target: target };
        if (text && text.length) {
            // each match is the sequence ["|", link_text]; take the text part
            obj.text = text[0][1]; // ehhhh
        }
        return obj;
    }

link_target
  = h:( !"]]" x:([^|]) { return x } )* { return h.join(''); }

// The 'linkdesc' flag is set exactly once before the description is consumed
// and cleared exactly once afterwards. Setting it inside the repetition would
// increment it on every iteration (including the final failing one) while
// clearing it only once, leaving the flag stuck on after the link.
link_text
  = & { return setFlag('linkdesc'); }
    h:inlineline* {
        clearFlag('linkdesc');
        return h.join('');
    }
  / & { clearFlag('linkdesc'); } { return null; }

link_end = "]]"

// The second alternative only runs when the first one failed after the flag
// was set; it clears the flag and returns null.
bold
  = bold_marker
    & { dp('benter:' + pos); return setFlag('bold'); }
    c:inlineline
    bold_marker {
        clearFlag('bold');
        return { type: 'b', text: c };
    }
  / bold_marker { clearFlag('bold'); return null; }

bold_marker = "'''"

italic
  = italic_marker
    & { dp('ienter:' + pos); return setFlag('italic'); }
    c:inlineline
    italic_marker {
        clearFlag('italic');
        dp('ileave:' + pos);
        return { type: 'i', text: c };
    }
  / italic_marker { clearFlag('italic'); return null; }

italic_marker = "''"

/* Will need to check anything xmlish against known/allowed HTML tags and
 * registered extensions, otherwise fail the match. Should ref be treated as a
 * regular extension? */
xmlish_tag = ref / references

ref = ref_full / ref_empty

/* Can we do backreferences to genericize this? */
ref_full
  = start:ref_start ">" content:ref_content* close:ref_end {
        return {
            type: 'ext',
            name: 'ref',
            params: start.params,
            ws: start.ws,
            content: content,
            close: close
        };
    }

ref_empty
  = start:ref_start close:(space* "/>") {
        return {
            type: 'ext',
            name: 'ref',
            ws: start.ws,
            params: start.params,
            close: close
        };
    }

// Opening "<ref" plus attributes. Reconstructed: the tag literals were lost
// from this rule; ref_full/ref_empty read `params` and `ws` from the result.
ref_start
  = "<ref" params:ext_param* ws:space* {
        return { params: params, ws: ws };
    }

ref_end
  = all:("</ref" space* ">") { return all.join(''); }

ref_content
  = !ref_end a:(inline) { return a; }

/* fixme probably have to programatically add these */
references = references_full / references_empty

references_full
  = start:references_start ">" content:references_content* close:references_end {
        return {
            type: 'ext',
            name: 'references',
            params: start.params,
            ws: start.ws,
            content: content,
            close: close
        };
    }

references_empty
  = start:references_start close:(space* "/>") {
        return {
            type: 'ext',
            name: 'references',
            ws: start.ws,
            params: start.params,
            close: close
        };
    }

// Opening "<references" plus attributes. Reconstructed in the same way as
// ref_start.
references_start
  = "<references" params:ext_param* ws:space* {
        return { params: params, ws: ws };
    }

references_end
  = all:("</references" space* ">") { return all.join(''); }

references_content
  = !references_end a:(inline) { return a; }

ext_param
  = space* name:ext_param_name "=" val:ext_param_val {
        val.name = name;
        return val;
    }

ext_param_name
  = name:[a-zA-Z0-9-]+ { return name.join(''); }

ext_param_val
  = t:[0-9A-Za-z]+ { return {text: t.join('') }; }
  / "'" t:[^'>]+ "'" { return { quote: "'", text: t.join('') }; }
  / '"' t:[^">]+ '"' { return { quote: '"', text: t.join('') }; }

lists
  = es:(dtdd / li)+ {
        // Flatten es: dtdd yields a pair of items, li yields a single item
        var esLen = es.length;
        var flatEs = [];
        for (var i = 0; i < esLen; i++) {
            var ei = es[i];
            if ($.isArray(ei)) {
                flatEs = flatEs.concat(ei);
            } else {
                flatEs.push(ei);
            }
        }
        return {
            type: 'list',
            children: flatEs
        };
    }

li
  = sol bullets:list_char+ c:inlineline &newline {
        return {
            type: 'listItem',
            attributes: {
                styles: bulletsToTypes(bullets)
            },
            content: c[0]
        };
    }

dtdd
  = sol bullets:list_char+
    c:(inline_element / [^:\n])+
    ":"
    d:(inline_element / [^\n])+
    &newline {
        // reject rule if bullets do not end in semicolon
        if (bullets[bullets.length - 1] != ';') {
            return null;
        } else {
            // The description item uses the same prefix with the trailing
            // ';' swapped for ':'. Built as an array rather than through
            // array-to-string coercion.
            var descBullets = bullets.slice(0, bullets.length - 1).concat([':']);
            return [
                {
                    type: 'listItem',
                    attributes: {styles: bulletsToTypes(bullets)},
                    content: {text: c.join('')}
                },
                {
                    type: 'listItem',
                    attributes: {styles: bulletsToTypes(descBullets)},
                    content: {text: d.join('')}
                }
            ];
        }
    }

list_char = [*#:;]


/* Tables */

table
  = tas:table_start c:table_caption? b:table_body? table_end {
        var res = {type: 'table'};
        // unmatched optionals are compared against '' here
        var body = b !== '' ? b : [];
        if (c !== '') {
            res.children = [c].concat(body);
        } else {
            res.children = body;
        }
        if (tas.length > 0) {
            // FIXME: actually parse and build structure
            res.attributes = { unparsed: tas };
        }
        //dp(print_r(res));
        return res;
    }

// The second alternative only runs when the first one failed after the
// 'table' flag was set; it clears the flag and returns null.
table_start
  = sol "{|" & { setFlag('table'); return true; } ta:table_attribs* space* {
        //dp("table_start " + print_r(ta) + ", pos:" + pos);
        return ta;
    }
  / sol "{|" { clearFlag('table'); return null; }

table_attribs
  = text
  / ! inline_breaks !newline .

table_caption
  = newline "|+" c:inline* {
        return { type: 'tableCaption', content: c[0] };
    }

table_body
  = & { dp("table_body enter"); return true; }
    firstrow:table_firstrow otherrows:table_row* {
        /* dp('table first and otherrows: '
         * + print_r([firstrow].concat(otherrows))); */
        return [firstrow].concat(otherrows);
    }
  / otherrows:table_row* {
        //dp('table otherrows: ' + print_r(otherrows));
        return otherrows;
    }

table_firstrow
  = td:table_data+ {
        return { type: 'tableRow', children: td };
    }

table_row
  = & { dp("table row enter"); return true; }
    newline
    "|-" thtd_attribs? space* td:(table_data / table_header)* {
        return { type: 'tableRow', children: td };
    }

table_data
  = & { dp("table_data enter, pos=" + pos); return true; }
    ("||" / newline "|")
    ! [}+-]
    a:thtd_attribs?
    td:(!inline_breaks anyblock)* {
        dp("table data result: " + print_r(td) + ", attribts: " + print_r(a));
        return { type: 'tableCell', attributes: { unparsed: a }, children: td };
    }

table_header
  = ("!!" / newline "!")
    a:thtd_attribs?
    c:inline {
        return { type: 'tableHeading', attributes: { unparsed: a }, children: c };
    }

thtd_attribs
  // In particular, do not match [|\n]
  = a:(text / ! inline_breaks [="':;/,.-] )+ "|" ! [|}+-] {
        return a;
    }

table_end = newline? "|}" { clearFlag('table'); }


/* Wikidom TODO:
 * split off text into content nodes
 * convert inlines into annotations
 * change contents into children
 */