Separate block-level tags in tokenizer to delimit inlines and avoid wrapping

block-level elements in paragraphs.
This commit is contained in:
Gabriel Wicke 2011-11-25 17:41:26 +00:00
parent 859379a635
commit 6b8c109cf0

View file

@ -404,6 +404,8 @@ block
/ pre
/ comment &eolf
/ para
/ pre
/ block_tag
/ (s:sol {
if (s) {
return [s, {type: 'NEWLINE'}];
@ -424,7 +426,7 @@ block_line
/ lists
/ pre_indent
/ pre
/ space* !nowiki generic_tag space* &eolf // XXX: only match block elements!
/ space* block_tag space* &eolf
/* Headings */
@ -743,7 +745,7 @@ pre
return [ {type: 'TAG', name: 'pre', attribs: attribs} ]
.concat(ts, [{type: 'ENDTAG', name: 'pre'}]);
}
/ "</pre>" { return {type: 'TEXT', value: "</pre>"}; }
/// "</pre>" { return {type: 'TEXT', value: "</pre>"}; }
nowiki
= "<nowiki>" nc:nowiki_content "</nowiki>" { return nc }
@ -764,6 +766,30 @@ nowiki_content
return [{type: 'TEXT', value: ts.join('')}];
}
// See http://dev.w3.org/html5/spec/Overview.html#syntax-tag-name and
// following paragraphs
/*
 * block_tag: matches a single start, end or self-closing tag whose name is
 * one of the literals accepted by block_name.
 *
 * Result: a token object {name, attribs, type} where
 *   - name    is the text matched by block_name,
 *   - attribs is whatever generic_attribute* produced (a list, possibly empty),
 *   - type    is 'ENDTAG' when a "/" follows "<", otherwise 'SELFCLOSINGTAG'
 *             when a "/" precedes ">", otherwise 'TAG'.
 * A leading "/" takes precedence over a trailing one, so "</p/>" is an ENDTAG.
 */
block_tag
  = "<" end:"/"? name:block_name
    attribs:generic_attribute*
    selfclose:"/"?
    ">" {
        var res = {name: name, attribs: attribs};
        // Test truthiness instead of comparing against '': depending on the
        // PEG.js release an unmatched optional is either "" or null, and
        // `null != ''` is true, which would turn every tag into an ENDTAG.
        if ( end ) {
            res.type = 'ENDTAG';
        } else if ( selfclose ) {
            res.type = 'SELFCLOSINGTAG';
        } else {
            res.type = 'TAG';
        }
        return res;
    }
/*
 * block_name: the tag names that block_tag accepts. The alternatives are
 * tried in the order listed, and each literal is matched exactly as written.
 */
block_name
  = "p"
  / "table"
  / "td"
  / "tr"
  / "ul"
  / "ol"
  / "li"
  / "dt"
  / "dd"
  / "div"
  / "center"
  / "blockquote"
// See http://dev.w3.org/html5/spec/Overview.html#syntax-tag-name and
// following paragraphs
generic_tag