mediawiki-extensions-Math/math/lexer.mll
Nicholas Longo a2b53e5a36 The following patches restore how texvc added braces for all commands
but \operatorname to fix Bug 31442.  More specifically, TEX_FUN1 now
adds braces as it did in previous versions.  The result is texvc will
once again correctly sanitize multiple math accents even when no braces are
given (e.g. \dot \vec B) and it will allow math accents to be used with
symbols whose font has been changed (e.g. \tilde \mathcal{M}).  Parser
tests are created to ensure these continue to work.  

\operatorname is now handled specially by the function TEX_FUN1nb
(nb stands for "no braces"; the name was chosen to be consistent with
the existing function TEX_FUN2nb).  Adding braces causes LaTeX to
process this command incorrectly.  The parser test for \operatorname
has been updated to reflect the fact that the hash of the images
involved has changed.
2011-10-13 23:04:41 +00:00

120 lines
6.9 KiB
OCaml

{
open Parser
open Render_info
open Tex
}
(* Named regular expressions used by the rules of the token entry point. *)

(* Blank characters. *)
let space = [' ' '\t' '\n' '\r']

(* ASCII letters; literal_id denotes the same set under its own name. *)
let alpha = ['A'-'Z' 'a'-'z']
let literal_id = alpha

(* Decimal digits. *)
let literal_mn = ['0'-'9']

(* Single-character punctuation, delimiter and operator classes. *)
let literal_uf_lt = [',' ':' ';' '?' '!' '\'']
let delimiter_uf_lt = ['(' ')' '.']
let literal_uf_op = ['+' '-' '*' '=']
let delimiter_uf_op = ['/' '|']

(* Characters accepted between the braces of a box command.
   aboxchars is the ASCII-only set; boxchars additionally accepts every
   byte in the range 128-255. *)
let aboxchars =
  ['A'-'Z' 'a'-'z' '0'-'9'
   '+' '-' '*' '=' '/'
   ',' ':' ';' '?' '!' '.' '\'' '`'
   '(' ')' ' ']
let boxchars = aboxchars | ['\128'-'\255']

(* Function names matched after a backslash by dedicated rules. *)
let latex_function_names =
    "arccos" | "arcsin" | "arctan" | "arg"
  | "cos" | "cosh" | "cot" | "coth" | "csc"
  | "deg" | "det" | "dim"
  | "exp" | "gcd" | "hom" | "inf" | "ker"
  | "lg" | "lim" | "liminf" | "limsup" | "ln" | "log"
  | "max" | "min" | "Pr"
  | "sec" | "sin" | "sinh" | "sup"
  | "tan" | "tanh"

(* Function names that the rules below wrap in \operatorname. *)
let mediawiki_function_names =
    "arccot" | "arcsec" | "arccsc" | "sgn" | "sen"
(* token: the single lexer entry point.  It skips blanks and returns one
   parser token per call.

   ocamllex picks the longest match and, among matches of equal length,
   the rule that is written first, so the ORDER of the cases below is
   significant (see the notes on individual groups).

   Conventions used in the actions:
   - Where a constructor carries both a TeX string and an HTML string,
     the HTML string uses character entities (&lt; &gt; &amp; &nbsp;
     &minus;) rather than raw characters, so the markup characters are
     escaped and explicit spaces are not collapsed.
   - Input that matches no case is deliberately not handled here; the
     generated lexer then raises its standard failure for an empty token. *)
rule token = parse
    space +                     { token lexbuf }

  (* ---- Box commands ------------------------------------------------
     The lexeme contains the command, optional blanks and the braces.
     Neither box character set contains a brace, so the first opening
     brace found by String.index is the one that starts the payload and
     the last character of the lexeme is the closing brace.  The payload
     is therefore the text strictly between them. *)

  (* NOTE(review): unlike the \mbox/\hbox/\vbox rules further down, this
     rule does not call Texutil.tex_use_nonascii although boxchars admits
     bytes 128-255 - confirm that this is intended. *)
  | "\\text" space * '{' boxchars + '}'
                                { Texutil.tex_use_ams ();
                                  let str = Lexing.lexeme lexbuf in
                                  let n = String.index str '{' + 1 in
                                  BOX ("\\text", String.sub str n (String.length str - n - 1)) }

  (* ASCII-only payloads.  These come before the boxchars variants so
     that they win the tie when the payload is pure ASCII. *)
  | "\\mbox" space * '{' aboxchars + '}'
                                { let str = Lexing.lexeme lexbuf in
                                  let n = String.index str '{' + 1 in
                                  BOX ("\\mbox", String.sub str n (String.length str - n - 1)) }
  | "\\hbox" space * '{' aboxchars + '}'
                                { let str = Lexing.lexeme lexbuf in
                                  let n = String.index str '{' + 1 in
                                  BOX ("\\hbox", String.sub str n (String.length str - n - 1)) }
  | "\\vbox" space * '{' aboxchars + '}'
                                { let str = Lexing.lexeme lexbuf in
                                  let n = String.index str '{' + 1 in
                                  BOX ("\\vbox", String.sub str n (String.length str - n - 1)) }

  (* Payloads containing at least one byte in 128-255: same token, but
     Texutil.tex_use_nonascii is called first. *)
  | "\\mbox" space * '{' boxchars + '}'
                                { let str = Lexing.lexeme lexbuf in
                                  let n = String.index str '{' + 1 in
                                  Texutil.tex_use_nonascii();
                                  BOX ("\\mbox", String.sub str n (String.length str - n - 1)) }
  | "\\hbox" space * '{' boxchars + '}'
                                { let str = Lexing.lexeme lexbuf in
                                  let n = String.index str '{' + 1 in
                                  Texutil.tex_use_nonascii();
                                  BOX ("\\hbox", String.sub str n (String.length str - n - 1)) }
  | "\\vbox" space * '{' boxchars + '}'
                                { let str = Lexing.lexeme lexbuf in
                                  let n = String.index str '{' + 1 in
                                  Texutil.tex_use_nonascii();
                                  BOX ("\\vbox", String.sub str n (String.length str - n - 1)) }

  (* ---- Single-character literals and delimiters -------------------- *)
  | literal_id                  { let str = Lexing.lexeme lexbuf in LITERAL (MHTMLABLEC (FONT_IT, str,str,MI,str)) }
  | literal_mn                  { let str = Lexing.lexeme lexbuf in LITERAL (MHTMLABLEC (FONT_RM, str,str,MN,str)) }
  | literal_uf_lt               { let str = Lexing.lexeme lexbuf in LITERAL (HTMLABLEC (FONT_UFH, str,str)) }
  | delimiter_uf_lt             { let str = Lexing.lexeme lexbuf in DELIMITER (HTMLABLEC (FONT_UFH, str,str)) }
  (* Must stay before literal_uf_op, which also contains the hyphen:
     the HTML form of a minus is the &minus; entity, not the hyphen. *)
  | "-"                         { let str = Lexing.lexeme lexbuf in LITERAL (MHTMLABLEC (FONT_UFH,"-"," &minus; ",MO,str))}
  | literal_uf_op               { let str = Lexing.lexeme lexbuf in LITERAL (MHTMLABLEC (FONT_UFH, str," "^str^" ",MO,str)) }
  | delimiter_uf_op             { let str = Lexing.lexeme lexbuf in DELIMITER (MHTMLABLEC (FONT_UFH, str," "^str^" ",MO,str)) }

  (* ---- Commands with dedicated tokens ------------------------------
     These precede the generic backslash-plus-letters case so that they
     win the tie on an equally long match. *)
  | "\\operatorname"            { Texutil.tex_use_ams(); FUN_AR1nb "\\operatorname" }
  | "\\sqrt" space * "["        { FUN_AR1opt "\\sqrt" }
  | "\\xleftarrow" space * "["  { Texutil.tex_use_ams(); FUN_AR1opt "\\xleftarrow" }
  | "\\xrightarrow" space * "[" { Texutil.tex_use_ams(); FUN_AR1opt "\\xrightarrow" }

  (* Standard LaTeX function names, optionally followed by an opening
     bracket that is folded into the same literal. *)
  | "\\" (latex_function_names as name) space * "("
                                { LITERAL (HTMLABLEC(FONT_UFH,"\\" ^ name ^ "(", name ^ "(")) }
  | "\\" (latex_function_names as name) space * "["
                                { LITERAL (HTMLABLEC(FONT_UFH,"\\" ^ name ^ "[", name ^ "[")) }
  | "\\" (latex_function_names as name) space * "\\{"
                                { LITERAL (HTMLABLEC(FONT_UFH,"\\" ^ name ^ "\\{", name ^ "{")) }
  | "\\" (latex_function_names as name) space *
                                { LITERAL (HTMLABLEC(FONT_UFH,"\\" ^ name ^ " ", name ^ " ")) }

  (* Additional function names: emitted as \operatorname applied to the
     name, after calling Texutil.tex_use_ams. *)
  | "\\" (mediawiki_function_names as name) space * "("
                                { (Texutil.tex_use_ams(); LITERAL (HTMLABLEC(FONT_UFH,"\\operatorname{" ^ name ^ "}(", name^ "("))) }
  | "\\" (mediawiki_function_names as name) space * "["
                                { (Texutil.tex_use_ams(); LITERAL (HTMLABLEC(FONT_UFH,"\\operatorname{" ^ name ^ "}[", name^ "["))) }
  | "\\" (mediawiki_function_names as name) space * "\\{"
                                { (Texutil.tex_use_ams(); LITERAL (HTMLABLEC(FONT_UFH,"\\operatorname{" ^ name ^ "}\\{", name^ "{"))) }
  | "\\" (mediawiki_function_names as name) space *
                                { (Texutil.tex_use_ams(); LITERAL (HTMLABLEC(FONT_UFH,"\\operatorname{" ^ name ^ "} ", name ^ " "))) }

  (* Every other backslash-plus-letters command is handed to Texutil.find
     together with its full lexeme (backslash included). *)
  | "\\" alpha +                { Texutil.find (Lexing.lexeme lexbuf) }

  (* ---- Backslash followed by a non-letter --------------------------
     The explicit spacing commands render as a non-breaking space in
     HTML, like the tilde further down; a plain space would be collapsed
     by the browser. *)
  | "\\,"                       { LITERAL (HTMLABLE (FONT_UF, "\\,","&nbsp;")) }
  | "\\ "                       { LITERAL (HTMLABLE (FONT_UF, "\\ ","&nbsp;")) }
  | "\\;"                       { LITERAL (HTMLABLE (FONT_UF, "\\;","&nbsp;")) }
  | "\\!"                       { LITERAL (TEX_ONLY "\\!") }
  | "\\{"                       { DELIMITER (HTMLABLEC(FONT_UFH,"\\{","{")) }
  | "\\}"                       { DELIMITER (HTMLABLEC(FONT_UFH,"\\}","}")) }
  | "\\|"                       { DELIMITER (HTMLABLE (FONT_UFH,"\\|","||")) }
  | "\\_"                       { LITERAL (HTMLABLEC(FONT_UFH,"\\_","_")) }
  | "\\#"                       { LITERAL (HTMLABLE (FONT_UFH,"\\#","#")) }
  | "\\%"                       { LITERAL (HTMLABLE (FONT_UFH,"\\%","%")) }
  | "\\$"                       { LITERAL (HTMLABLE (FONT_UFH,"\\$","$")) }
  (* The ampersand must be escaped in the HTML rendering. *)
  | "\\&"                       { LITERAL (HTMLABLEC (FONT_RM,"\\&","&amp;")) }

  (* ---- Table structure and environments ---------------------------- *)
  | "&"                         { NEXT_CELL }
  | "\\\\"                      { NEXT_ROW }
  | "\\begin{matrix}"           { Texutil.tex_use_ams(); BEGIN__MATRIX }
  | "\\end{matrix}"             { END__MATRIX }
  | "\\begin{pmatrix}"          { Texutil.tex_use_ams(); BEGIN_PMATRIX }
  | "\\end{pmatrix}"            { END_PMATRIX }
  | "\\begin{bmatrix}"          { Texutil.tex_use_ams(); BEGIN_BMATRIX }
  | "\\end{bmatrix}"            { END_BMATRIX }
  | "\\begin{Bmatrix}"          { Texutil.tex_use_ams(); BEGIN_BBMATRIX }
  | "\\end{Bmatrix}"            { END_BBMATRIX }
  | "\\begin{vmatrix}"          { Texutil.tex_use_ams(); BEGIN_VMATRIX }
  | "\\end{vmatrix}"            { END_VMATRIX }
  | "\\begin{Vmatrix}"          { Texutil.tex_use_ams(); BEGIN_VVMATRIX }
  | "\\end{Vmatrix}"            { END_VVMATRIX }
  | "\\begin{array}"            { Texutil.tex_use_ams(); BEGIN_ARRAY }
  | "\\end{array}"              { END_ARRAY }
  | "\\begin{align}"            { Texutil.tex_use_ams(); BEGIN_ALIGN }
  | "\\end{align}"              { END_ALIGN }
  | "\\begin{alignat}"          { Texutil.tex_use_ams(); BEGIN_ALIGNAT }
  | "\\end{alignat}"            { END_ALIGNAT }
  | "\\begin{smallmatrix}"      { Texutil.tex_use_ams(); BEGIN_SMALLMATRIX }
  | "\\end{smallmatrix}"        { END_SMALLMATRIX }
  | "\\begin{cases}"            { Texutil.tex_use_ams(); BEGIN_CASES }
  | "\\end{cases}"              { END_CASES }

  (* ---- Remaining single characters ---------------------------------
     Both angle brackets are escaped in the HTML rendering.  A bare
     percent or dollar sign is emitted in its backslash-escaped TeX form. *)
  | '>'                         { LITERAL (HTMLABLEC(FONT_UFH,">"," &gt; ")) }
  | '<'                         { LITERAL (HTMLABLEC(FONT_UFH,"<"," &lt; ")) }
  | '%'                         { LITERAL (HTMLABLEC(FONT_UFH,"\\%","%")) }
  | '$'                         { LITERAL (HTMLABLEC(FONT_UFH,"\\$","$")) }
  | '~'                         { LITERAL (HTMLABLE (FONT_UF, "~","&nbsp;")) }
  | '['                         { DELIMITER (HTMLABLEC(FONT_UFH,"[","[")) }
  | ']'                         { SQ_CLOSE }
  | '{'                         { CURLY_OPEN }
  | '}'                         { CURLY_CLOSE }
  | '^'                         { SUP }
  | '_'                         { SUB }
  | eof                         { EOF }