From 3d0dfe1e4390329285627735515e70088a9695ba Mon Sep 17 00:00:00 2001 From: Stegmujo Date: Fri, 1 Dec 2023 14:29:36 +0000 Subject: [PATCH] Fix mathcal in chrome * map to unicode chars Bug: T352536 Change-Id: Iab04043df5cc04484d348b0c896a50c94ef79c16 --- src/WikiTexVC/MMLmappings/BaseParsing.php | 35 +++++++++++-------- .../MMLmappings/Util/MMLParsingUtil.php | 30 ++++++++++++++-- src/WikiTexVC/Nodes/Literal.php | 20 ++++++----- .../phpunit/unit/WikiTexVC/MMLRenderTest.php | 8 +++++ 4 files changed, 68 insertions(+), 25 deletions(-) diff --git a/src/WikiTexVC/MMLmappings/BaseParsing.php b/src/WikiTexVC/MMLmappings/BaseParsing.php index e90d05f65..70c59c773 100644 --- a/src/WikiTexVC/MMLmappings/BaseParsing.php +++ b/src/WikiTexVC/MMLmappings/BaseParsing.php @@ -202,7 +202,7 @@ class BaseParsing { $menclose = new MMLmenclose( "", [ "notation" => $notation ] ); $inner = $menclose->encapsulateRaw( - $node->getArg2()->renderMML() ) . $mpAdded->encapsulateRaw( $node->getArg1()->renderMML() ); + $node->getArg2()->renderMML() ) . $mpAdded->encapsulateRaw( $node->getArg1()->renderMML() ); return $mrow->encapsulateRaw( $msup->encapsulateRaw( $inner ) ); } @@ -236,7 +236,7 @@ class BaseParsing { $inner = $mrow->encapsulateRaw( $whatIsThis . $mstyle->encapsulateRaw( $mrow->encapsulateRaw( $node->getArg1()->renderMML() ) ) ) . $mrow->encapsulateRaw( $whatIsThis . $mstyle->encapsulateRaw( - $mrow->encapsulateRaw( $node->getArg2()->renderMML() ) ) ); + $mrow->encapsulateRaw( $node->getArg2()->renderMML() ) ) ); return $mrow->encapsulateRaw( $mfrac->encapsulateRaw( $inner ) ); } @@ -253,7 +253,7 @@ class BaseParsing { } public static function genFrac( $node, $passedArgs, $operatorContent, $name, - $left = null, $right = null, $thick = null, $style = null ) { + $left = null, $right = null, $thick = null, $style = null ) { // Actually this is in AMSMethods, consider refactoring left, right, thick, style $bm = new BaseMethods(); $ret = $bm->checkAndParseDelimiter( $name, $node, $passedArgs, $operatorContent, true ); @@ -527,14 +527,14 @@ class BaseParsing { $mspace = new MMLmspace( "", [ "width" => "0px","height" => ".25em", "depth" => "0px","mathbackground" => "black" ] ); return $mtext->encapsulateRaw( " " ) . - $mrowRel->encapsulateRaw( $mover->encapsulateRaw( - $mrowOp->encapsulateRaw( + $mrowRel->encapsulateRaw( $mover->encapsulateRaw( + $mrowOp->encapsulateRaw( $mrowOrd->encapsulateRaw( $mpadded->encapsulateRaw( $mo->encapsulateRaw( "⟵" ) ) ) . - $mspace->getEmpty() ) . - $mrowOrd->encapsulateRaw( - $mo->encapsulateRaw( "⟶" ) - ) ) ); + $mspace->getEmpty() ) . + $mrowOrd->encapsulateRaw( + $mo->encapsulateRaw( "⟶" ) + ) ) ); } // Removed all token based parsing, since macro resolution for the supported macros can be hardcoded in php @@ -672,7 +672,7 @@ class BaseParsing { if ( $node instanceof Fun2 ) { return $start . $mfrac->encapsulateRaw( $mrow->encapsulateRaw( $node->getArg1()->renderMML() ) . $mrow->encapsulateRaw( $node->getArg2()->renderMML() ) ) - . $tail; + . $tail; } $inner = ""; foreach ( $node->getArgs() as $arg ) { @@ -842,10 +842,16 @@ class BaseParsing { $mrow = new MMLmrow( TexClass::ORD, [] ); $args = MMLParsingUtil::getFontArgs( $name, $mathvariant, $passedArgs ); $state = []; + + // Unicode fixes for the operators if ( $mathvariant == Variants::DOUBLESTRUCK ) { - // Unicode fix for the operators $state = [ "double-struck-literals" => true ]; + } elseif ( $mathvariant == Variants::CALLIGRAPHIC ) { + $state = [ "calligraphic" => true ]; + } elseif ( $mathvariant == Variants::BOLDCALLIGRAPHIC ) { + $state = [ "bold-calligraphic" => true ]; } + if ( $node instanceof Fun1nb ) { // Only one mrow from Fun1nb !? return $mrow->encapsulateRaw( $node->getArg()->renderMML( $args, $state ) ); @@ -979,7 +985,7 @@ class BaseParsing { } $mrow = new MMLmrow( TexClass::ORD, [] ); $opParsed = ( $operatorContent != null && $operatorContent["limits"] ) - ? $operatorContent["limits"]->renderMML( $argsOp ) : ""; + ? $operatorContent["limits"]->renderMML( $argsOp ) : ""; if ( $node instanceof DQ ) { $munder = new MMLmunder(); @@ -987,7 +993,7 @@ class BaseParsing { } elseif ( $node instanceof FQ ) { $munderOver = new MMLmunderover(); return $munderOver->encapsulateRaw( $opParsed . $mrow->encapsulateRaw( $node->getDown()->renderMML() ) - . $mrow->encapsulateRaw( $node->getUp()->renderMML() ) ); + . $mrow->encapsulateRaw( $node->getUp()->renderMML() ) ); } } @@ -1036,7 +1042,8 @@ class BaseParsing { $end2 = $mrowEnd->encapsulateRaw( $operatorContent["sideset"]->getUp()->renderMML() ); return $mmlMrow->encapsulateRaw( $mmlMunderOver->encapsulateRaw( $mstyle->encapsulateRaw( - $mmlMultiscripts->encapsulateRaw( $opParsed . $in2 . "" . $in1 ) ) . $end1 . $end2 ) ); + $mmlMultiscripts->encapsulateRaw( $opParsed . $in2 . "" . $in1 ) ) + . $end1 . $end2 ) ); } $merror = new MMLmerror(); diff --git a/src/WikiTexVC/MMLmappings/Util/MMLParsingUtil.php b/src/WikiTexVC/MMLmappings/Util/MMLParsingUtil.php index d51634cd5..f3db0fb0e 100644 --- a/src/WikiTexVC/MMLmappings/Util/MMLParsingUtil.php +++ b/src/WikiTexVC/MMLmappings/Util/MMLParsingUtil.php @@ -126,9 +126,33 @@ class MMLParsingUtil { 'y' => '𝕪', 'z' => '𝕫' ]; - // Replace each character in the input string with its double-struck Unicode equivalent - return preg_replace_callback( '/[A-Za-z0-9]/', static function ( $matches ) use ( $map ) { - return $map[$matches[0]] ?? $matches[0]; + return self::matchAlphanumeric( $inputString, $map ); + } + + public static function mapToCaligraphicUnicode( $inputString ) { + $map = [ + '0' => '𝟎', '1' => '𝟏', '2' => '𝟐', '3' => '𝟑', '4' => '𝟒', + '5' => '𝟓', '6' => '𝟔', '7' => '𝟕', '8' => '𝟖', '9' => '𝟗', + 'A' => '𝒜', 'B' => 'ℬ', 'C' => '𝒞', 'D' => '𝒟', 'E' => 'ℰ', + 'F' => 'ℱ', 'G' => '𝒢', 'H' => 'ℋ', 'I' => 'ℐ', 'J' => '𝒥', + 'K' => '𝒦', 'L' => 'ℒ', 'M' => 'ℳ', 'N' => '𝒩', 'O' => '𝒪', + 'P' => '𝒫', 'Q' => '𝒬', 'R' => 'ℛ', 'S' => '𝒮', 'T' => '𝒯', + 'U' => '𝒰', 'V' => '𝒱', 'W' => '𝒲', 'X' => '𝒳', 'Y' => '𝒴', + 'Z' => '𝒵', 'a' => '𝒶', 'b' => '𝒷', 'c' => '𝒸', 'd' => '𝒹', + 'e' => 'ℯ', 'f' => '𝒻', 'g' => 'ℊ', 'h' => '𝒽', 'i' => '𝒾', + 'j' => '𝒿', 'k' => '𝓀', 'l' => '𝓁', 'm' => '𝓂', 'n' => '𝓃', + 'o' => 'ℴ', 'p' => '𝓅', 'q' => '𝓆', 'r' => '𝓇', 's' => '𝓈', + 't' => '𝓉', 'u' => '𝓊', 'v' => '𝓋', 'w' => '𝓌', 'x' => '𝓍', + 'y' => '𝓎', 'z' => '𝓏' + ]; + + return self::matchAlphanumeric( $inputString, $map ); + } + + public static function matchAlphanumeric( $inputString, $map ) { + // Replace each character in the input string with its caligraphic Unicode equivalent + return preg_replace_callback( '/[A-Za-z0-9]/u', static function ( $matches ) use ( $map ) { + return $map[$matches[0]] ?? $matches[0]; }, $inputString ); } } diff --git a/src/WikiTexVC/Nodes/Literal.php b/src/WikiTexVC/Nodes/Literal.php index fe825dff7..ad5098703 100644 --- a/src/WikiTexVC/Nodes/Literal.php +++ b/src/WikiTexVC/Nodes/Literal.php @@ -30,12 +30,16 @@ class Literal extends TexNode { array_push( $this->extendedLiterals, '\\infty', '\\emptyset' ); } - public function changeInputDoubleStruckChars( $input, $state ) { - /** If it's definitely a literal, and it is double struck, map it to double-struck unicode - * for correct rendering in Chrome, see https://phabricator.wikimedia.org/T352196 + public function changeUnicodeFontInput( $input, $state ) { + /** + * In some font modifications, it is required to explicitly use unicode + * characters instead of (only) attributes in MathML to indicate the font. + * This is mostly because of Chrome behaviour. I.e. see: https://phabricator.wikimedia.org/T352196 */ if ( isset( $state["double-struck-literals"] ) ) { return MMLParsingUtil::mapToDoubleStruckUnicode( $input ); + } elseif ( isset( $state["calligraphic"] ) ) { + return MMLParsingUtil::mapToCaligraphicUnicode( $input ); } return $input; } @@ -48,7 +52,7 @@ class Literal extends TexNode { } if ( is_numeric( $this->arg ) ) { $mn = new MMLmn( "", $arguments ); - return $mn->encapsulateRaw( $this->changeInputDoubleStruckChars( $this->arg, $state ) ); + return $mn->encapsulateRaw( $this->changeUnicodeFontInput( $this->arg, $state ) ); } // is important to split and find chars within curly and differentiate, see tc 459 $foundOperatorContent = MMLutil::initalParseLiteralExpression( $this->arg ); @@ -95,8 +99,8 @@ class Literal extends TexNode { // Sieve for Makros $ret = BaseMethods::checkAndParse( $inputP, $arguments, - array_merge( $operatorContent ?? [], $state ?? [] ), - $this, false ); + array_merge( $operatorContent ?? [], $state ?? [] ), + $this, false ); if ( $ret ) { return $ret; @@ -114,7 +118,7 @@ class Literal extends TexNode { // If falling through all sieves just create an MI element $mi = new MMLmi( "", $arguments ); - return $mi->encapsulateRaw( $this->changeInputDoubleStruckChars( $input, $state ) ); // $this->arg + return $mi->encapsulateRaw( $this->changeUnicodeFontInput( $input, $state ) ); // $this->arg } /** @@ -162,7 +166,7 @@ class Literal extends TexNode { if ( preg_match( $regexp, $s ) == 1 ) { return [ $s ]; } elseif ( in_array( $s, $lit, true ) ) { - return [ $s ]; + return [ $s ]; } else { return []; } diff --git a/tests/phpunit/unit/WikiTexVC/MMLRenderTest.php b/tests/phpunit/unit/WikiTexVC/MMLRenderTest.php index e0b26e7fc..f3fad3820 100644 --- a/tests/phpunit/unit/WikiTexVC/MMLRenderTest.php +++ b/tests/phpunit/unit/WikiTexVC/MMLRenderTest.php @@ -15,6 +15,14 @@ use MediaWikiUnitTestCase; * @covers \MediaWiki\Extension\Math\WikiTexVC\TexVC */ class MMLRenderTest extends MediaWikiUnitTestCase { + public function testMathCalUnicode() { + $input = "\\mathcal{O}, \\mathcal{K}, \\mathcal{t}, \\mathcal{c}"; + $mathMLtexVC = $this->generateMML( $input ); + $this->assertStringContainsString( '𝒪', $mathMLtexVC ); + $this->assertStringContainsString( '𝒦', $mathMLtexVC ); + $this->assertStringContainsString( '𝓉', $mathMLtexVC ); + $this->assertStringContainsString( '𝒸', $mathMLtexVC ); + } public function testDoubleStruckLiteralUnicode() { $input = "\\mathbb{Q}, \\R, \\Complex, \\mathbb{4}";