<?php

namespace MediaWiki\Extension\Math\WikiTexVC\MMLmappings\Util;

use MediaWiki\Extension\Math\WikiTexVC\XMLNode;

/**
 * Algorithm to make a simple, but customizable comparison of two MathML-Strings for automated testing.
 * It compares all element keys in the tree without order and calculates an F-score based on similarity.
 * Also, it gives information about the elements which differ between both trees.
 *
 * @author Johannes Stegmüller
 */
class MMLComparator {

	/**
	 * Keys (tag names) of MathML elements which are not
	 * considered for the similarity comparison.
	 * An ignored key can still be taken into account via CHECKEXPLICITLY.
	 */
	private const IGNOREDELEMENTKEYS = [
		"semantics",
		"annotation",
		"annotation-xml",
		"csymbol",
		"mrow",
		"mstyle"
	];

	/**
	 * Exceptions to the ignore list, given as element key => list of attribute names.
	 * An ignored element key mentioned here is still compared if at least one
	 * of its collected elements carries one of the listed attributes.
	 */
	private const CHECKEXPLICITLY = [
		"mstyle" => [ "mathcolor" ]
	];

	/**
	 * Attributes which are not considered for similarity checks.
	 * The ignored attributes are always defined for a specific MathML element key
	 * (element key => list of ignored attribute names).
	 */
	private const IGNOREDATTRIBUTES = [
		"mrow" => [ "class", "data-mjx-texclass" ],
		"math" => [ "alttext", "display" ],
		"mi"   => [ "class", "data-mjx-variant", "mathvariant", "data-mjx-texclass", "data-mjx-alternate" ],
		"mo"   => [ "data-mjx-pseudoscript", "stretchy", "fence", "data-mjx-texclass", "texClass", "class",
			"data-mjx-alternate", "form", "accent", "lspace", "rspace", "xref", "id" ],
		"mtext" => [ "texClass", "class" ],
		"mspace" => [ "width" ]
	];

	/**
	 * Parses a MathML string and renders the element names of its tree in bracket notation.
	 * The parse result of simplexml_load_string is handed on unchecked.
	 *
	 * @param string $mml MathML as string
	 * @return string element tree in bracket format
	 */
	public static function functionObtainTreeInBrackets( $mml ) {
		return self::convertToBracketFormat(
			self::xmlToNode( simplexml_load_string( $mml ) )
		);
	}

	/**
	 * Compares the base to the comparison MathML.
	 * The base is considered the reference MathML. Calculates an F-Score for similarity of elements.
	 * MathML elements which are not considered for similarity checks can be specified within this class.
	 * Gives back the differing elements relative to the base.
	 * If there is any issue with the input (empty input, or MathML which cannot be parsed as XML),
	 * the returned F-Score has the value '-0.0' and no differences are reported.
	 *
	 * @param string $mmlBase MathML base element as string
	 * @param string $mmlComp MathML comparison element as string
	 * @return array with 'similarityF': Calculated F-Score for similarity, 'more/less': diffs relative to the base.
	 */
	public function compareMathML( $mmlBase, $mmlComp ): array {
		$compRes = [
			"similarityF" => -0.0,
			"more" => [],
			"less" => []
		];

		if ( !$mmlBase || !$mmlComp || trim( $mmlBase ) == "" || trim( $mmlComp ) == "" ) {
			return $compRes;
		}

		$xmlBase = simplexml_load_string( $mmlBase );
		$xmlComp = simplexml_load_string( $mmlComp );
		if ( $xmlBase === false || $xmlComp === false ) {
			// Malformed XML: without this guard both sides would collapse to empty
			// comparison arrays and be reported as a perfect match (F-Score 1.0).
			return $compRes;
		}

		// Create flattened Arrays of MathML
		$mbase = $this->createComparisonArray( $xmlBase );
		$mcomp = $this->createComparisonArray( $xmlComp );

		// Index 0 of the counts is the number of elements (relevant for base, retrieved for comp)
		$overallRelevantSum = $this->countArraySize( $mbase )[0];
		$overallRetrievedSum = $this->countArraySize( $mcomp )[0];

		// Fills 'more' and 'less' of $compRes by reference and returns the number of matched elements
		$ctrSame = $this->compareMathMLKeyArrays( $compRes, $mbase, $mcomp );

		$compRes['similarityF'] = $this->calculateFscore( $ctrSame, $overallRelevantSum, $overallRetrievedSum );
		return $compRes;
	}

	/**
	 * Recursively converts an XML element into an XMLNode tree.
	 * Only the element names are carried over; each node is created from the
	 * element name and its children are appended in document order.
	 *
	 * @param \SimpleXMLElement $xml element to convert
	 * @return XMLNode root node of the converted tree
	 */
	public static function xmlToNode( $xml ) {
		$tagName = $xml->getName();
		$current = new XMLNode( $tagName );

		foreach ( $xml->children() as $childXml ) {
			$childNode = self::xmlToNode( $childXml );
			$current->children[] = $childNode;
		}

		return $current;
	}

	/**
	 * Calculates an edit distance (insert, remove, replace; each with cost 1) between
	 * the sequences of values of the direct children of two nodes.
	 * Deeper levels of the trees are not inspected.
	 *
	 * @param object $root1 node with 'children' (each child having a 'value')
	 * @param object $root2 node with 'children' (each child having a 'value')
	 * @return array with 'TED': the distance, 'normalizedTED': the distance divided by the
	 *  number of children of both nodes, or -1 if both nodes have no children
	 */
	private function treeEditDistance( $root1, $root2 ) {
		$len1 = count( $root1->children );
		$len2 = count( $root2->children );

		$table = [];
		for ( $row = 0; $row <= $len1; $row++ ) {
			for ( $col = 0; $col <= $len2; $col++ ) {
				if ( $row === 0 ) {
					// Empty first sequence: insert everything
					$cost = $col;
				} elseif ( $col === 0 ) {
					// Empty second sequence: remove everything
					$cost = $row;
				} elseif ( $root1->children[$row - 1]->value == $root2->children[$col - 1]->value ) {
					$cost = $table[$row - 1][$col - 1];
				} else {
					$insert = $table[$row][$col - 1];
					$remove = $table[$row - 1][$col];
					$replace = $table[$row - 1][$col - 1];
					$cost = 1 + min( $insert, $remove, $replace );
				}
				$table[$row][$col] = $cost;
			}
		}

		$distance = $table[$len1][$len2];
		// Total number of direct children of both nodes
		$nodeCount = $len1 + $len2;
		$normalized = $nodeCount != 0 ? $distance / $nodeCount : -1;

		return [ "TED" => $distance, "normalizedTED" => $normalized ];
	}

	/**
	 * Renders a node tree in bracket notation.
	 * A node without children is written as "{value}", a node with children as
	 * "value{...}" where the braces enclose the concatenated children.
	 *
	 * @param object $root node with 'value' and 'children'
	 * @return string bracket representation of the tree
	 */
	public static function convertToBracketFormat( $root ) {
		if ( !empty( $root->children ) ) {
			$inner = '';
			foreach ( $root->children as $childNode ) {
				$inner .= self::convertToBracketFormat( $childNode );
			}
			return $root->value . '{' . $inner . '}';
		}

		// Leaf node
		return "{" . $root->value . "}";
	}

	/**
	 * Compares the element lists of base and comparison for every element key of the base.
	 * Keys of the base without a counterpart in the comparison are skipped;
	 * keys which exist only in the comparison are not visited.
	 *
	 * @param array &$compRes result array, unmatched elements are written to 'more' (only in
	 *  comparison) and 'less' (only in base), grouped by element key
	 * @param array $mbase comparison array of the base MathML
	 * @param array $mcomp comparison array of the comparison MathML
	 * @return int number of elements found in both base and comparison
	 */
	private function compareMathMLKeyArrays( &$compRes, $mbase, $mcomp ) {
		$matched = 0;
		foreach ( $mbase as $tag => $baseList ) {
			$compList = $mcomp[$tag] ?? null;
			if ( $compList != null ) {
				$diff = $this->compareArrays( $baseList, $compList );
				$matched += $diff['sameCtr'];
				foreach ( [ 'more', 'less' ] as $side ) {
					if ( $diff[$side] !== [] ) {
						$compRes[$side][$tag] = $diff[$side];
					}
				}
			}
			// else: the base has this mml element(s), but not the comparison
		}
		return $matched;
	}

	/**
	 * Calculates the F-Score (harmonic mean of precision and recall).
	 * The result is always a float, also for the zero cases.
	 *
	 * @param int $intersection number of elements found in both base and comparison
	 * @param int $sumRelevant number of elements in the base
	 * @param int $sumRetrieved number of elements in the comparison
	 * @return float F-Score; 1.0 if both sums are zero, 0.0 if nothing was retrieved or nothing matched
	 */
	private function calculateFscore( $intersection, $sumRelevant, $sumRetrieved ): float {
		if ( $sumRelevant == 0 && $sumRetrieved == 0 ) {
			// Nothing to find and nothing found counts as a perfect match
			return 1.0;
		}

		if ( $sumRetrieved == 0 ) {
			// Precision is undefined, there are relevant elements but none were retrieved
			return 0.0;
		}

		// With no relevant elements the recall is defined as 1 to avoid a division by zero
		$recall = $sumRelevant == 0 ? 1.0 : $intersection / $sumRelevant;
		$prec = $intersection / $sumRetrieved;
		if ( ( $prec + $recall ) == 0 ) {
			return 0.0;
		}

		// Calculate F-Score
		return (float)( 2 * ( ( $prec * $recall ) / ( $prec + $recall ) ) );
	}

	/**
	 * Counts the entries of a comparison array.
	 *
	 * @param array $arr comparison array (element key => list of element entries)
	 * @return array with index 0: overall number of element entries,
	 *  index 1: summed size of all element entries
	 */
	private function countArraySize( $arr ): array {
		$counts = [ 0, 0 ];
		foreach ( $arr as $entries ) {
			foreach ( $entries as $entry ) {
				$counts[0]++;
				$counts[1] += count( $entry );
			}
		}

		return $counts;
	}

	/**
	 * Matches the elements of two lists which belong to the same element key.
	 * Every base element is paired with the first comparison element that is
	 * considered the same and not yet paired. Paired entries are marked with -1
	 * so they can not be matched twice and do not show up as remainder.
	 *
	 * @param array $base elements of the base
	 * @param array $comp elements of the comparison
	 * @return array with 'sameCtr': number of pairs, 'less': unpaired elements of the base,
	 *  'more': unpaired elements of the comparison
	 */
	private function compareArrays( $base, $comp ): array {
		$pairs = 0;

		foreach ( $base as $baseIdx => $baseItem ) {
			foreach ( $comp as $compIdx => $compItem ) {
				// Skip comparison entries which are already paired
				if ( $compItem != -1 && $this->compareTwoBaseElements( $compItem, $baseItem ) ) {
					$base[$baseIdx] = -1;
					$comp[$compIdx] = -1;
					$pairs++;
					break;
				}
			}
		}

		$result = [
			"sameCtr" => $pairs,
			"less" => [],
			"more" => [],
		];
		$result = $this->addRemainingElements( $result, 'less', $base );
		$result = $this->addRemainingElements( $result, 'more', $comp );
		return $result;
	}

	/**
	 * Appends all entries of $data which are neither null nor marked
	 * with -1 to the list stored under $key in the result array.
	 *
	 * @param array $compRet result array to extend
	 * @param string $key key of the list within the result array, e.g. 'less' or 'more'
	 * @param array $data entries to append
	 * @return array the extended result array
	 */
	private function addRemainingElements( $compRet, $key, $data ): array {
		foreach ( $data as $entry ) {
			if ( $entry === null || $entry == -1 ) {
				continue;
			}
			$compRet[$key][] = $entry;
		}
		return $compRet;
	}

	/**
	 * Checks if two element entries are the same.
	 * Two entries are the same if they hold the same keys with the same values,
	 * regardless of the order of the keys. Values are compared by their string representation.
	 *
	 * @param array $compEl entry of the comparison
	 * @param array $baseEl entry of the base
	 * @return bool true if both entries are the same
	 */
	private function compareTwoBaseElements( $compEl, $baseEl ): bool {
		// Most basic comparison, this works for elements without attributes
		if ( $compEl === $baseEl ) {
			return true;
		}

		// Compare key/value pairs in both directions. array_diff would only look at the values,
		// so the same value under a different attribute name would wrongly count as a match.
		return array_diff_assoc( $compEl, $baseEl ) === []
			&& array_diff_assoc( $baseEl, $compEl ) === [];
	}

	/**
	 * Creates the flattened comparison array for a parsed MathML tree:
	 * collects all elements grouped by element key and removes the ignored keys.
	 *
	 * @param \SimpleXMLElement $xml parsed MathML
	 * @return array element key => list of element entries
	 */
	private function createComparisonArray( $xml ): array {
		$collected = [];
		$this->prepareMMLElements( $xml, $collected );
		return $this->filterKeys( $collected );
	}

	/**
	 * Removes the ignored element keys from the collected elements.
	 * An ignored key is handed to checkExplicitKeys, which decides if it is kept nevertheless.
	 *
	 * @param array $allMMLElements element key => list of element entries
	 * @return array the elements which are considered for the comparison
	 */
	private function filterKeys( $allMMLElements ): array {
		$kept = [];
		foreach ( $allMMLElements as $tag => $entries ) {
			if ( !in_array( $tag, self::IGNOREDELEMENTKEYS, true ) ) {
				$kept[$tag] = $entries;
				continue;
			}
			$kept = $this->checkExplicitKeys( $tag, $entries, $kept );
		}
		return $kept;
	}

	/**
	 * Recursively collects all descendant elements of $xml, grouped by element key.
	 * Every element is stored as an array of its not ignored attributes; its trimmed
	 * text content is added under the key "val" if it is not empty.
	 *
	 * @param \SimpleXMLElement $xml element whose descendants are collected
	 * @param array &$finalArray collected elements (element key => list of element entries)
	 * @param bool $alwaysSetVal if true, "val" is also set for an empty text content.
	 *  The flag is not handed on to the recursive calls, so it only affects
	 *  the direct children of $xml.
	 */
	private function prepareMMLElements( $xml, &$finalArray, $alwaysSetVal = false ) {
		foreach ( $xml as $tag => $child ) {
			if ( !( $child instanceof \SimpleXMLElement ) ) {
				$finalArray[$tag][] = $child;
			} else {
				$entry = $this->filterAttributes( $tag, $child->attributes() );
				$text = trim( (string)$child );
				if ( $alwaysSetVal || $text !== "" ) {
					$entry["val"] = $text;
				}
				$finalArray[$tag][] = $entry;
			}
			// Descend into the children of the current element
			$this->prepareMMLElements( $child, $finalArray );
		}
	}

	/**
	 * Collects the attributes of an element which are considered for the comparison.
	 * Attributes listed in IGNOREDATTRIBUTES for the element key are left out.
	 * The attribute values are stored as plain strings, so the result holds
	 * no SimpleXMLElement objects.
	 *
	 * @param string $elementKey key (tag name) of the element
	 * @param \SimpleXMLElement $attributes attributes of the element
	 * @return array attribute name => attribute value as string
	 */
	private function filterAttributes( $elementKey, $attributes ): array {
		$ignoredAttrs = self::IGNOREDATTRIBUTES[$elementKey] ?? [];
		$finalAttributes = [];
		foreach ( $attributes as $akey => $aval ) {
			if ( in_array( $akey, $ignoredAttrs, true ) ) {
				continue;
			}
			// Cast explicitly, indexing the attribute would give back another SimpleXMLElement
			$finalAttributes[$akey] = (string)$aval;
		}
		return $finalAttributes;
	}

	/**
	 * Decides if an ignored element key is considered for the comparison nevertheless.
	 * If CHECKEXPLICITLY lists attributes for the key and at least one of the
	 * elements has one of these attributes, all elements of the key are added.
	 *
	 * @param string $key the ignored element key
	 * @param array $element list of element entries collected for the key
	 * @param array $finalKeys the elements considered for the comparison so far
	 * @return array $finalKeys, extended by the elements of the key if applicable
	 */
	private function checkExplicitKeys( $key, $element, array $finalKeys ): array {
		$explicitAttrs = self::CHECKEXPLICITLY[$key] ?? null;
		if ( !$explicitAttrs ) {
			return $finalKeys;
		}

		foreach ( $element as $entry ) {
			foreach ( $explicitAttrs as $attrName ) {
				if ( array_key_exists( $attrName, $entry ) ) {
					$finalKeys[$key] = $element;
				}
			}
		}
		return $finalKeys;
	}
}