Move Parsoid implementation of ImageMap from Parsoid repo

* There have been no changes to the code except for:
  - class is now called ParsoidImageMap with a matching file name
  - the namespace is changed to MediaWiki\Extension\ImageMap.
  - adding phan directives to deal with Parsoid's DOM proxies
    which phan doesn't seem to be able to see here.

* Parsoid's SiteConfig extension registration code simply overwrites
  the extConfig array for a tag name. So, keeping the config and
  implementation identical ensures that no matter which version of
  code is registered (based on load order), we get the same behavior.

* Once this rolls out on the next train, we can remove the code
  from Parsoid.

* Future patches might refactor the two implementations to extract
  common functionality or merge code into a single file.

Change-Id: I6cffa4420d2daa70174fa926d5c7a4d983792969
This commit is contained in:
Subramanya Sastry 2023-09-19 15:25:49 -05:00
parent 7a2d263b39
commit d9a7fea019
2 changed files with 411 additions and 1 deletions

View file

@ -1,6 +1,9 @@
{
"name": "ImageMap",
"author": "Tim Starling",
"author": [
"Tim Starling",
"Arlo Breault"
],
"url": "https://www.mediawiki.org/wiki/Extension:ImageMap",
"descriptionmsg": "imagemap_desc",
"license-name": "GPL-2.0-or-later",
@ -41,5 +44,8 @@
"styles": "resources/ext.imagemap.styles.less"
}
},
"ParsoidModules": [
"MediaWiki\\Extension\\ImageMap\\ParsoidImageMap"
],
"manifest_version": 2
}

View file

@ -0,0 +1,404 @@
<?php
declare( strict_types = 1 );
namespace MediaWiki\Extension\ImageMap;
use DOMNode;
use Wikimedia\Parsoid\DOM\DocumentFragment;
use Wikimedia\Parsoid\DOM\Element;
use Wikimedia\Parsoid\Ext\DOMDataUtils;
use Wikimedia\Parsoid\Ext\DOMUtils;
use Wikimedia\Parsoid\Ext\ExtensionError;
use Wikimedia\Parsoid\Ext\ExtensionModule;
use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
use Wikimedia\Parsoid\Ext\WTUtils;
use Wikimedia\Parsoid\Utils\DOMCompat;
/**
* This is an adaptation of the existing ImageMap extension of the legacy
* parser.
*
* Syntax:
* <imagemap>
* Image:Foo.jpg | 100px | picture of a foo
*
* rect 0 0 50 50 [[Foo type A]]
* circle 50 50 20 [[Foo type B]]
*
* desc bottom-left
* </imagemap>
*
* Coordinates are relative to the source image, not the thumbnail.
*/
class ParsoidImageMap extends ExtensionTagHandler implements ExtensionModule {
	/*
	 * Numeric codes for the `desc` directive. They follow the order of the
	 * canonical names listed in sourceToDom() and, except for NONE, index
	 * into DESC_TYPE_MAP.
	 */
	private const TOP_RIGHT = 0;
	private const BOTTOM_RIGHT = 1;
	private const BOTTOM_LEFT = 2;
	private const TOP_LEFT = 3;
	private const NONE = 4;

	/** Class-name suffixes for `mw-ext-imagemap-desc-*`, keyed by desc code */
	private const DESC_TYPE_MAP = [
		'top-right', 'bottom-right', 'bottom-left', 'top-left'
	];

	/** @inheritDoc */
	public function getConfig(): array {
		return [
			'name' => 'ImageMap',
			'tags' => [
				[
					'name' => 'imagemap',
					'handler' => self::class,
					'options' => [
						'outputHasCoreMwDomSpecMarkup' => true
					],
				]
			]
		];
	}

	/**
	 * Render the body of an <imagemap> tag.
	 *
	 * Blank lines and lines starting with `#` are skipped. The first remaining
	 * line is the image specification and is rendered via
	 * $extApi->renderMedia(). Each later line is either a `desc` directive or
	 * a shape (`default`, `rect`, `circle`, `poly`) followed by a link.
	 * Shapes other than `default` become <area> children of a <map> element;
	 * the `default` link replaces the anchor wrapping the image.
	 *
	 * @param ParsoidExtensionAPI $extApi
	 * @param string $src Tag body, processed line by line
	 * @param array $extArgs Not used by this handler
	 * @return DocumentFragment Fragment holding the rendered media element
	 * @throws ExtensionError For a missing/invalid image or a malformed line
	 */
	public function sourceToDom(
		ParsoidExtensionAPI $extApi, string $src, array $extArgs
	): DocumentFragment {
		$domFragment = $extApi->getTopLevelDoc()->createDocumentFragment();

		$thumb = null;
		$anchor = null;
		$imageNode = null;
		$mapHTML = null;

		// Define canonical desc types to allow i18n of 'imagemap_desc_types'
		$descTypesCanonical = 'top-right, bottom-right, bottom-left, top-left, none';
		$descType = self::BOTTOM_RIGHT;

		$scale = 1;
		$lineNum = 0;
		$first = true;
		$defaultLinkAttribs = null;

		$nextOffset = $extApi->extTag->getOffsets()->innerStart();

		$lines = explode( "\n", $src );

		foreach ( $lines as $line ) {
			++$lineNum;

			$offset = $nextOffset;
			// +1 for the nl
			$nextOffset = $offset + strlen( $line ) + 1;
			// Skip over the leading whitespace that trim() is about to drop
			$offset += strlen( $line ) - strlen( ltrim( $line ) );

			$line = trim( $line );

			if ( $line === '' || $line[0] === '#' ) {
				continue;
			}

			if ( $first ) {
				$first = false;

				// The first line should have an image specification on it
				// Extract it and render the HTML
				$bits = explode( '|', $line, 2 );
				if ( count( $bits ) === 1 ) {
					$image = $bits[0];
					$options = '';
				} else {
					[ $image, $options ] = $bits;
					$options = '|' . $options;
				}

				$imageOpts = [
					[ $options, $offset + strlen( $image ) ],
				];

				$thumb = $extApi->renderMedia(
					$image, $imageOpts, $error,
					// NOTE(T290044): Imagemaps are always rendered as blocks
					true
				);
				if ( !$thumb ) {
					throw new ExtensionError( $error );
				}

				$anchor = $thumb->firstChild;
				$imageNode = $anchor->firstChild;

				// Could be a span
				if ( DOMCompat::nodeName( $imageNode ) !== 'img' ) {
					throw new ExtensionError( 'imagemap_invalid_image' );
				}
				DOMUtils::assertElt( $imageNode );

				// Add the linear dimensions to avoid inaccuracy in the scale
				// factor when one is much larger than the other
				// (sx+sy)/(x+y) = s

				$thumbWidth = (int)( $imageNode->getAttribute( 'width' ) ?? '' );
				$thumbHeight = (int)( $imageNode->getAttribute( 'height' ) ?? '' );
				$imageWidth = (int)( $imageNode->getAttribute( 'data-file-width' ) ?? '' );
				$imageHeight = (int)( $imageNode->getAttribute( 'data-file-height' ) ?? '' );

				$denominator = $imageWidth + $imageHeight;
				$numerator = $thumbWidth + $thumbHeight;
				if ( $denominator <= 0 || $numerator <= 0 ) {
					throw new ExtensionError( 'imagemap_invalid_image' );
				}
				$scale = $numerator / $denominator;
				continue;
			}

			// Handle desc spec
			$cmd = strtok( $line, " \t" );
			if ( $cmd === 'desc' ) {
				$typesText = $descTypesCanonical;
				// FIXME: Support this ...
				// $typesText = wfMessage( 'imagemap_desc_types' )->inContentLanguage()->text();
				// if ( $descTypesCanonical != $typesText ) {
				// 	// i18n desc types exists
				// 	$typesText = $descTypesCanonical . ', ' . $typesText;
				// }
				$types = array_map( 'trim', explode( ',', $typesText ) );
				// strtok( '' ) returns the rest of the line after the `desc` token
				$type = trim( strtok( '' ) ?: '' );
				$descType = array_search( $type, $types, true );
				// Check for "not found" before doing arithmetic on the result
				if ( $descType === false ) {
					throw new ExtensionError( 'imagemap_invalid_desc', $typesText );
				}
				if ( $descType > 4 ) {
					// A localized descType is used. Subtract 5 to reach the canonical desc type.
					$descType -= 5;
				}
				continue;
			}

			// Find the link
			$link = trim( strstr( $line, '[' ) ?: '' );
			if ( $link === '' ) {
				throw new ExtensionError( 'imagemap_no_link', $lineNum );
			}

			// FIXME: Omits DSR offsets, which will be more relevant when VE
			// supports HTML editing of maps.

			$linkFragment = $extApi->wikitextToDOM(
				$link,
				[
					'parseOpts' => [
						'extTag' => 'imagemap',
						'context' => 'inline',
					],
					// Create new frame, because $link doesn't literally
					// appear on the page, it has been hand-crafted here
					'processInNewFrame' => true
				],
				// sol
				true
			);
			$a = DOMCompat::querySelector( $linkFragment, 'a' );
			if ( $a === null ) {
				// Meh, might be for other reasons
				throw new ExtensionError( 'imagemap_invalid_title', $lineNum );
			}
			DOMUtils::assertElt( $a );

			$href = $a->getAttribute( 'href' ) ?? '';
			$externLink = DOMUtils::matchRel( $a, '#^mw:ExtLink/#D' ) !== null;
			$alt = '';

			$hasContent = $externLink || ( DOMDataUtils::getDataParsoid( $a )->stx ?? null ) === 'piped';

			if ( $hasContent ) {
				// FIXME: The legacy extension does ad hoc link parsing, which
				// results in link content not interpreting wikitext syntax.
				// Here we produce a known difference by just taking the text
				// content of the resulting dom.
				// See the test, "Link with wikitext syntax in content"
				$alt = trim( $a->textContent );
			}

			// Everything on the line before the link
			$shapeSpec = substr( $line, 0, -strlen( $link ) );

			// Tokenize shape spec. tokenizeCoords() continues from this
			// strtok() state, so no other strtok() call may come in between.
			$shape = strtok( $shapeSpec, " \t" );
			switch ( $shape ) {
				case 'default':
					$coords = [];
					break;
				case 'rect':
					$coords = self::tokenizeCoords( $lineNum, 4 );
					break;
				case 'circle':
					$coords = self::tokenizeCoords( $lineNum, 3 );
					break;
				case 'poly':
					$coords = self::tokenizeCoords( $lineNum, 1, true );
					if ( count( $coords ) % 2 !== 0 ) {
						throw new ExtensionError( 'imagemap_poly_odd', $lineNum );
					}
					break;
				default:
					throw new ExtensionError( 'imagemap_unrecognised_shape', $lineNum );
			}

			// Scale the coords using the size of the source image
			foreach ( $coords as $i => $c ) {
				$coords[$i] = (int)round( $c * $scale );
			}

			// Construct the area tag
			$attribs = [ 'href' => $href ];
			if ( $externLink ) {
				$attribs['class'] = 'plainlinks';
				// FIXME: T186241
				// if ( $wgNoFollowLinks ) {
				// 	$attribs['rel'] = 'nofollow';
				// }
			}
			if ( $shape !== 'default' ) {
				$attribs['shape'] = $shape;
			}
			if ( $coords ) {
				$attribs['coords'] = implode( ',', $coords );
			}
			if ( $alt !== '' ) {
				if ( $shape !== 'default' ) {
					$attribs['alt'] = $alt;
				}
				$attribs['title'] = $alt;
			}
			if ( $shape === 'default' ) {
				$defaultLinkAttribs = $attribs;
			} else {
				// The <map> is created lazily, on the first non-default shape
				if ( $mapHTML === null ) {
					$mapHTML = $domFragment->ownerDocument->createElement( 'map' );
				}
				$area = $domFragment->ownerDocument->createElement( 'area' );
				foreach ( $attribs as $key => $val ) {
					$area->setAttribute( $key, $val );
				}
				$mapHTML->appendChild( $area );
			}
		}

		// Ugh! This is messy.
		// The proxy classes aren't visible to phan here.
		// Maybe we should get rid of those since we are unlikely
		// to go the Dodo route since there is a proposal to introduce
		// a HTML5 parsing and updated DOM library in newer PHP versions.
		//
		// Help out phan since it doesn't seem to be able to look
		// at the definitions in vendor?
		'@phan-var Element $thumb';
		'@phan-var DOMNode $anchor';
		'@phan-var Element $imageNode';

		// $first is still set when no image specification line was seen
		if ( $first ) {
			throw new ExtensionError( 'imagemap_no_image' );
		}

		if ( $mapHTML !== null ) {
			// Construct the map

			// Add a hash of the map HTML to avoid breaking cached HTML fragments that are
			// later joined together on the one page (T18471).
			// The only way these hashes can clash is if the map is identical, in which
			// case it wouldn't matter that the "wrong" map was used.
			$mapName = 'ImageMap_' . substr( md5( DOMCompat::getInnerHTML( $mapHTML ) ), 0, 16 );
			$mapHTML->setAttribute( 'name', $mapName );

			// Alter the image tag
			$imageNode->setAttribute( 'usemap', "#$mapName" );

			$thumb->insertBefore( $mapHTML, $imageNode->parentNode->nextSibling );
		}

		// For T22030
		DOMCompat::getClassList( $thumb )->add( 'noresize' );

		// Determine whether a "magnify" link is present
		$typeOf = $thumb->getAttribute( 'typeof' ) ?? '';
		if ( !preg_match( '#\bmw:File/Thumb\b#', $typeOf ) && $descType !== self::NONE ) {
			// The following classes are used here:
			// * mw-ext-imagemap-desc-top-right
			// * mw-ext-imagemap-desc-bottom-right
			// * mw-ext-imagemap-desc-bottom-left
			// * mw-ext-imagemap-desc-top-left
			DOMCompat::getClassList( $thumb )->add(
				'mw-ext-imagemap-desc-' . self::DESC_TYPE_MAP[$descType]
			);
		}

		// Wrap the image in the `default` link if one was given, else in a plain span
		if ( $defaultLinkAttribs !== null ) {
			$defaultAnchor = $domFragment->ownerDocument->createElement( 'a' );
			foreach ( $defaultLinkAttribs as $name => $value ) {
				$defaultAnchor->setAttribute( $name, $value );
			}
		} else {
			$defaultAnchor = $domFragment->ownerDocument->createElement( 'span' );
		}
		$defaultAnchor->appendChild( $imageNode );
		$thumb->replaceChild( $defaultAnchor, $anchor );

		if ( !WTUtils::hasVisibleCaption( $thumb ) ) {
			$caption = DOMCompat::querySelector( $thumb, 'figcaption' );
			// Guard against a missing <figcaption> instead of dereferencing null
			$captionText = $caption !== null ? trim( $caption->textContent ?? '' ) : '';
			// Strict comparison so that a caption such as "0" is not dropped
			if ( $captionText !== '' ) {
				$defaultAnchor->setAttribute( 'title', $captionText );
			}
		}

		$extApi->getMetadata()->addModules( $this->getModules() );
		$extApi->getMetadata()->addModuleStyles( $this->getModuleStyles() );

		$domFragment->appendChild( $thumb );
		return $domFragment;
	}

	/**
	 * Read the remaining tokens of the current strtok() sequence as coordinates.
	 *
	 * This relies on strtok()'s internal state: the caller must have started
	 * tokenizing the shape specification with strtok( $shapeSpec, " \t" )
	 * immediately beforehand.
	 *
	 * @param int $lineNum Line number, for error reporting
	 * @param int $minCount Minimum token count
	 * @param bool $allowNegative Whether coordinates below zero are accepted
	 * @return array Array of coordinates, as the numeric strings read from the line
	 * @throws ExtensionError If a token is not numeric or is out of range,
	 *   or if fewer than $minCount tokens are present
	 */
	private static function tokenizeCoords(
		int $lineNum, int $minCount = 0, bool $allowNegative = false
	): array {
		// Bound negative values as well, so that scaling and casting to int
		// later on never sees a float outside the integer range.
		$lowerBound = $allowNegative ? -1e9 : 0;

		$coords = [];
		$coord = strtok( " \t" );
		while ( $coord !== false ) {
			if ( !is_numeric( $coord ) || $coord > 1e9 || $coord < $lowerBound ) {
				throw new ExtensionError( 'imagemap_invalid_coord', $lineNum );
			}
			$coords[] = $coord;
			$coord = strtok( " \t" );
		}
		if ( count( $coords ) < $minCount ) {
			// TODO: Should this also check there aren't too many coords?
			throw new ExtensionError( 'imagemap_missing_coord', $lineNum );
		}
		return $coords;
	}

	/**
	 * @return array Names of the modules registered via addModules()
	 */
	public function getModules(): array {
		return [ 'ext.imagemap' ];
	}

	/**
	 * @return array Names of the style modules registered via addModuleStyles()
	 */
	public function getModuleStyles(): array {
		return [ 'ext.imagemap.styles' ];
	}
}