mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-12-04 18:58:37 +00:00
461c76981f
At first I was going for a more minimal replacement of mw.Uri with URL, until I discovered that this code depends on a mw.Uri bug that would be difficult to replicate:

    // Expected: Relative URLs are accepted
    new mw.Uri( '/foo' ).toString() // => 'https://localhost/foo'
    // Expected: Protocol is optional
    new mw.Uri( 'example.com/foo' ).toString() // => 'https://example.com/foo'
    // Unexpected: Treated as empty domain with no protocol rather than relative URL
    new mw.Uri( './foo' ).toString() // => 'https://./foo'

So I went for a bigger rewrite to preserve the intent rather than the exact logic.

I had to change some test cases to use more realistic fake data. They previously relied on bugs in our URL handling to pass despite the base URLs being incorrect, particularly for non-short URLs (see T270219). In my testing non-short URLs behave the same as before in practice.

Depends-On: I07a8c097dba0f5572c0aedf4febdf1434063ea6f
Bug: T325249
Change-Id: I232361266c1dda795b88018c3aaa3d9ecbe42b93
368 lines
13 KiB
JavaScript
368 lines
13 KiB
JavaScript
/*!
|
|
* Parsoid utilities.
|
|
*
|
|
* @copyright 2011-2020 VisualEditor Team and others; see http://ve.mit-license.org
|
|
*/
|
|
|
|
// Create the shared `mw.libs.ve` namespace if it does not exist yet; an already
// existing object (and anything attached to it) is kept as-is.
mw.libs.ve = mw.libs.ve || {};
|
|
|
|
/**
 * Turn a URI-encoded component into the text form of a MediaWiki article title.
 *
 * N.B. Fairly reasonable input can still yield an illegal article title (e.g. "100%25beef");
 * see https://phabricator.wikimedia.org/T137847 .
 *
 * @param {string} s String to decode
 * @param {boolean} [preserveUnderscores=false] Keep underscores instead of converting them to spaces
 * @return {string} Decoded string, or the original string if decodeURIComponent failed
 */
mw.libs.ve.decodeURIComponentIntoArticleTitle = function ( s, preserveUnderscores ) {
	var decoded;
	try {
		decoded = decodeURIComponent( s );
	} catch ( e ) {
		// Malformed percent-sequence: hand the input back untouched
		return s;
	}
	return preserveUnderscores ? decoded : decoded.replace( /_/g, ' ' );
};
|
|
|
|
/**
 * Unwrap Parsoid sections
 *
 * Every `<section data-mw-section-id>` wrapper below `element` is replaced by its own
 * children, except for the section whose ID equals `keepSection` (when given). For sections
 * with a positive ID, the ID is first copied onto the section's first child so that it
 * survives the unwrapping.
 *
 * @param {HTMLElement} element Parent element, e.g. document body
 * @param {string} [keepSection] Section to keep
 */
mw.libs.ve.unwrapParsoidSections = function ( element, keepSection ) {
	Array.prototype.forEach.call( element.querySelectorAll( 'section[data-mw-section-id]' ), function ( section ) {
		var parent = section.parentNode,
			sectionId = section.getAttribute( 'data-mw-section-id' ),
			firstChild = section.firstChild;
		// Copy section ID to first child (should be a heading)
		// Pseudo-sections (with negative section IDs) may not have a heading
		if ( sectionId !== null && +sectionId > 0 ) {
			// `firstChild` is null for an empty section and may be a text/comment node, neither
			// of which has setAttribute(). Skip the copy in that case rather than throwing and
			// leaving the remaining sections wrapped.
			if ( firstChild && firstChild.nodeType === Node.ELEMENT_NODE ) {
				firstChild.setAttribute( 'data-mw-section-id', sectionId );
			}
		}
		if ( keepSection !== undefined && sectionId === keepSection ) {
			// The kept section stays wrapped
			return;
		}
		// Move the children out in front of the wrapper, then drop the empty wrapper
		while ( section.firstChild ) {
			parent.insertBefore( section.firstChild, section );
		}
		parent.removeChild( section );
	} );
};
|
|
|
|
/**
 * Remove legacy (non-HTML5) ID carriers: empty `<span typeof="mw:FallbackId">` elements
 * with an `id`, typically found as section IDs inside headings.
 *
 * @param {HTMLElement} element Parent element, e.g. document body
 */
mw.libs.ve.stripParsoidFallbackIds = function ( element ) {
	// querySelectorAll returns a static list, so removing nodes while iterating is safe
	var legacySpans = element.querySelectorAll( 'span[typeof="mw:FallbackId"][id]:empty' );
	for ( var i = 0; i < legacySpans.length; i++ ) {
		legacySpans[ i ].parentNode.removeChild( legacySpans[ i ] );
	}
};
|
|
|
|
/**
 * Pattern for the element IDs removed by #stripRestbaseIds: "mw" followed by
 * 2 to 6 characters out of letters, digits, "-" and "_".
 *
 * @property {RegExp}
 */
mw.libs.ve.restbaseIdRegExp = /^mw[a-zA-Z0-9\-_]{2,6}$/;

/**
 * Remove the `id` attribute from every element whose ID matches #restbaseIdRegExp.
 *
 * @param {HTMLDocument} doc Document to process
 */
mw.libs.ve.stripRestbaseIds = function ( doc ) {
	var pattern = mw.libs.ve.restbaseIdRegExp;
	// The selector only pre-filters on the "mw" prefix; the pattern makes the final decision
	var candidates = doc.querySelectorAll( '[id^="mw"]' );
	for ( var i = 0; i < candidates.length; i++ ) {
		var candidate = candidates[ i ];
		if ( pattern.test( candidate.id ) ) {
			candidate.removeAttribute( 'id' );
		}
	}
};
|
|
|
|
/**
 * Re-duplicate deduplicated TemplateStyles, for correct rendering when editing a section or
 * when templates are removed during the edit.
 *
 * Two kinds of placeholders are filled in again:
 * - `<link rel="mw-deduplicated-inline-style" href="mw-data:KEY">` elements are replaced by a
 *   new `<style data-mw-deduplicate="KEY">` holding a copy of the first matching style's content.
 * - empty `<style data-mw-deduplicate="KEY">` elements get a copy of the first matching
 *   style's content.
 *
 * @param {HTMLElement} element Parent element, e.g. document body
 */
mw.libs.ve.reduplicateStyles = function ( element ) {
	/**
	 * Find the first `<style>` below `element` that carries the given deduplication key.
	 *
	 * @private
	 * @param {string} key Value of the `data-mw-deduplicate` attribute
	 * @return {HTMLElement|null}
	 */
	function findFirstStyle( key ) {
		// The key is read from attribute values in the document. Escape it so that a quote or
		// backslash in it cannot produce an invalid selector (querySelector would throw).
		return element.querySelector( 'style[data-mw-deduplicate="' + $.escapeSelector( key ) + '"]' );
	}

	Array.prototype.forEach.call( element.querySelectorAll( 'link[rel~="mw-deduplicated-inline-style"]' ), function ( link ) {
		var href = link.getAttribute( 'href' );
		if ( !href || href.slice( 0, 'mw-data:'.length ) !== 'mw-data:' ) {
			return;
		}
		var key = href.slice( 'mw-data:'.length );
		var style = findFirstStyle( key );
		if ( !style ) {
			// Nothing to copy from; leave the placeholder alone
			return;
		}

		var newStyle = link.ownerDocument.createElement( 'style' );
		newStyle.setAttribute( 'data-mw-deduplicate', key );

		// Copy content from the old `style` node (for rendering)
		for ( var i = 0; i < style.childNodes.length; i++ ) {
			newStyle.appendChild( style.childNodes[ i ].cloneNode( true ) );
		}
		// Copy attributes from the old `link` node (for selser)
		Array.prototype.forEach.call( link.attributes, function ( attr ) {
			if ( attr.name !== 'rel' && attr.name !== 'href' ) {
				newStyle.setAttribute( attr.name, attr.value );
			}
		} );

		link.parentNode.replaceChild( newStyle, link );
	} );

	Array.prototype.forEach.call( element.querySelectorAll( 'style[data-mw-deduplicate]:empty' ), function ( style ) {
		var key = style.getAttribute( 'data-mw-deduplicate' );
		var firstStyle = findFirstStyle( key );
		if ( !firstStyle || firstStyle === style ) {
			return;
		}

		// Copy content from the first matching `style` node (for rendering)
		for ( var i = 0; i < firstStyle.childNodes.length; i++ ) {
			style.appendChild( firstStyle.childNodes[ i ].cloneNode( true ) );
		}
	} );
};
|
|
|
|
/**
 * De-duplicate TemplateStyles, like Parsoid does.
 *
 * The first `<style data-mw-deduplicate="KEY">` for each key is left alone. Every later one
 * with the same key is replaced by a `<link rel="mw-deduplicated-inline-style">` placeholder,
 * or merely emptied when it sits in a fosterable position.
 *
 * @param {HTMLElement} element Parent element, e.g. document body
 */
mw.libs.ve.deduplicateStyles = function ( element ) {
	/**
	 * Check whether `node` is in a fosterable position. (Nodes in these positions may be moved
	 * elsewhere in the DOM by the HTML5 parsing algorithm, if they don't have the right tag name.)
	 * https://html.spec.whatwg.org/#appropriate-place-for-inserting-a-node
	 *
	 * @private
	 * @param {Node|null} node
	 * @return {boolean}
	 */
	function isFosterablePosition( node ) {
		var fosterablePositions = [ 'table', 'thead', 'tbody', 'tfoot', 'tr' ];
		// Coerce to a real boolean and tolerate a missing node or a detached node
		return !!( node && node.parentNode ) &&
			fosterablePositions.indexOf( node.parentNode.nodeName.toLowerCase() ) !== -1;
	}

	// Keys seen so far. A prototype-less object is used so that keys which happen to be named
	// like Object.prototype members (e.g. "constructor") are not mistaken for duplicates.
	var styleTagKeys = Object.create( null );

	Array.prototype.forEach.call( element.querySelectorAll( 'style[data-mw-deduplicate]' ), function ( style ) {
		var key = style.getAttribute( 'data-mw-deduplicate' );

		if ( !styleTagKeys[ key ] ) {
			// Not a dupe
			styleTagKeys[ key ] = true;
			return;
		}

		if ( !isFosterablePosition( style ) ) {
			// Dupe - replace with a placeholder <link> reference
			var link = style.ownerDocument.createElement( 'link' );
			link.setAttribute( 'rel', 'mw-deduplicated-inline-style' );
			link.setAttribute( 'href', 'mw-data:' + key );

			// Copy attributes from the old `style` node (for selser)
			Array.prototype.forEach.call( style.attributes, function ( attr ) {
				if ( attr.name !== 'rel' && attr.name !== 'data-mw-deduplicate' ) {
					link.setAttribute( attr.name, attr.value );
				}
			} );

			style.parentNode.replaceChild( link, style );
		} else {
			// Duplicate style tag found in fosterable position.
			// Not deduping it (to avoid corruption when the resulting HTML is parsed: T299767),
			// but emptying out the style tag for consistency with Parsoid.
			// Parsoid says it does this for performance reasons.
			style.innerHTML = '';
		}
	} );
};
|
|
|
|
/**
 * Rewrite links that point at a fragment of the current document so that they are
 * plain `#fragment` links.
 *
 * This prevents these links from trying to navigate to another page,
 * or open in a new window.
 *
 * Call this after ve.targetLinksToNewWindow, as it removes the target attribute.
 * Call this after LinkCache.styleParsoidElements, as it breaks that method by including the query string.
 *
 * @param {HTMLElement} container Parent element, e.g. document body
 * @param {mw.Title} docTitle Current title, only links to this title will be normalized
 * @param {string} [prefix] Prefix to add to fragment and target ID to avoid collisions
 */
mw.libs.ve.fixFragmentLinks = function ( container, docTitle, prefix ) {
	var currentTitleText = docTitle.getPrefixedText();
	var idPrefix = prefix || '';

	/**
	 * Work out which fragment of the current document a link points at.
	 *
	 * @private
	 * @param {HTMLAnchorElement} link
	 * @return {string|null} Fragment without the leading "#" (possibly empty), or null if
	 *  the link does not point at the current document
	 */
	function getLocalFragment( link ) {
		var rawHref = link.getAttribute( 'href' );
		if ( rawHref[ 0 ] === '#' ) {
			// Legacy parser
			return rawHref.slice( 1 );
		}
		// Parsoid HTML
		var targetData = mw.libs.ve.getTargetDataFromHref( link.href, link.ownerDocument );
		if ( !targetData.isInternal ) {
			return null;
		}
		var title = mw.Title.newFromText( targetData.title );
		if ( !title || title.getPrefixedText() !== currentTitleText ) {
			return null;
		}
		return new URL( link.href ).hash.slice( 1 );
	}

	Array.prototype.forEach.call( container.querySelectorAll( 'a[href*="#"]' ), function ( link ) {
		var fragment = getLocalFragment( link );
		if ( fragment === null ) {
			// Not a link into the current document; leave it untouched
			return;
		}

		if ( fragment ) {
			if ( idPrefix ) {
				var target = container.querySelector( '#' + $.escapeSelector( fragment ) );
				// Several links may share one target. Once the target has been renamed the
				// lookup by the old ID comes back empty, so only rename on the first hit.
				if ( target ) {
					target.setAttribute( 'id', idPrefix + fragment );
					target.setAttribute( 'data-mw-id-fixed', '' );
				}
			}
			link.setAttribute( 'href', '#' + idPrefix + fragment );
		} else {
			// Special case for empty fragment, even if prefix set
			link.setAttribute( 'href', '#' );
		}
		link.removeAttribute( 'target' );
	} );

	// Remove any section heading anchors which weren't fixed above (T218492)
	var headings = container.querySelectorAll( 'h1, h2, h3, h4, h5, h6' );
	for ( var i = 0; i < headings.length; i++ ) {
		var heading = headings[ i ];
		if ( heading.hasAttribute( 'id' ) && !heading.hasAttribute( 'data-mw-id-fixed' ) ) {
			heading.removeAttribute( 'id' );
		}
	}
};
|
|
|
|
/**
 * Parse URL to get title it points to.
 *
 * A href is reported as internal when, after resolving it against the document's base URL,
 * it either does not start with a registered external URL protocol, or matches the wiki's
 * script path (with only a `title` parameter) or article path (with no query parameters).
 * Anything else, including a href that cannot be parsed as a URL at all, is reported as external.
 *
 * @param {string} href
 * @param {HTMLDocument} doc Document whose base URL to use
 * @return {Object} Information about the given href
 * @return {string} [return.title]
 *    The title of the internal link (if the href is internal)
 * @return {boolean} return.isInternal
 *    True if the href pointed to the local wiki, false if href is external
 */
mw.libs.ve.getTargetDataFromHref = function ( href, doc ) {
	function regexEscape( str ) {
		return str.replace( /([.?*+^$[\]\\(){}|-])/g, '\\$1' );
	}

	function returnExternalData() {
		return { isInternal: false };
	}

	function returnInternalData( titleish ) {
		// This value doesn't necessarily come from Parsoid (and it might not have the "./" prefix), but
		// this method will work fine.
		var data = mw.libs.ve.parseParsoidResourceName( titleish );
		data.isInternal = true;
		return data;
	}

	var url;
	try {
		url = new URL( href, doc.baseURI );
	} catch ( e ) {
		// The URL constructor throws a TypeError for input it cannot parse (e.g. 'https://').
		// Such a href cannot be mapped to a local page title, so report it as external
		// instead of crashing the caller.
		return returnExternalData();
	}

	// Equivalent to `ve.init.platform.getExternalLinkUrlProtocolsRegExp()`, which can not be called here
	var externalLinkUrlProtocolsRegExp = new RegExp( '^(' + mw.config.get( 'wgUrlProtocols' ) + ')', 'i' );
	// We don't want external links that don't start with a registered external URL protocol
	// (to avoid generating 'javascript:' URLs), so treat it as internal
	if ( !externalLinkUrlProtocolsRegExp.test( url.toString() ) ) {
		return returnInternalData( url.toString() );
	}

	// Strip red link query parameters
	if ( url.searchParams.get( 'action' ) === 'edit' && url.searchParams.get( 'redlink' ) === '1' ) {
		url.searchParams.delete( 'action' );
		url.searchParams.delete( 'redlink' );
	}
	// Count remaining query parameters
	var keys = [];
	url.searchParams.forEach( function ( val, key ) {
		keys.push( key );
	} );
	var queryLength = keys.length;

	// Compare protocol-relative forms, so that http/https differences don't matter
	var relativeHref = url.toString().replace( /^https?:/i, '' );
	// Check if this matches the server's script path (as used by red links)
	var scriptBase = new URL( mw.config.get( 'wgScript' ), doc.baseURI ).toString().replace( /^https?:/i, '' );
	if ( relativeHref.indexOf( scriptBase ) === 0 ) {
		if ( queryLength === 1 && url.searchParams.get( 'title' ) ) {
			return returnInternalData( url.searchParams.get( 'title' ) + url.hash );
		}
	}

	// Check if this matches the server's article path
	var articleBase = new URL( mw.config.get( 'wgArticlePath' ), doc.baseURI ).toString().replace( /^https?:/i, '' );
	// Anchor at the start (like the script path check above), so that a foreign URL which
	// merely contains the article base somewhere later in the string is not treated as internal.
	var articleBaseRegex = new RegExp( '^' + regexEscape( articleBase ).replace( regexEscape( '$1' ), '(.*)' ) );
	var matches = relativeHref.match( articleBaseRegex );
	// A bare '?' (empty query string) before the fragment still counts as "has a query"
	if ( matches && queryLength === 0 && matches[ 1 ].split( '#' )[ 0 ].indexOf( '?' ) === -1 ) {
		// Take the relative path
		return returnInternalData( matches[ 1 ] );
	}

	// Doesn't match any of the known URL patterns, or has extra parameters
	return returnExternalData();
};
|
|
|
|
/**
 * Build the Parsoid resource name ("./"-prefixed href) for a page title.
 *
 * Everything after the first "#" is treated as an anchor and escaped separately from
 * the page name.
 *
 * @param {string} title
 * @return {string}
 */
mw.libs.ve.encodeParsoidResourceName = function ( title ) {
	// Parsoid: Sanitizer::sanitizeTitleURI, Env::makeLink
	var hashIndex = title.indexOf( '#' ),
		hasAnchor = hashIndex !== -1,
		pageName = hasAnchor ? title.slice( 0, hashIndex ) : title;

	// Only this fixed set of characters gets URL-encoded in the page name
	var resourceName = './' + pageName.replace( /[%? [\]#|<>]/g, function ( character ) {
		return mw.util.wikiUrlencode( character );
	} );

	if ( hasAnchor ) {
		resourceName += '#' + mw.util.escapeIdForLink( title.slice( hashIndex + 1 ) );
	}
	return resourceName;
};
|
|
|
|
/**
 * Split Parsoid resource name into the href prefix and the page title.
 *
 * @param {string} resourceName Resource name, from a `href` or `resource` attribute
 * @return {Object} Object with the following properties:
 * @return {string} return.title Full page title in text form (with namespace, and spaces instead of underscores)
 */
mw.libs.ve.parseParsoidResourceName = function ( resourceName ) {
	// Resource names are always prefixed with './' to prevent the MediaWiki namespace from being
	// interpreted as a URL protocol, consider e.g. 'href="./File:Foo.png"'.
	// (We accept input without the prefix, so this can also take plain page titles.)
	//
	// The prefix is stripped with plain string operations rather than a `^(\.\/|)(.*)$` match:
	// `.` does not match line terminators, so that pattern yields no match at all (and reading
	// from the null result throws) when the input contains one, e.g. from a decoded "%0A".
	var titleish = resourceName.slice( 0, 2 ) === './' ? resourceName.slice( 2 ) : resourceName;
	return {
		// '%' and '?' are valid in page titles, but normally URI-encoded. This also changes underscores
		// to spaces.
		title: mw.libs.ve.decodeURIComponentIntoArticleTitle( titleish )
	};
};
|
|
|
|
/**
 * Get just the page title out of a Parsoid resource name.
 *
 * Shorthand for reading `title` from the result of #parseParsoidResourceName.
 *
 * @param {string} resourceName Resource name, from a `href` or `resource` attribute
 * @return {string} Full page title in text form (with namespace, and spaces instead of underscores)
 */
mw.libs.ve.normalizeParsoidResourceName = function ( resourceName ) {
	var parsed = mw.libs.ve.parseParsoidResourceName( resourceName );
	return parsed.title;
};
|