mirror of
synced 2024-12-12 14:35:28 +00:00
In MediaWiki, section numbers may be prefixed with 'T-' if they refer
to sections on a transcluded page, so they are not really numbers.
Change e2cb9ce93e
caused us to treat them as strings most of the time,
but it looks like there are several places where we treated them as
integer numbers, which I missed when making that patch.
The first two changes in ArticleTarget#restoreEditSection fix T248795
and T248968/T249112, respectively. The other changes are cleanup.
Bug: T248795
Bug: T248968
Bug: T249112
Change-Id: I8373a7ab515595769ce6f3051a182c922415b643
252 lines
8.8 KiB
252 lines
8.8 KiB
/*!
 * Parsoid utilities.
 *
 * @copyright 2011-2020 VisualEditor Team and others; see http://ve.mit-license.org
 */
// Reuse the mw.libs.ve namespace object if it is already set, otherwise start with an empty one.
mw.libs.ve = mw.libs.ve || {};
/**
 * Resolve a URL relative to a given base.
 *
 * Copied from ve.resolveUrl
 *
 * The URL is assigned to the `href` attribute of a detached anchor element created
 * by `base`, and the anchor's `href` property is read back.
 *
 * @param {string} url URL to resolve
 * @param {HTMLDocument} base Document whose base URL to use
 * @return {string} Resolved URL, or the unmodified `url` if the anchor's `href` is empty
 */
mw.libs.ve.resolveUrl = function ( url, base ) {
	var node = base.createElement( 'a' );
	node.setAttribute( 'href', url );
	// If doc.baseURI isn't set, node.href will be an empty string
	// This is crazy, returning the original URL is better
	return node.href || url;
};
/**
 * Decode a URI component into a mediawiki article title
 *
 * N.B. Illegal article titles can result from fairly reasonable input (e.g. "100%25beef");
 * see https://phabricator.wikimedia.org/T137847 .
 *
 * @param {string} s String to decode
 * @param {boolean} [preserveUnderscores] Don't convert underscores to spaces
 * @return {string} Decoded string, or original string if decodeURIComponent failed
 */
mw.libs.ve.decodeURIComponentIntoArticleTitle = function ( s, preserveUnderscores ) {
	try {
		s = decodeURIComponent( s );
	} catch ( e ) {
		// Malformed escape sequence: hand back the input untouched
		// (underscores are not converted in this case either)
		return s;
	}
	if ( preserveUnderscores ) {
		return s;
	}
	return s.replace( /_/g, ' ' );
};
/**
 * Unwrap Parsoid sections
 *
 * Every `section[data-mw-section-id]` descendant of `element` is replaced by its own
 * child nodes, except for the section whose ID equals `keepSection` (if given).
 * Section IDs are compared as strings.
 *
 * @param {HTMLElement} element Parent element, e.g. document body
 * @param {string} [keepSection] Section to keep
 */
mw.libs.ve.unwrapParsoidSections = function ( element, keepSection ) {
	Array.prototype.forEach.call( element.querySelectorAll( 'section[data-mw-section-id]' ), function ( section ) {
		var parent = section.parentNode,
			firstChild = section.firstChild,
			sectionId = section.getAttribute( 'data-mw-section-id' );
		// Copy section ID to first child (should be a heading).
		// Skip the copy when the section is empty or starts with a non-element node
		// (nodeType 1 is ELEMENT_NODE); such nodes have no setAttribute method.
		if ( sectionId !== '0' && firstChild && firstChild.nodeType === 1 ) {
			firstChild.setAttribute( 'data-mw-section-id', sectionId );
		}
		if ( keepSection !== undefined && sectionId === keepSection ) {
			// Leave the requested section wrapped
			return;
		}
		// Move the children out in front of the wrapper, then drop the empty wrapper
		while ( section.firstChild ) {
			parent.insertBefore( section.firstChild, section );
		}
		parent.removeChild( section );
	} );
};
/**
 * Strip legacy (non-HTML5) IDs; typically found as section IDs inside
 * headings.
 *
 * Removes every empty `span[typeof="mw:FallbackId"]` that carries an `id` attribute.
 *
 * @param {HTMLElement} element Parent element, e.g. document body
 */
mw.libs.ve.stripParsoidFallbackIds = function ( element ) {
	Array.prototype.forEach.call( element.querySelectorAll( 'span[typeof="mw:FallbackId"][id]:empty' ), function ( legacySpan ) {
		legacySpan.parentNode.removeChild( legacySpan );
	} );
};
/**
 * Fix fragment links which should be relative to the current document
 *
 * This prevents these links from trying to navigate to another page,
 * or open in a new window.
 *
 * Call this after ve.targetLinksToNewWindow, as it removes the target attribute.
 * Call this after LinkCache.styleParsoidElements, as it breaks that method by including the query string.
 *
 * @param {HTMLElement} container Parent element, e.g. document body
 * @param {mw.Title} docTitle Current title, only links to this title will be normalized
 * @param {string} [prefix] Prefix to add to fragment and target ID to avoid collisions
 */
mw.libs.ve.fixFragmentLinks = function ( container, docTitle, prefix ) {
	var docTitleText = docTitle.getPrefixedText();
	prefix = prefix || '';
	Array.prototype.forEach.call( container.querySelectorAll( 'a[href*="#"]' ), function ( el ) {
		var target, title,
			fragment = new mw.Uri( el.href ).fragment,
			targetData = mw.libs.ve.getTargetDataFromHref( el.href, el.ownerDocument );

		if ( targetData.isInternal ) {
			title = mw.Title.newFromText( targetData.title );

			// Only rewrite links that point at the current document
			if ( title && title.getPrefixedText() === docTitleText ) {

				if ( !fragment ) {
					// Special case for empty fragment, even if prefix set
					el.setAttribute( 'href', '#' );
				} else {
					if ( prefix ) {
						target = container.querySelector( '#' + $.escapeSelector( fragment ) );
						// There may be multiple links to a specific target, so check the target
						// hasn't already been fixed (in which case it would be null)
						if ( target ) {
							target.setAttribute( 'id', prefix + fragment );
						}
					}
					el.setAttribute( 'href', '#' + prefix + fragment );
				}
				el.removeAttribute( 'target' );
			}
		}
	} );
};
/**
 * Parse URL to get title it points to.
 *
 * The href is matched against the wiki's article path (`wgArticlePath`) and script
 * path (`wgScript`); the extracted title is then passed through
 * mw.libs.ve.parseParsoidResourceName.
 *
 * NOTE(review): `doc` is forwarded unchanged to mw.libs.ve.resolveUrl, which calls
 * `createElement` on it; the string form documented below does not appear to be
 * handled by the code visible here - confirm before relying on it.
 *
 * @param {string} href
 * @param {HTMLDocument|string} doc Document whose base URL to use, or base URL as a string.
 * @return {Object} Information about the given href
 * @return {string} return.title
 *    The title of the internal link, else the original href if href is external
 * @return {string} return.rawTitle
 *    The title without URL decoding and underscore normalization applied
 * @return {boolean} return.isInternal
 *    True if the href pointed to the local wiki, false if href is external
 */
mw.libs.ve.getTargetDataFromHref = function ( href, doc ) {
	var relativeBase, relativeBaseRegex, relativeHref, isInternal, matches, data, uri;

	// Backslash-escape regular expression metacharacters so `str` matches literally
	function regexEscape( str ) {
		return str.replace( /([.?*+^$[\]\\(){}|-])/g, '\\$1' );
	}

	// Protocol relative href
	relativeHref = href.replace( /^https?:/i, '' );
	// Paths without a host portion are assumed to be internal
	isInternal = !/^\/\//.test( relativeHref );

	// Check if this matches the server's article path
	// Protocol relative base
	relativeBase = mw.libs.ve.resolveUrl( mw.config.get( 'wgArticlePath' ), doc ).replace( /^https?:/i, '' );
	// The escaped '$1' placeholder of the article path becomes the capture group for the title
	relativeBaseRegex = new RegExp( regexEscape( relativeBase ).replace( regexEscape( '$1' ), '(.*)' ) );
	matches = relativeHref.match( relativeBaseRegex );
	// Reject matches that carry a query string before any fragment
	if ( matches && matches[ 1 ].split( '#' )[ 0 ].indexOf( '?' ) === -1 ) {
		// Take the relative path
		href = matches[ 1 ];
		isInternal = true;
	}

	// Check if this matches the server's script path (as used by red links)
	relativeBase = mw.libs.ve.resolveUrl( mw.config.get( 'wgScript' ), doc ).replace( /^https?:/i, '' );
	if ( relativeHref.indexOf( relativeBase ) === 0 ) {
		uri = new mw.Uri( relativeHref );
		if ( uri.query.title ) {
			href = uri.query.title;
			isInternal = true;
		}
	}

	// This href doesn't necessarily come from Parsoid (and it might not have the "./" prefix), but
	// this method will work fine.
	data = mw.libs.ve.parseParsoidResourceName( href );
	data.isInternal = isInternal;
	return data;
};
/**
 * Expand a string of the form jquery.foo,bar|jquery.ui.baz,quux to
 * an array of module names like [ 'jquery.foo', 'jquery.bar',
 * 'jquery.ui.baz', 'jquery.ui.quux' ]
 *
 * Implementation of ResourceLoaderContext::expandModuleNames
 * TODO: Consider upstreaming this to MW core.
 *
 * @param {string} moduleNames Packed module name list
 * @return {string[]} Array of module names
 */
mw.libs.ve.expandModuleNames = function ( moduleNames ) {
	var modules = [];

	moduleNames.split( '|' ).forEach( function ( group ) {
		var matches, prefix, suffixes;
		if ( group.indexOf( ',' ) === -1 ) {
			// This is not a set of modules in foo.bar,baz notation
			// but a single module
			modules.push( group );
		} else {
			// This is a set of modules in foo.bar,baz notation
			// The greedy first group makes the split happen at the last dot
			matches = group.match( /(.*)\.([^.]*)/ );
			if ( !matches ) {
				// Prefixless modules, i.e. without dots
				modules = modules.concat( group.split( ',' ) );
			} else {
				// We have a prefix and a bunch of suffixes
				prefix = matches[ 1 ];
				suffixes = matches[ 2 ].split( ',' ); // [ 'bar', 'baz' ]
				suffixes.forEach( function ( suffix ) {
					modules.push( prefix + '.' + suffix );
				} );
			}
		}
	} );
	return modules;
};
/**
 * Split Parsoid resource name into the href prefix and the page title.
 *
 * @param {string} resourceName Resource name, from a `href` or `resource` attribute
 * @return {Object} Object with the following properties:
 * @return {string} return.title Full page title in text form (with namespace, and spaces instead of underscores)
 * @return {string} return.rawTitle The title without URL decoding and underscore normalization applied
 */
mw.libs.ve.parseParsoidResourceName = function ( resourceName ) {
	// Resource names are always prefixed with './' to prevent the MediaWiki namespace from being
	// interpreted as a URL protocol, consider e.g. 'href="./File:Foo.png"'.
	// (We accept input without the prefix, so this can also take plain page titles.)
	// [\s\S] is used instead of '.' so that the pattern matches every string, including
	// ones containing line terminators; match() therefore never returns null here.
	var matches = resourceName.match( /^(\.\/|)([\s\S]*)$/ );
	return {
		// '%' and '?' are valid in page titles, but normally URI-encoded. This also changes underscores
		// to spaces.
		title: mw.libs.ve.decodeURIComponentIntoArticleTitle( matches[ 2 ] ),
		rawTitle: matches[ 2 ]
	};
};
/**
 * Extract the page title from a Parsoid resource name.
 *
 * Shorthand for the `title` property returned by mw.libs.ve.parseParsoidResourceName.
 *
 * @param {string} resourceName Resource name, from a `href` or `resource` attribute
 * @return {string} Full page title in text form (with namespace, and spaces instead of underscores)
 */
mw.libs.ve.normalizeParsoidResourceName = function ( resourceName ) {
	return mw.libs.ve.parseParsoidResourceName( resourceName ).title;
};