/*! * Parsoid utilities. * * @copyright 2011-2020 VisualEditor Team and others; see http://ve.mit-license.org */ mw.libs.ve = mw.libs.ve || {}; /** * Resolve a URL relative to a given base. * * Copied from ve.resolveUrl * * @param {string} url URL to resolve * @param {HTMLDocument} base Document whose base URL to use * @return {string} Resolved URL */ mw.libs.ve.resolveUrl = function ( url, base ) { var node = base.createElement( 'a' ); node.setAttribute( 'href', url ); // If doc.baseURI isn't set, node.href will be an empty string // This is crazy, returning the original URL is better return node.href || url; }; /** * Decode a URI component into a mediawiki article title * * N.B. Illegal article titles can result from fairly reasonable input (e.g. "100%25beef"); * see https://phabricator.wikimedia.org/T137847 . * * @param {string} s String to decode * @param {boolean} [preserveUnderscores=false] Don't convert underscores to spaces * @return {string} Decoded string, or original string if decodeURIComponent failed */ mw.libs.ve.decodeURIComponentIntoArticleTitle = function ( s, preserveUnderscores ) { try { s = decodeURIComponent( s ); } catch ( e ) { return s; } if ( preserveUnderscores ) { return s; } return s.replace( /_/g, ' ' ); }; /** * Unwrap Parsoid sections * * @param {HTMLElement} element Parent element, e.g. document body * @param {string} [keepSection] Section to keep */ mw.libs.ve.unwrapParsoidSections = function ( element, keepSection ) { Array.prototype.forEach.call( element.querySelectorAll( 'section[data-mw-section-id]' ), function ( section ) { var parent = section.parentNode, sectionId = section.getAttribute( 'data-mw-section-id' ); // Copy section ID to first child (should be a heading) // Pseudo-sections (with negative section IDs) may not have a heading if ( sectionId !== null && +sectionId > 0 ) { section.firstChild.setAttribute( 'data-mw-section-id', sectionId ); } if ( keepSection !== undefined && sectionId === keepSection ) { return; } while ( section.firstChild ) { parent.insertBefore( section.firstChild, section ); } parent.removeChild( section ); } ); }; /** * Strip legacy (non-HTML5) IDs; typically found as section IDs inside * headings. * * @param {HTMLElement} element Parent element, e.g. document body */ mw.libs.ve.stripParsoidFallbackIds = function ( element ) { Array.prototype.forEach.call( element.querySelectorAll( 'span[typeof="mw:FallbackId"][id]:empty' ), function ( legacySpan ) { legacySpan.parentNode.removeChild( legacySpan ); } ); }; mw.libs.ve.restbaseIdRegExp = /^mw[a-zA-Z0-9\-_]{2,6}$/; mw.libs.ve.stripRestbaseIds = function ( doc ) { var restbaseIdRegExp = mw.libs.ve.restbaseIdRegExp; Array.prototype.forEach.call( doc.querySelectorAll( '[id^="mw"]' ), function ( element ) { if ( restbaseIdRegExp.test( element.id ) ) { element.removeAttribute( 'id' ); } } ); }; /** * Re-duplicate deduplicated TemplateStyles, for correct rendering when editing a section or * when templates are removed during the edit. * * @param {HTMLElement} element Parent element, e.g. document body */ mw.libs.ve.reduplicateStyles = function ( element ) { Array.prototype.forEach.call( element.querySelectorAll( 'link[rel~="mw-deduplicated-inline-style"]' ), function ( link ) { var href = link.getAttribute( 'href' ); if ( !href || href.slice( 0, 'mw-data:'.length ) !== 'mw-data:' ) { return; } var key = href.slice( 'mw-data:'.length ); var style = element.querySelector( 'style[data-mw-deduplicate="' + key + '"]' ); if ( !style ) { return; } var newStyle = link.ownerDocument.createElement( 'style' ); newStyle.setAttribute( 'data-mw-deduplicate', key ); // Copy content from the old `style` node (for rendering) for ( var i = 0; i < style.childNodes.length; i++ ) { newStyle.appendChild( style.childNodes[ i ].cloneNode( true ) ); } // Copy attributes from the old `link` node (for selser) Array.prototype.forEach.call( link.attributes, function ( attr ) { if ( attr.name !== 'rel' && attr.name !== 'href' ) { newStyle.setAttribute( attr.name, attr.value ); } } ); link.parentNode.replaceChild( newStyle, link ); } ); Array.prototype.forEach.call( element.querySelectorAll( 'style[data-mw-deduplicate]:empty' ), function ( style ) { var key = style.getAttribute( 'data-mw-deduplicate' ); var firstStyle = element.querySelector( 'style[data-mw-deduplicate="' + key + '"]' ); if ( !firstStyle || firstStyle === style ) { return; } // Copy content from the first matching `style` node (for rendering) for ( var i = 0; i < firstStyle.childNodes.length; i++ ) { style.appendChild( firstStyle.childNodes[ i ].cloneNode( true ) ); } } ); }; /** * De-duplicate TemplateStyles, like Parsoid does. * * @param {HTMLElement} element Parent element, e.g. document body */ mw.libs.ve.deduplicateStyles = function ( element ) { /** * Check whether `node` is in a fosterable position. (Nodes in these positions may be moved * elsewhere in the DOM by the HTML5 parsing algorithm, if they don't have the right tag name.) * https://html.spec.whatwg.org/#appropriate-place-for-inserting-a-node * * @private * @param {Node|null} node * @return {boolean} */ function isFosterablePosition( node ) { var fosterablePositions = [ 'table', 'thead', 'tbody', 'tfoot', 'tr' ]; return node && fosterablePositions.indexOf( node.parentNode.nodeName.toLowerCase() ) !== -1; } var styleTagKeys = {}; Array.prototype.forEach.call( element.querySelectorAll( 'style[data-mw-deduplicate]' ), function ( style ) { var key = style.getAttribute( 'data-mw-deduplicate' ); if ( !styleTagKeys[ key ] ) { // Not a dupe styleTagKeys[ key ] = true; return; } if ( !isFosterablePosition( style ) ) { // Dupe - replace with a placeholder reference var link = style.ownerDocument.createElement( 'link' ); link.setAttribute( 'rel', 'mw-deduplicated-inline-style' ); link.setAttribute( 'href', 'mw-data:' + key ); // Copy attributes from the old `link` node (for selser) Array.prototype.forEach.call( style.attributes, function ( attr ) { if ( attr.name !== 'rel' && attr.name !== 'data-mw-deduplicate' ) { link.setAttribute( attr.name, attr.value ); } } ); style.parentNode.replaceChild( link, style ); } else { // Duplicate style tag found in fosterable position. // Not deduping it (to avoid corruption when the resulting HTML is parsed: T299767), // but emptying out the style tag for consistency with Parsoid. // Parsoid says it does this for performance reasons. style.innerHTML = ''; } } ); }; /** * Fix fragment links which should be relative to the current document * * This prevents these links from trying to navigate to another page, * or open in a new window. * * Call this after ve.targetLinksToNewWindow, as it removes the target attribute. * Call this after LinkCache.styleParsoidElements, as it breaks that method by including the query string. * * @param {HTMLElement} container Parent element, e.g. document body * @param {mw.Title} docTitle Current title, only links to this title will be normalized * @param {string} [prefix] Prefix to add to fragment and target ID to avoid collisions */ mw.libs.ve.fixFragmentLinks = function ( container, docTitle, prefix ) { var docTitleText = docTitle.getPrefixedText(); prefix = prefix || ''; Array.prototype.forEach.call( container.querySelectorAll( 'a[href*="#"]' ), function ( el ) { var fragment = null; if ( el.getAttribute( 'href' )[ 0 ] === '#' ) { // Legacy parser fragment = el.getAttribute( 'href' ).slice( 1 ); } else { // Parsoid HTML var targetData = mw.libs.ve.getTargetDataFromHref( el.href, el.ownerDocument ); if ( targetData.isInternal ) { var title = mw.Title.newFromText( targetData.title ); if ( title && title.getPrefixedText() === docTitleText ) { fragment = new URL( el.href ).hash.slice( 1 ); } } } if ( fragment !== null ) { if ( !fragment ) { // Special case for empty fragment, even if prefix set el.setAttribute( 'href', '#' ); } else { if ( prefix ) { var target = container.querySelector( '#' + $.escapeSelector( fragment ) ); // There may be multiple links to a specific target, so check the target // hasn't already been fixed (in which case it would be null) if ( target ) { target.setAttribute( 'id', prefix + fragment ); target.setAttribute( 'data-mw-id-fixed', '' ); } } el.setAttribute( 'href', '#' + prefix + fragment ); } el.removeAttribute( 'target' ); } } ); // Remove any section heading anchors which weren't fixed above (T218492) Array.prototype.forEach.call( container.querySelectorAll( 'h1, h2, h3, h4, h5, h6' ), function ( el ) { if ( el.hasAttribute( 'id' ) && !el.hasAttribute( 'data-mw-id-fixed' ) ) { el.removeAttribute( 'id' ); } } ); }; /** * Parse URL to get title it points to. * * @param {string} href * @param {HTMLDocument} doc Document whose base URL to use * @return {Object} Information about the given href * @return {string} [return.title] * The title of the internal link (if the href is internal) * @return {boolean} return.isInternal * True if the href pointed to the local wiki, false if href is external */ mw.libs.ve.getTargetDataFromHref = function ( href, doc ) { function regexEscape( str ) { return str.replace( /([.?*+^$[\]\\(){}|-])/g, '\\$1' ); } var isInternal = null; // Protocol relative href var relativeHref = href.replace( /^https?:/i, '' ); var uri, queryLength; // Equivalent to `ve.init.platform.getExternalLinkUrlProtocolsRegExp()`, which can not be called here var externalLinkUrlProtocolsRegExp = new RegExp( '^(' + mw.config.get( 'wgUrlProtocols' ) + ')', 'i' ); // Paths that don't start with a registered external url protocol if ( !externalLinkUrlProtocolsRegExp.test( href ) ) { isInternal = true; if ( href.match( /^\.\// ) ) { // The specific case of parsoid resource URIs, which are in the form `./Title`. // If they're redlinks they now include a querystring which should be stripped. try { uri = new mw.Uri( href ); } catch ( e ) { // probably an incorrecly encoded URI, try a very-naïve fallback href = href.replace( /\?action=edit&redlink=1$/, '' ); } if ( uri ) { queryLength = Object.keys( uri.query ).length; if ( ( queryLength === 2 && uri.query.action === 'edit' && uri.query.redlink === '1' ) ) { uri.query = {}; href = '.' + uri.getRelativePath(); } } } } else { // Check if this matches the server's script path (as used by red links) var scriptBase = mw.libs.ve.resolveUrl( mw.config.get( 'wgScript' ), doc ).replace( /^https?:/i, '' ); if ( relativeHref.indexOf( scriptBase ) === 0 ) { try { uri = new mw.Uri( relativeHref ); } catch ( e ) { // probably an incorrectly encoded URI } if ( uri ) { queryLength = Object.keys( uri.query ).length; if ( ( queryLength === 1 && uri.query.title ) || ( queryLength === 3 && uri.query.title && uri.query.action === 'edit' && uri.query.redlink === '1' ) ) { href = uri.query.title + ( uri.fragment ? '#' + uri.fragment : '' ); isInternal = true; } else if ( queryLength > 1 ) { href = relativeHref; isInternal = false; } } } if ( isInternal === null ) { // Check if this matches the server's article path var articleBase = mw.libs.ve.resolveUrl( mw.config.get( 'wgArticlePath' ), doc ).replace( /^https?:/i, '' ); var articleBaseRegex = new RegExp( regexEscape( articleBase ).replace( regexEscape( '$1' ), '(.*)' ) ); var matches = relativeHref.match( articleBaseRegex ); if ( matches && matches[ 1 ].split( '#' )[ 0 ].indexOf( '?' ) === -1 ) { // Take the relative path href = matches[ 1 ]; isInternal = true; } else { isInternal = false; } } } if ( !isInternal ) { return { isInternal: false }; } // This href doesn't necessarily come from Parsoid (and it might not have the "./" prefix), but // this method will work fine. var data = mw.libs.ve.parseParsoidResourceName( href ); data.isInternal = true; return data; }; /** * Encode a page title into a Parsoid resource name. * * @param {string} title * @return {string} */ mw.libs.ve.encodeParsoidResourceName = function ( title ) { // Parsoid: Sanitizer::sanitizeTitleURI, Env::makeLink var idx = title.indexOf( '#' ); var anchor = null; if ( idx !== -1 ) { anchor = title.slice( idx + 1 ); title = title.slice( 0, idx ); } var encodedTitle = title.replace( /[%? [\]#|<>]/g, function ( match ) { return mw.util.wikiUrlencode( match ); } ); if ( anchor !== null ) { encodedTitle += '#' + mw.util.escapeIdForLink( anchor ); } return './' + encodedTitle; }; /** * Split Parsoid resource name into the href prefix and the page title. * * @param {string} resourceName Resource name, from a `href` or `resource` attribute * @return {Object} Object with the following properties: * @return {string} return.title Full page title in text form (with namespace, and spaces instead of underscores) */ mw.libs.ve.parseParsoidResourceName = function ( resourceName ) { // Resource names are always prefixed with './' to prevent the MediaWiki namespace from being // interpreted as a URL protocol, consider e.g. 'href="./File:Foo.png"'. // (We accept input without the prefix, so this can also take plain page titles.) var matches = resourceName.match( /^(\.\/|)(.*)$/ ); return { // '%' and '?' are valid in page titles, but normally URI-encoded. This also changes underscores // to spaces. title: mw.libs.ve.decodeURIComponentIntoArticleTitle( matches[ 2 ] ) }; }; /** * Extract the page title from a Parsoid resource name. * * @param {string} resourceName Resource name, from a `href` or `resource` attribute * @return {string} Full page title in text form (with namespace, and spaces instead of underscores) */ mw.libs.ve.normalizeParsoidResourceName = function ( resourceName ) { return mw.libs.ve.parseParsoidResourceName( resourceName ).title; };