From 94d3aee9f677a4b3a4e11f444dcf1adb0d26bdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Dziewo=C5=84ski?= Date: Mon, 30 Jan 2023 13:52:39 +0000 Subject: [PATCH] Revert "Rewrite mw.libs.ve.getTargetDataFromHref with URL API" This reverts commit 461c76981f474ba0f8285b8747722faf87d04f29. Bug: T328143 Change-Id: Ib59192c650736eac9d4a2db130c3e29720c30486 --- modules/ve-mw/preinit/ve.utils.parsoid.js | 133 +++++++++++------ .../ve.dm.MWInternalLinkAnnotation.test.js | 140 +++++++----------- modules/ve-mw/tests/dm/ve.dm.mwExample.js | 18 +-- .../tests/preinit/ve.utils.parsoid.test.js | 32 ---- ...ui.MWWikitextStringTransferHandler.test.js | 3 - 5 files changed, 148 insertions(+), 178 deletions(-) diff --git a/modules/ve-mw/preinit/ve.utils.parsoid.js b/modules/ve-mw/preinit/ve.utils.parsoid.js index 59dd1d6fa8..a8a1ea6fa1 100644 --- a/modules/ve-mw/preinit/ve.utils.parsoid.js +++ b/modules/ve-mw/preinit/ve.utils.parsoid.js @@ -6,6 +6,23 @@ mw.libs.ve = mw.libs.ve || {}; +/** + * Resolve a URL relative to a given base. + * + * Copied from ve.resolveUrl + * + * @param {string} url URL to resolve + * @param {HTMLDocument} base Document whose base URL to use + * @return {string} Resolved URL + */ +mw.libs.ve.resolveUrl = function ( url, base ) { + var node = base.createElement( 'a' ); + node.setAttribute( 'href', url ); + // If doc.baseURI isn't set, node.href will be an empty string + // This is crazy, returning the original URL is better + return node.href || url; +}; + /** * Decode a URI component into a mediawiki article title * @@ -256,62 +273,82 @@ mw.libs.ve.getTargetDataFromHref = function ( href, doc ) { return str.replace( /([.?*+^$[\]\\(){}|-])/g, '\\$1' ); } - function returnExternalData() { - return { isInternal: false }; - } - - function returnInternalData( titleish ) { - // This value doesn't necessarily come from Parsoid (and it might not have the "./" prefix), but - // this method will work fine. - var data = mw.libs.ve.parseParsoidResourceName( titleish ); - data.isInternal = true; - return data; - } - - var url = new URL( href, doc.baseURI ); + var isInternal = null; + // Protocol relative href + var relativeHref = href.replace( /^https?:/i, '' ); + var uri, queryLength; // Equivalent to `ve.init.platform.getExternalLinkUrlProtocolsRegExp()`, which can not be called here var externalLinkUrlProtocolsRegExp = new RegExp( '^(' + mw.config.get( 'wgUrlProtocols' ) + ')', 'i' ); - // We don't want external links that don't start with a registered external URL protocol - // (to avoid generating 'javascript:' URLs), so treat it as internal - if ( !externalLinkUrlProtocolsRegExp.test( url.toString() ) ) { - return returnInternalData( url.toString() ); - } - - // Strip red link query parameters - if ( url.searchParams.get( 'action' ) === 'edit' && url.searchParams.get( 'redlink' ) === '1' ) { - url.searchParams.delete( 'action' ); - url.searchParams.delete( 'redlink' ); - } - // Count remaining query parameters - var keys = []; - url.searchParams.forEach( function ( val, key ) { - keys.push( key ); - } ); - var queryLength = keys.length; - - var relativeHref = url.toString().replace( /^https?:/i, '' ); - // Check if this matches the server's script path (as used by red links) - var scriptBase = new URL( mw.config.get( 'wgScript' ), doc.baseURI ).toString().replace( /^https?:/i, '' ); - if ( relativeHref.indexOf( scriptBase ) === 0 ) { - if ( queryLength === 1 && url.searchParams.get( 'title' ) ) { - return returnInternalData( url.searchParams.get( 'title' ) + url.hash ); + // Paths that don't start with a registered external url protocol + if ( !externalLinkUrlProtocolsRegExp.test( href ) ) { + isInternal = true; + if ( href.match( /^\.\// ) ) { + // The specific case of parsoid resource URIs, which are in the form `./Title`. + // If they're redlinks they now include a querystring which should be stripped. + try { + uri = new mw.Uri( href ); + } catch ( e ) { + // probably an incorrecly encoded URI, try a very-naïve fallback + href = href.replace( /\?action=edit&redlink=1$/, '' ); + } + if ( uri ) { + queryLength = Object.keys( uri.query ).length; + if ( + ( queryLength === 2 && uri.query.action === 'edit' && uri.query.redlink === '1' ) + ) { + uri.query = {}; + href = '.' + uri.getRelativePath(); + } + } + } + } else { + // Check if this matches the server's script path (as used by red links) + var scriptBase = mw.libs.ve.resolveUrl( mw.config.get( 'wgScript' ), doc ).replace( /^https?:/i, '' ); + if ( relativeHref.indexOf( scriptBase ) === 0 ) { + try { + uri = new mw.Uri( relativeHref ); + } catch ( e ) { + // probably an incorrectly encoded URI + } + if ( uri ) { + queryLength = Object.keys( uri.query ).length; + if ( + ( queryLength === 1 && uri.query.title ) || + ( queryLength === 3 && uri.query.title && uri.query.action === 'edit' && uri.query.redlink === '1' ) + ) { + href = uri.query.title + ( uri.fragment ? '#' + uri.fragment : '' ); + isInternal = true; + } else if ( queryLength > 1 ) { + href = relativeHref; + isInternal = false; + } + } + } + if ( isInternal === null ) { + // Check if this matches the server's article path + var articleBase = mw.libs.ve.resolveUrl( mw.config.get( 'wgArticlePath' ), doc ).replace( /^https?:/i, '' ); + var articleBaseRegex = new RegExp( regexEscape( articleBase ).replace( regexEscape( '$1' ), '(.*)' ) ); + var matches = relativeHref.match( articleBaseRegex ); + if ( matches && matches[ 1 ].split( '#' )[ 0 ].indexOf( '?' ) === -1 ) { + // Take the relative path + href = matches[ 1 ]; + isInternal = true; + } else { + isInternal = false; + } } } - // Check if this matches the server's article path - var articleBase = new URL( mw.config.get( 'wgArticlePath' ), doc.baseURI ).toString().replace( /^https?:/i, '' ); - var articleBaseRegex = new RegExp( regexEscape( articleBase ).replace( regexEscape( '$1' ), '(.*)' ) ); - var matches = relativeHref.match( articleBaseRegex ); - if ( matches ) { - if ( queryLength === 0 && matches && matches[ 1 ].split( '#' )[ 0 ].indexOf( '?' ) === -1 ) { - // Take the relative path - return returnInternalData( matches[ 1 ] ); - } + if ( !isInternal ) { + return { isInternal: false }; } - // Doesn't match any of the known URL patterns, or has extra parameters - return returnExternalData(); + // This href doesn't necessarily come from Parsoid (and it might not have the "./" prefix), but + // this method will work fine. + var data = mw.libs.ve.parseParsoidResourceName( href ); + data.isInternal = true; + return data; }; /** diff --git a/modules/ve-mw/tests/dm/annotations/ve.dm.MWInternalLinkAnnotation.test.js b/modules/ve-mw/tests/dm/annotations/ve.dm.MWInternalLinkAnnotation.test.js index 4b13763a15..64b8331995 100644 --- a/modules/ve-mw/tests/dm/annotations/ve.dm.MWInternalLinkAnnotation.test.js +++ b/modules/ve-mw/tests/dm/annotations/ve.dm.MWInternalLinkAnnotation.test.js @@ -7,28 +7,22 @@ QUnit.module( 've.dm.MWInternalLinkAnnotation', ve.test.utils.newMwEnvironment() ); QUnit.test( 'toDataElement', ( assert ) => { - // The expected data depends on site configuration, so we need to generate the cases several times. - const getCases = () => { - const - externalLink = ( href ) => { + const doc = ve.dm.mwExample.createExampleDocumentFromData( [], null, location.origin ), + externalLink = ( href ) => { + return () => { const link = document.createElement( 'a' ); link.setAttribute( 'href', href ); return link; - }, - internalLink = ( pageTitle, params ) => { - const link = document.createElement( 'a' ); - link.setAttribute( 'href', mw.Title.newFromText( pageTitle ).getUrl( params ) ); - return link; - }, - parsoidLink = ( href ) => { - const link = document.createElement( 'a' ); - if ( mw.config.get( 'wgArticlePath' ).includes( '?' ) ) { - href = href.replace( './', './index.php?title=' ); - } - link.setAttribute( 'href', href ); - return link; }; - return [ + }, + internalLink = ( pageTitle, params ) => { + return () => { + const link = document.createElement( 'a' ); + link.setAttribute( 'href', location.origin + mw.Title.newFromText( pageTitle ).getUrl( params ) ); + return link; + }; + }, + cases = [ { msg: 'Not an internal link', element: externalLink( 'http://example.com/' ), @@ -53,7 +47,7 @@ QUnit.test( 'toDataElement', ( assert ) => { }, { msg: 'Relative path', - element: parsoidLink( './Foo' ), + element: externalLink( './Foo' ), expected: { type: 'link/mwInternal', attributes: { @@ -69,7 +63,7 @@ QUnit.test( 'toDataElement', ( assert ) => { expected: { type: 'link/mwExternal', attributes: { - href: mw.Title.newFromText( 'Foo' ).getUrl( { action: 'history' } ) + href: location.origin + mw.Title.newFromText( 'Foo' ).getUrl( { action: 'history' } ) } } }, @@ -79,7 +73,7 @@ QUnit.test( 'toDataElement', ( assert ) => { expected: { type: 'link/mwExternal', attributes: { - href: mw.Title.newFromText( 'Foo' ).getUrl( { diff: '3', oldid: '2' } ) + href: location.origin + mw.Title.newFromText( 'Foo' ).getUrl( { diff: '3', oldid: '2' } ) } } }, @@ -99,14 +93,24 @@ QUnit.test( 'toDataElement', ( assert ) => { // Because percent-encoded URLs aren't valid titles, but what they decode to might be msg: 'Percent encoded characters', element: internalLink( 'Foo?' ), - expected: { - type: 'link/mwInternal', - attributes: { - lookupTitle: 'Foo?', - normalizedTitle: 'Foo?', - title: 'Foo?' + expected: [ + { + type: 'link/mwInternal', + attributes: { + lookupTitle: 'Foo?', + normalizedTitle: 'Foo?', + title: 'Foo?' + } + }, + { + type: 'link/mwInternal', + attributes: { + lookupTitle: 'Foo?', + normalizedTitle: 'Foo?', + title: 'Foo?' + } } - } + ] }, { // The fragment should make it into some parts of this, and not others @@ -134,77 +138,41 @@ QUnit.test( 'toDataElement', ( assert ) => { } } } - ]; - }; + ], + converter = new ve.dm.Converter( ve.dm.modelRegistry, ve.dm.nodeFactory, ve.dm.annotationFactory, ve.dm.metaItemFactory ); - const converter = new ve.dm.Converter( ve.dm.modelRegistry, ve.dm.nodeFactory, ve.dm.annotationFactory, ve.dm.metaItemFactory ); + // toDataElement is called during a converter run, so we need to fake up a bit of state to test it. + // This would normally be done by ve.dm.converter.getModelFromDom. + converter.doc = doc.getHtmlDocument(); + converter.targetDoc = doc.getHtmlDocument(); + converter.store = doc.getStore(); + converter.internalList = doc.getInternalList(); + converter.contextStack = []; + converter.fromClipboard = true; const articlePaths = [ { - // MediaWiki config settings: - // $wgScriptPath = '/w'; - // $wgUsePathInfo = false; msg: 'query string URL', - config: { - wgServer: 'http://example.org', - wgScript: '/w/index.php', - wgArticlePath: '/w/index.php?title=$1' - }, - // Parsoid-generated given these MediaWiki config settings: - base: 'http://example.org/w/' + wgArticlePath: mw.config.get( 'wgScriptPath' ) + '/index.php?title=$1' }, { - // MediaWiki config settings: - // $wgScriptPath = '/w'; - // $wgUsePathInfo = true; - msg: 'short URL using pathinfo', - config: { - wgServer: 'http://example.org', - wgScript: '/w/index.php', - wgArticlePath: '/w/index.php/$1' - }, - // Parsoid-generated given these MediaWiki config settings: - base: 'http://example.org/w/index.php/' - }, - { - // MediaWiki config settings: - // $wgScriptPath = '/w'; - // $wgArticlePath = '/wiki/$1'; - msg: 'proper short URL', - config: { - wgServer: 'http://example.org', - wgScript: '/w/index.php', - wgArticlePath: '/wiki/$1' - }, - // Parsoid-generated given these MediaWiki config settings: - base: 'http://example.org/wiki/' + msg: 'short URL', + wgArticlePath: mw.config.get( 'wgScriptPath' ) + '/index.php/$1' } ]; - articlePaths.forEach( function ( pathData ) { - // Set up global state (site configuration) - mw.config.set( pathData.config ); - - const doc = ve.dm.mwExample.createExampleDocumentFromData( [], null, pathData.base ); - // toDataElement is called during a converter run, so we need to fake up a bit of state to test it. - // This would normally be done by ve.dm.converter.getModelFromDom. - converter.doc = doc.getHtmlDocument(); - converter.targetDoc = doc.getHtmlDocument(); - converter.store = doc.getStore(); - converter.internalList = doc.getInternalList(); - converter.contextStack = []; - converter.fromClipboard = true; - - // Generate test cases for this site configuration - const cases = getCases(); - for ( let i = 0; i < cases.length; i++ ) { + for ( let i = 0; i < cases.length; i++ ) { + articlePaths.forEach( function ( pathData, j ) { + mw.config.set( 'wgArticlePath', pathData.wgArticlePath ); assert.deepEqual( - ve.dm.MWInternalLinkAnnotation.static.toDataElement( [ cases[ i ].element ], converter ), - cases[ i ].expected, + ve.dm.MWInternalLinkAnnotation.static.toDataElement( [ cases[ i ].element() ], converter ), + Array.isArray( cases[ i ].expected ) ? + cases[ i ].expected[ j ] : + cases[ i ].expected, cases[ i ].msg + ': ' + pathData.msg ); - } - } ); + } ); + } } ); QUnit.test( 'getFragment', ( assert ) => { diff --git a/modules/ve-mw/tests/dm/ve.dm.mwExample.js b/modules/ve-mw/tests/dm/ve.dm.mwExample.js index 227a4c28cf..0e4906eb3d 100644 --- a/modules/ve-mw/tests/dm/ve.dm.mwExample.js +++ b/modules/ve-mw/tests/dm/ve.dm.mwExample.js @@ -223,7 +223,7 @@ ve.dm.mwExample.MWTransclusion.mixedStoreItems[ ve.dm.HashValueStore.prototype.h $.parseHTML( ve.dm.mwExample.MWTransclusion.mixed ); ve.dm.mwExample.MWInternalLink = { - absoluteHref: new URL( './Foo/Bar', ve.dm.mwExample.baseUri ).toString() + absoluteHref: new URL( '/wiki/Foo/Bar', ve.dm.example.baseUri ).toString() }; ve.dm.mwExample.MWInternalLink.absoluteOpen = ''; @@ -237,7 +237,7 @@ ve.dm.mwExample.MWInternalLink.absoluteData = { }; ve.dm.mwExample.MWInternalSectionLink = { - absoluteHref: new URL( './Foo#Bar', ve.dm.mwExample.baseUri ).toString() + absoluteHref: new URL( '/wiki/Foo#Bar', ve.dm.example.baseUri ).toString() }; ve.dm.mwExample.MWInternalSectionLink.absoluteOpen = ''; @@ -1560,7 +1560,7 @@ ve.dm.mwExample.domToDataCases = { }, 'internal link with absolute path': { body: '

' + ve.dm.mwExample.MWInternalLink.absoluteOpen + 'Foo

', - base: ve.dm.mwExample.baseUri, + base: ve.dm.example.baseUri, data: [ { type: 'paragraph' }, [ @@ -1586,7 +1586,7 @@ ve.dm.mwExample.domToDataCases = { }, 'internal link with absolute path and section': { body: '

' + ve.dm.mwExample.MWInternalSectionLink.absoluteOpen + 'Foo

', - base: ve.dm.mwExample.baseUri, + base: ve.dm.example.baseUri, data: [ { type: 'paragraph' }, [ @@ -1612,7 +1612,7 @@ ve.dm.mwExample.domToDataCases = { }, 'internal link with href set to ./': { body: '

x

', - base: ve.dm.mwExample.baseUri, + base: 'http://example.com', data: [ { type: 'paragraph' }, [ @@ -1634,7 +1634,7 @@ ve.dm.mwExample.domToDataCases = { 'internal link with special characters': { body: '

x

', ignoreXmlWarnings: true, - base: ve.dm.mwExample.baseUri, + base: 'http://example.com', data: [ { type: 'paragraph' }, [ @@ -1860,7 +1860,7 @@ ve.dm.mwExample.domToDataCases = { '' + ve.dm.example.commentNodePreview( 'barbaz' ) + '' + '', - base: ve.dm.mwExample.baseUri, + base: 'http://example.com', data: ve.dm.mwExample.withMeta, realData: ve.dm.mwExample.withMetaRealData }, @@ -2137,7 +2137,7 @@ ve.dm.mwExample.domToDataCases = { ' foo bar baz' + '' + '', - base: ve.dm.mwExample.baseUri, + base: 'http://example.com', data: [ { type: 'mwBlockImage', @@ -2269,7 +2269,7 @@ ve.dm.mwExample.domToDataCases = { 'plain internal links when pasted are converted to link/mwInternal': { fromClipboard: true, body: 'ab', - base: ve.dm.mwExample.baseUri, + base: ve.dm.example.baseUri, data: [ { type: 'paragraph', diff --git a/modules/ve-mw/tests/preinit/ve.utils.parsoid.test.js b/modules/ve-mw/tests/preinit/ve.utils.parsoid.test.js index 557b3752db..01131ca30d 100644 --- a/modules/ve-mw/tests/preinit/ve.utils.parsoid.test.js +++ b/modules/ve-mw/tests/preinit/ve.utils.parsoid.test.js @@ -122,38 +122,6 @@ QUnit.test( 'getTargetDataFromHref', ( assert ) => { isInternal: true } }, - { - msg: 'Old parser link', - href: '/wiki/Foo', - expected: { - title: 'Foo', - isInternal: true - } - }, - { - msg: 'Old parser red link', - href: '/w/index.php?title=Foo&action=edit&redlink=1', - expected: { - title: 'Foo', - isInternal: true - } - }, - { - msg: 'Old parser link with fragment', - href: '/wiki/Foo#Bar', - expected: { - title: 'Foo#Bar', - isInternal: true - } - }, - { - msg: 'Old parser red link with fragment (old parser does not actually generate links like this, but we recognize them)', - href: '/w/index.php?title=Foo&action=edit&redlink=1#Bar', - expected: { - title: 'Foo#Bar', - isInternal: true - } - }, { msg: 'Full URL link to current wiki', href: 'http://example.com/wiki/Foo', diff --git a/modules/ve-mw/tests/ui/datatransferhandlers/ve.ui.MWWikitextStringTransferHandler.test.js b/modules/ve-mw/tests/ui/datatransferhandlers/ve.ui.MWWikitextStringTransferHandler.test.js index f3ab8a905b..1fdb16dd92 100644 --- a/modules/ve-mw/tests/ui/datatransferhandlers/ve.ui.MWWikitextStringTransferHandler.test.js +++ b/modules/ve-mw/tests/ui/datatransferhandlers/ve.ui.MWWikitextStringTransferHandler.test.js @@ -259,9 +259,6 @@ QUnit.test( 'convert', function ( assert ) { } ]; - mw.config.set( { - wgArticlePath: '/wiki/$1' - } ); for ( let i = 0; i < cases.length; i++ ) { ve.test.utils.runWikitextStringHandlerTest( assert, this.server, cases[ i ].pasteString, cases[ i ].pasteType, cases[ i ].parsoidResponse,