From 94d3aee9f677a4b3a4e11f444dcf1adb0d26bdd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bartosz=20Dziewo=C5=84ski?=
Date: Mon, 30 Jan 2023 13:52:39 +0000
Subject: [PATCH] Revert "Rewrite mw.libs.ve.getTargetDataFromHref with URL
API"
This reverts commit 461c76981f474ba0f8285b8747722faf87d04f29.
Bug: T328143
Change-Id: Ib59192c650736eac9d4a2db130c3e29720c30486
---
modules/ve-mw/preinit/ve.utils.parsoid.js | 133 +++++++++++------
.../ve.dm.MWInternalLinkAnnotation.test.js | 140 +++++++-----------
modules/ve-mw/tests/dm/ve.dm.mwExample.js | 18 +--
.../tests/preinit/ve.utils.parsoid.test.js | 32 ----
...ui.MWWikitextStringTransferHandler.test.js | 3 -
5 files changed, 148 insertions(+), 178 deletions(-)
diff --git a/modules/ve-mw/preinit/ve.utils.parsoid.js b/modules/ve-mw/preinit/ve.utils.parsoid.js
index 59dd1d6fa8..a8a1ea6fa1 100644
--- a/modules/ve-mw/preinit/ve.utils.parsoid.js
+++ b/modules/ve-mw/preinit/ve.utils.parsoid.js
@@ -6,6 +6,23 @@
mw.libs.ve = mw.libs.ve || {};
+/**
+ * Resolve a URL relative to a given base.
+ *
+ * Copied from ve.resolveUrl
+ *
+ * @param {string} url URL to resolve
+ * @param {HTMLDocument} base Document whose base URL to use
+ * @return {string} Resolved URL
+ */
+mw.libs.ve.resolveUrl = function ( url, base ) {
+ var node = base.createElement( 'a' );
+ node.setAttribute( 'href', url );
+ // If doc.baseURI isn't set, node.href will be an empty string
+ // This is crazy, returning the original URL is better
+ return node.href || url;
+};
+
/**
* Decode a URI component into a mediawiki article title
*
@@ -256,62 +273,82 @@ mw.libs.ve.getTargetDataFromHref = function ( href, doc ) {
return str.replace( /([.?*+^$[\]\\(){}|-])/g, '\\$1' );
}
- function returnExternalData() {
- return { isInternal: false };
- }
-
- function returnInternalData( titleish ) {
- // This value doesn't necessarily come from Parsoid (and it might not have the "./" prefix), but
- // this method will work fine.
- var data = mw.libs.ve.parseParsoidResourceName( titleish );
- data.isInternal = true;
- return data;
- }
-
- var url = new URL( href, doc.baseURI );
+ var isInternal = null;
+ // Protocol relative href
+ var relativeHref = href.replace( /^https?:/i, '' );
+ var uri, queryLength;
// Equivalent to `ve.init.platform.getExternalLinkUrlProtocolsRegExp()`, which can not be called here
var externalLinkUrlProtocolsRegExp = new RegExp( '^(' + mw.config.get( 'wgUrlProtocols' ) + ')', 'i' );
- // We don't want external links that don't start with a registered external URL protocol
- // (to avoid generating 'javascript:' URLs), so treat it as internal
- if ( !externalLinkUrlProtocolsRegExp.test( url.toString() ) ) {
- return returnInternalData( url.toString() );
- }
-
- // Strip red link query parameters
- if ( url.searchParams.get( 'action' ) === 'edit' && url.searchParams.get( 'redlink' ) === '1' ) {
- url.searchParams.delete( 'action' );
- url.searchParams.delete( 'redlink' );
- }
- // Count remaining query parameters
- var keys = [];
- url.searchParams.forEach( function ( val, key ) {
- keys.push( key );
- } );
- var queryLength = keys.length;
-
- var relativeHref = url.toString().replace( /^https?:/i, '' );
- // Check if this matches the server's script path (as used by red links)
- var scriptBase = new URL( mw.config.get( 'wgScript' ), doc.baseURI ).toString().replace( /^https?:/i, '' );
- if ( relativeHref.indexOf( scriptBase ) === 0 ) {
- if ( queryLength === 1 && url.searchParams.get( 'title' ) ) {
- return returnInternalData( url.searchParams.get( 'title' ) + url.hash );
+ // Paths that don't start with a registered external url protocol
+ if ( !externalLinkUrlProtocolsRegExp.test( href ) ) {
+ isInternal = true;
+ if ( href.match( /^\.\// ) ) {
+ // The specific case of parsoid resource URIs, which are in the form `./Title`.
+ // If they're redlinks they now include a querystring which should be stripped.
+ try {
+ uri = new mw.Uri( href );
+ } catch ( e ) {
+ // probably an incorrecly encoded URI, try a very-naïve fallback
+ href = href.replace( /\?action=edit&redlink=1$/, '' );
+ }
+ if ( uri ) {
+ queryLength = Object.keys( uri.query ).length;
+ if (
+ ( queryLength === 2 && uri.query.action === 'edit' && uri.query.redlink === '1' )
+ ) {
+ uri.query = {};
+ href = '.' + uri.getRelativePath();
+ }
+ }
+ }
+ } else {
+ // Check if this matches the server's script path (as used by red links)
+ var scriptBase = mw.libs.ve.resolveUrl( mw.config.get( 'wgScript' ), doc ).replace( /^https?:/i, '' );
+ if ( relativeHref.indexOf( scriptBase ) === 0 ) {
+ try {
+ uri = new mw.Uri( relativeHref );
+ } catch ( e ) {
+ // probably an incorrectly encoded URI
+ }
+ if ( uri ) {
+ queryLength = Object.keys( uri.query ).length;
+ if (
+ ( queryLength === 1 && uri.query.title ) ||
+ ( queryLength === 3 && uri.query.title && uri.query.action === 'edit' && uri.query.redlink === '1' )
+ ) {
+ href = uri.query.title + ( uri.fragment ? '#' + uri.fragment : '' );
+ isInternal = true;
+ } else if ( queryLength > 1 ) {
+ href = relativeHref;
+ isInternal = false;
+ }
+ }
+ }
+ if ( isInternal === null ) {
+ // Check if this matches the server's article path
+ var articleBase = mw.libs.ve.resolveUrl( mw.config.get( 'wgArticlePath' ), doc ).replace( /^https?:/i, '' );
+ var articleBaseRegex = new RegExp( regexEscape( articleBase ).replace( regexEscape( '$1' ), '(.*)' ) );
+ var matches = relativeHref.match( articleBaseRegex );
+ if ( matches && matches[ 1 ].split( '#' )[ 0 ].indexOf( '?' ) === -1 ) {
+ // Take the relative path
+ href = matches[ 1 ];
+ isInternal = true;
+ } else {
+ isInternal = false;
+ }
}
}
- // Check if this matches the server's article path
- var articleBase = new URL( mw.config.get( 'wgArticlePath' ), doc.baseURI ).toString().replace( /^https?:/i, '' );
- var articleBaseRegex = new RegExp( regexEscape( articleBase ).replace( regexEscape( '$1' ), '(.*)' ) );
- var matches = relativeHref.match( articleBaseRegex );
- if ( matches ) {
- if ( queryLength === 0 && matches && matches[ 1 ].split( '#' )[ 0 ].indexOf( '?' ) === -1 ) {
- // Take the relative path
- return returnInternalData( matches[ 1 ] );
- }
+ if ( !isInternal ) {
+ return { isInternal: false };
}
- // Doesn't match any of the known URL patterns, or has extra parameters
- return returnExternalData();
+ // This href doesn't necessarily come from Parsoid (and it might not have the "./" prefix), but
+ // this method will work fine.
+ var data = mw.libs.ve.parseParsoidResourceName( href );
+ data.isInternal = true;
+ return data;
};
/**
diff --git a/modules/ve-mw/tests/dm/annotations/ve.dm.MWInternalLinkAnnotation.test.js b/modules/ve-mw/tests/dm/annotations/ve.dm.MWInternalLinkAnnotation.test.js
index 4b13763a15..64b8331995 100644
--- a/modules/ve-mw/tests/dm/annotations/ve.dm.MWInternalLinkAnnotation.test.js
+++ b/modules/ve-mw/tests/dm/annotations/ve.dm.MWInternalLinkAnnotation.test.js
@@ -7,28 +7,22 @@
QUnit.module( 've.dm.MWInternalLinkAnnotation', ve.test.utils.newMwEnvironment() );
QUnit.test( 'toDataElement', ( assert ) => {
- // The expected data depends on site configuration, so we need to generate the cases several times.
- const getCases = () => {
- const
- externalLink = ( href ) => {
+ const doc = ve.dm.mwExample.createExampleDocumentFromData( [], null, location.origin ),
+ externalLink = ( href ) => {
+ return () => {
const link = document.createElement( 'a' );
link.setAttribute( 'href', href );
return link;
- },
- internalLink = ( pageTitle, params ) => {
- const link = document.createElement( 'a' );
- link.setAttribute( 'href', mw.Title.newFromText( pageTitle ).getUrl( params ) );
- return link;
- },
- parsoidLink = ( href ) => {
- const link = document.createElement( 'a' );
- if ( mw.config.get( 'wgArticlePath' ).includes( '?' ) ) {
- href = href.replace( './', './index.php?title=' );
- }
- link.setAttribute( 'href', href );
- return link;
};
- return [
+ },
+ internalLink = ( pageTitle, params ) => {
+ return () => {
+ const link = document.createElement( 'a' );
+ link.setAttribute( 'href', location.origin + mw.Title.newFromText( pageTitle ).getUrl( params ) );
+ return link;
+ };
+ },
+ cases = [
{
msg: 'Not an internal link',
element: externalLink( 'http://example.com/' ),
@@ -53,7 +47,7 @@ QUnit.test( 'toDataElement', ( assert ) => {
},
{
msg: 'Relative path',
- element: parsoidLink( './Foo' ),
+ element: externalLink( './Foo' ),
expected: {
type: 'link/mwInternal',
attributes: {
@@ -69,7 +63,7 @@ QUnit.test( 'toDataElement', ( assert ) => {
expected: {
type: 'link/mwExternal',
attributes: {
- href: mw.Title.newFromText( 'Foo' ).getUrl( { action: 'history' } )
+ href: location.origin + mw.Title.newFromText( 'Foo' ).getUrl( { action: 'history' } )
}
}
},
@@ -79,7 +73,7 @@ QUnit.test( 'toDataElement', ( assert ) => {
expected: {
type: 'link/mwExternal',
attributes: {
- href: mw.Title.newFromText( 'Foo' ).getUrl( { diff: '3', oldid: '2' } )
+ href: location.origin + mw.Title.newFromText( 'Foo' ).getUrl( { diff: '3', oldid: '2' } )
}
}
},
@@ -99,14 +93,24 @@ QUnit.test( 'toDataElement', ( assert ) => {
// Because percent-encoded URLs aren't valid titles, but what they decode to might be
msg: 'Percent encoded characters',
element: internalLink( 'Foo?' ),
- expected: {
- type: 'link/mwInternal',
- attributes: {
- lookupTitle: 'Foo?',
- normalizedTitle: 'Foo?',
- title: 'Foo?'
+ expected: [
+ {
+ type: 'link/mwInternal',
+ attributes: {
+ lookupTitle: 'Foo?',
+ normalizedTitle: 'Foo?',
+ title: 'Foo?'
+ }
+ },
+ {
+ type: 'link/mwInternal',
+ attributes: {
+ lookupTitle: 'Foo?',
+ normalizedTitle: 'Foo?',
+ title: 'Foo?'
+ }
}
- }
+ ]
},
{
// The fragment should make it into some parts of this, and not others
@@ -134,77 +138,41 @@ QUnit.test( 'toDataElement', ( assert ) => {
}
}
}
- ];
- };
+ ],
+ converter = new ve.dm.Converter( ve.dm.modelRegistry, ve.dm.nodeFactory, ve.dm.annotationFactory, ve.dm.metaItemFactory );
- const converter = new ve.dm.Converter( ve.dm.modelRegistry, ve.dm.nodeFactory, ve.dm.annotationFactory, ve.dm.metaItemFactory );
+ // toDataElement is called during a converter run, so we need to fake up a bit of state to test it.
+ // This would normally be done by ve.dm.converter.getModelFromDom.
+ converter.doc = doc.getHtmlDocument();
+ converter.targetDoc = doc.getHtmlDocument();
+ converter.store = doc.getStore();
+ converter.internalList = doc.getInternalList();
+ converter.contextStack = [];
+ converter.fromClipboard = true;
const articlePaths = [
{
- // MediaWiki config settings:
- // $wgScriptPath = '/w';
- // $wgUsePathInfo = false;
msg: 'query string URL',
- config: {
- wgServer: 'http://example.org',
- wgScript: '/w/index.php',
- wgArticlePath: '/w/index.php?title=$1'
- },
- // Parsoid-generated given these MediaWiki config settings:
- base: 'http://example.org/w/'
+ wgArticlePath: mw.config.get( 'wgScriptPath' ) + '/index.php?title=$1'
},
{
- // MediaWiki config settings:
- // $wgScriptPath = '/w';
- // $wgUsePathInfo = true;
- msg: 'short URL using pathinfo',
- config: {
- wgServer: 'http://example.org',
- wgScript: '/w/index.php',
- wgArticlePath: '/w/index.php/$1'
- },
- // Parsoid-generated given these MediaWiki config settings:
- base: 'http://example.org/w/index.php/'
- },
- {
- // MediaWiki config settings:
- // $wgScriptPath = '/w';
- // $wgArticlePath = '/wiki/$1';
- msg: 'proper short URL',
- config: {
- wgServer: 'http://example.org',
- wgScript: '/w/index.php',
- wgArticlePath: '/wiki/$1'
- },
- // Parsoid-generated given these MediaWiki config settings:
- base: 'http://example.org/wiki/'
+ msg: 'short URL',
+ wgArticlePath: mw.config.get( 'wgScriptPath' ) + '/index.php/$1'
}
];
- articlePaths.forEach( function ( pathData ) {
- // Set up global state (site configuration)
- mw.config.set( pathData.config );
-
- const doc = ve.dm.mwExample.createExampleDocumentFromData( [], null, pathData.base );
- // toDataElement is called during a converter run, so we need to fake up a bit of state to test it.
- // This would normally be done by ve.dm.converter.getModelFromDom.
- converter.doc = doc.getHtmlDocument();
- converter.targetDoc = doc.getHtmlDocument();
- converter.store = doc.getStore();
- converter.internalList = doc.getInternalList();
- converter.contextStack = [];
- converter.fromClipboard = true;
-
- // Generate test cases for this site configuration
- const cases = getCases();
- for ( let i = 0; i < cases.length; i++ ) {
+ for ( let i = 0; i < cases.length; i++ ) {
+ articlePaths.forEach( function ( pathData, j ) {
+ mw.config.set( 'wgArticlePath', pathData.wgArticlePath );
assert.deepEqual(
- ve.dm.MWInternalLinkAnnotation.static.toDataElement( [ cases[ i ].element ], converter ),
- cases[ i ].expected,
+ ve.dm.MWInternalLinkAnnotation.static.toDataElement( [ cases[ i ].element() ], converter ),
+ Array.isArray( cases[ i ].expected ) ?
+ cases[ i ].expected[ j ] :
+ cases[ i ].expected,
cases[ i ].msg + ': ' + pathData.msg
);
- }
- } );
+ } );
+ }
} );
QUnit.test( 'getFragment', ( assert ) => {
diff --git a/modules/ve-mw/tests/dm/ve.dm.mwExample.js b/modules/ve-mw/tests/dm/ve.dm.mwExample.js
index 227a4c28cf..0e4906eb3d 100644
--- a/modules/ve-mw/tests/dm/ve.dm.mwExample.js
+++ b/modules/ve-mw/tests/dm/ve.dm.mwExample.js
@@ -223,7 +223,7 @@ ve.dm.mwExample.MWTransclusion.mixedStoreItems[ ve.dm.HashValueStore.prototype.h
$.parseHTML( ve.dm.mwExample.MWTransclusion.mixed );
ve.dm.mwExample.MWInternalLink = {
- absoluteHref: new URL( './Foo/Bar', ve.dm.mwExample.baseUri ).toString()
+ absoluteHref: new URL( '/wiki/Foo/Bar', ve.dm.example.baseUri ).toString()
};
ve.dm.mwExample.MWInternalLink.absoluteOpen = '';
@@ -237,7 +237,7 @@ ve.dm.mwExample.MWInternalLink.absoluteData = {
};
ve.dm.mwExample.MWInternalSectionLink = {
- absoluteHref: new URL( './Foo#Bar', ve.dm.mwExample.baseUri ).toString()
+ absoluteHref: new URL( '/wiki/Foo#Bar', ve.dm.example.baseUri ).toString()
};
ve.dm.mwExample.MWInternalSectionLink.absoluteOpen = '';
@@ -1560,7 +1560,7 @@ ve.dm.mwExample.domToDataCases = {
},
'internal link with absolute path': {
body: '' + ve.dm.mwExample.MWInternalLink.absoluteOpen + 'Foo
',
- base: ve.dm.mwExample.baseUri,
+ base: ve.dm.example.baseUri,
data: [
{ type: 'paragraph' },
[
@@ -1586,7 +1586,7 @@ ve.dm.mwExample.domToDataCases = {
},
'internal link with absolute path and section': {
body: '' + ve.dm.mwExample.MWInternalSectionLink.absoluteOpen + 'Foo
',
- base: ve.dm.mwExample.baseUri,
+ base: ve.dm.example.baseUri,
data: [
{ type: 'paragraph' },
[
@@ -1612,7 +1612,7 @@ ve.dm.mwExample.domToDataCases = {
},
'internal link with href set to ./': {
body: 'x
',
- base: ve.dm.mwExample.baseUri,
+ base: 'http://example.com',
data: [
{ type: 'paragraph' },
[
@@ -1634,7 +1634,7 @@ ve.dm.mwExample.domToDataCases = {
'internal link with special characters': {
body: 'x
',
ignoreXmlWarnings: true,
- base: ve.dm.mwExample.baseUri,
+ base: 'http://example.com',
data: [
{ type: 'paragraph' },
[
@@ -1860,7 +1860,7 @@ ve.dm.mwExample.domToDataCases = {
'' + ve.dm.example.commentNodePreview( 'barbaz' ) +
'' +
'',
- base: ve.dm.mwExample.baseUri,
+ base: 'http://example.com',
data: ve.dm.mwExample.withMeta,
realData: ve.dm.mwExample.withMetaRealData
},
@@ -2137,7 +2137,7 @@ ve.dm.mwExample.domToDataCases = {
' foo bar baz' +
'' +
'',
- base: ve.dm.mwExample.baseUri,
+ base: 'http://example.com',
data: [
{
type: 'mwBlockImage',
@@ -2269,7 +2269,7 @@ ve.dm.mwExample.domToDataCases = {
'plain internal links when pasted are converted to link/mwInternal': {
fromClipboard: true,
body: 'ab',
- base: ve.dm.mwExample.baseUri,
+ base: ve.dm.example.baseUri,
data: [
{
type: 'paragraph',
diff --git a/modules/ve-mw/tests/preinit/ve.utils.parsoid.test.js b/modules/ve-mw/tests/preinit/ve.utils.parsoid.test.js
index 557b3752db..01131ca30d 100644
--- a/modules/ve-mw/tests/preinit/ve.utils.parsoid.test.js
+++ b/modules/ve-mw/tests/preinit/ve.utils.parsoid.test.js
@@ -122,38 +122,6 @@ QUnit.test( 'getTargetDataFromHref', ( assert ) => {
isInternal: true
}
},
- {
- msg: 'Old parser link',
- href: '/wiki/Foo',
- expected: {
- title: 'Foo',
- isInternal: true
- }
- },
- {
- msg: 'Old parser red link',
- href: '/w/index.php?title=Foo&action=edit&redlink=1',
- expected: {
- title: 'Foo',
- isInternal: true
- }
- },
- {
- msg: 'Old parser link with fragment',
- href: '/wiki/Foo#Bar',
- expected: {
- title: 'Foo#Bar',
- isInternal: true
- }
- },
- {
- msg: 'Old parser red link with fragment (old parser does not actually generate links like this, but we recognize them)',
- href: '/w/index.php?title=Foo&action=edit&redlink=1#Bar',
- expected: {
- title: 'Foo#Bar',
- isInternal: true
- }
- },
{
msg: 'Full URL link to current wiki',
href: 'http://example.com/wiki/Foo',
diff --git a/modules/ve-mw/tests/ui/datatransferhandlers/ve.ui.MWWikitextStringTransferHandler.test.js b/modules/ve-mw/tests/ui/datatransferhandlers/ve.ui.MWWikitextStringTransferHandler.test.js
index f3ab8a905b..1fdb16dd92 100644
--- a/modules/ve-mw/tests/ui/datatransferhandlers/ve.ui.MWWikitextStringTransferHandler.test.js
+++ b/modules/ve-mw/tests/ui/datatransferhandlers/ve.ui.MWWikitextStringTransferHandler.test.js
@@ -259,9 +259,6 @@ QUnit.test( 'convert', function ( assert ) {
}
];
- mw.config.set( {
- wgArticlePath: '/wiki/$1'
- } );
for ( let i = 0; i < cases.length; i++ ) {
ve.test.utils.runWikitextStringHandlerTest(
assert, this.server, cases[ i ].pasteString, cases[ i ].pasteType, cases[ i ].parsoidResponse,