Revert "Rewrite mw.libs.ve.getTargetDataFromHref with URL API"

This reverts commit 461c76981f.

Bug: T328143
Change-Id: Ib59192c650736eac9d4a2db130c3e29720c30486
This commit is contained in:
Bartosz Dziewoński 2023-01-30 13:52:39 +00:00
parent bd30d326e2
commit 94d3aee9f6
5 changed files with 148 additions and 178 deletions

View file

@ -6,6 +6,23 @@
mw.libs.ve = mw.libs.ve || {};
/**
* Resolve a URL relative to a given base.
*
* Copied from ve.resolveUrl
*
* @param {string} url URL to resolve
* @param {HTMLDocument} base Document whose base URL to use
* @return {string} Resolved URL
*/
mw.libs.ve.resolveUrl = function ( url, base ) {
var node = base.createElement( 'a' );
node.setAttribute( 'href', url );
// If doc.baseURI isn't set, node.href will be an empty string
// This is crazy, returning the original URL is better
return node.href || url;
};
/**
* Decode a URI component into a mediawiki article title
*
@ -256,62 +273,82 @@ mw.libs.ve.getTargetDataFromHref = function ( href, doc ) {
return str.replace( /([.?*+^$[\]\\(){}|-])/g, '\\$1' );
}
function returnExternalData() {
return { isInternal: false };
}
function returnInternalData( titleish ) {
// This value doesn't necessarily come from Parsoid (and it might not have the "./" prefix), but
// this method will work fine.
var data = mw.libs.ve.parseParsoidResourceName( titleish );
data.isInternal = true;
return data;
}
var url = new URL( href, doc.baseURI );
var isInternal = null;
// Protocol relative href
var relativeHref = href.replace( /^https?:/i, '' );
var uri, queryLength;
// Equivalent to `ve.init.platform.getExternalLinkUrlProtocolsRegExp()`, which can not be called here
var externalLinkUrlProtocolsRegExp = new RegExp( '^(' + mw.config.get( 'wgUrlProtocols' ) + ')', 'i' );
// We don't want external links that don't start with a registered external URL protocol
// (to avoid generating 'javascript:' URLs), so treat it as internal
if ( !externalLinkUrlProtocolsRegExp.test( url.toString() ) ) {
return returnInternalData( url.toString() );
}
// Strip red link query parameters
if ( url.searchParams.get( 'action' ) === 'edit' && url.searchParams.get( 'redlink' ) === '1' ) {
url.searchParams.delete( 'action' );
url.searchParams.delete( 'redlink' );
}
// Count remaining query parameters
var keys = [];
url.searchParams.forEach( function ( val, key ) {
keys.push( key );
} );
var queryLength = keys.length;
var relativeHref = url.toString().replace( /^https?:/i, '' );
// Check if this matches the server's script path (as used by red links)
var scriptBase = new URL( mw.config.get( 'wgScript' ), doc.baseURI ).toString().replace( /^https?:/i, '' );
if ( relativeHref.indexOf( scriptBase ) === 0 ) {
if ( queryLength === 1 && url.searchParams.get( 'title' ) ) {
return returnInternalData( url.searchParams.get( 'title' ) + url.hash );
// Paths that don't start with a registered external url protocol
if ( !externalLinkUrlProtocolsRegExp.test( href ) ) {
isInternal = true;
if ( href.match( /^\.\// ) ) {
// The specific case of parsoid resource URIs, which are in the form `./Title`.
// If they're redlinks they now include a querystring which should be stripped.
try {
uri = new mw.Uri( href );
} catch ( e ) {
// probably an incorrecly encoded URI, try a very-naïve fallback
href = href.replace( /\?action=edit&redlink=1$/, '' );
}
if ( uri ) {
queryLength = Object.keys( uri.query ).length;
if (
( queryLength === 2 && uri.query.action === 'edit' && uri.query.redlink === '1' )
) {
uri.query = {};
href = '.' + uri.getRelativePath();
}
}
}
} else {
// Check if this matches the server's script path (as used by red links)
var scriptBase = mw.libs.ve.resolveUrl( mw.config.get( 'wgScript' ), doc ).replace( /^https?:/i, '' );
if ( relativeHref.indexOf( scriptBase ) === 0 ) {
try {
uri = new mw.Uri( relativeHref );
} catch ( e ) {
// probably an incorrectly encoded URI
}
if ( uri ) {
queryLength = Object.keys( uri.query ).length;
if (
( queryLength === 1 && uri.query.title ) ||
( queryLength === 3 && uri.query.title && uri.query.action === 'edit' && uri.query.redlink === '1' )
) {
href = uri.query.title + ( uri.fragment ? '#' + uri.fragment : '' );
isInternal = true;
} else if ( queryLength > 1 ) {
href = relativeHref;
isInternal = false;
}
}
}
if ( isInternal === null ) {
// Check if this matches the server's article path
var articleBase = mw.libs.ve.resolveUrl( mw.config.get( 'wgArticlePath' ), doc ).replace( /^https?:/i, '' );
var articleBaseRegex = new RegExp( regexEscape( articleBase ).replace( regexEscape( '$1' ), '(.*)' ) );
var matches = relativeHref.match( articleBaseRegex );
if ( matches && matches[ 1 ].split( '#' )[ 0 ].indexOf( '?' ) === -1 ) {
// Take the relative path
href = matches[ 1 ];
isInternal = true;
} else {
isInternal = false;
}
}
}
// Check if this matches the server's article path
var articleBase = new URL( mw.config.get( 'wgArticlePath' ), doc.baseURI ).toString().replace( /^https?:/i, '' );
var articleBaseRegex = new RegExp( regexEscape( articleBase ).replace( regexEscape( '$1' ), '(.*)' ) );
var matches = relativeHref.match( articleBaseRegex );
if ( matches ) {
if ( queryLength === 0 && matches && matches[ 1 ].split( '#' )[ 0 ].indexOf( '?' ) === -1 ) {
// Take the relative path
return returnInternalData( matches[ 1 ] );
}
if ( !isInternal ) {
return { isInternal: false };
}
// Doesn't match any of the known URL patterns, or has extra parameters
return returnExternalData();
// This href doesn't necessarily come from Parsoid (and it might not have the "./" prefix), but
// this method will work fine.
var data = mw.libs.ve.parseParsoidResourceName( href );
data.isInternal = true;
return data;
};
/**

View file

@ -7,28 +7,22 @@
QUnit.module( 've.dm.MWInternalLinkAnnotation', ve.test.utils.newMwEnvironment() );
QUnit.test( 'toDataElement', ( assert ) => {
// The expected data depends on site configuration, so we need to generate the cases several times.
const getCases = () => {
const
externalLink = ( href ) => {
const doc = ve.dm.mwExample.createExampleDocumentFromData( [], null, location.origin ),
externalLink = ( href ) => {
return () => {
const link = document.createElement( 'a' );
link.setAttribute( 'href', href );
return link;
},
internalLink = ( pageTitle, params ) => {
const link = document.createElement( 'a' );
link.setAttribute( 'href', mw.Title.newFromText( pageTitle ).getUrl( params ) );
return link;
},
parsoidLink = ( href ) => {
const link = document.createElement( 'a' );
if ( mw.config.get( 'wgArticlePath' ).includes( '?' ) ) {
href = href.replace( './', './index.php?title=' );
}
link.setAttribute( 'href', href );
return link;
};
return [
},
internalLink = ( pageTitle, params ) => {
return () => {
const link = document.createElement( 'a' );
link.setAttribute( 'href', location.origin + mw.Title.newFromText( pageTitle ).getUrl( params ) );
return link;
};
},
cases = [
{
msg: 'Not an internal link',
element: externalLink( 'http://example.com/' ),
@ -53,7 +47,7 @@ QUnit.test( 'toDataElement', ( assert ) => {
},
{
msg: 'Relative path',
element: parsoidLink( './Foo' ),
element: externalLink( './Foo' ),
expected: {
type: 'link/mwInternal',
attributes: {
@ -69,7 +63,7 @@ QUnit.test( 'toDataElement', ( assert ) => {
expected: {
type: 'link/mwExternal',
attributes: {
href: mw.Title.newFromText( 'Foo' ).getUrl( { action: 'history' } )
href: location.origin + mw.Title.newFromText( 'Foo' ).getUrl( { action: 'history' } )
}
}
},
@ -79,7 +73,7 @@ QUnit.test( 'toDataElement', ( assert ) => {
expected: {
type: 'link/mwExternal',
attributes: {
href: mw.Title.newFromText( 'Foo' ).getUrl( { diff: '3', oldid: '2' } )
href: location.origin + mw.Title.newFromText( 'Foo' ).getUrl( { diff: '3', oldid: '2' } )
}
}
},
@ -99,14 +93,24 @@ QUnit.test( 'toDataElement', ( assert ) => {
// Because percent-encoded URLs aren't valid titles, but what they decode to might be
msg: 'Percent encoded characters',
element: internalLink( 'Foo?' ),
expected: {
type: 'link/mwInternal',
attributes: {
lookupTitle: 'Foo?',
normalizedTitle: 'Foo?',
title: 'Foo?'
expected: [
{
type: 'link/mwInternal',
attributes: {
lookupTitle: 'Foo?',
normalizedTitle: 'Foo?',
title: 'Foo?'
}
},
{
type: 'link/mwInternal',
attributes: {
lookupTitle: 'Foo?',
normalizedTitle: 'Foo?',
title: 'Foo?'
}
}
}
]
},
{
// The fragment should make it into some parts of this, and not others
@ -134,77 +138,41 @@ QUnit.test( 'toDataElement', ( assert ) => {
}
}
}
];
};
],
converter = new ve.dm.Converter( ve.dm.modelRegistry, ve.dm.nodeFactory, ve.dm.annotationFactory, ve.dm.metaItemFactory );
const converter = new ve.dm.Converter( ve.dm.modelRegistry, ve.dm.nodeFactory, ve.dm.annotationFactory, ve.dm.metaItemFactory );
// toDataElement is called during a converter run, so we need to fake up a bit of state to test it.
// This would normally be done by ve.dm.converter.getModelFromDom.
converter.doc = doc.getHtmlDocument();
converter.targetDoc = doc.getHtmlDocument();
converter.store = doc.getStore();
converter.internalList = doc.getInternalList();
converter.contextStack = [];
converter.fromClipboard = true;
const articlePaths = [
{
// MediaWiki config settings:
// $wgScriptPath = '/w';
// $wgUsePathInfo = false;
msg: 'query string URL',
config: {
wgServer: 'http://example.org',
wgScript: '/w/index.php',
wgArticlePath: '/w/index.php?title=$1'
},
// Parsoid-generated <base href> given these MediaWiki config settings:
base: 'http://example.org/w/'
wgArticlePath: mw.config.get( 'wgScriptPath' ) + '/index.php?title=$1'
},
{
// MediaWiki config settings:
// $wgScriptPath = '/w';
// $wgUsePathInfo = true;
msg: 'short URL using pathinfo',
config: {
wgServer: 'http://example.org',
wgScript: '/w/index.php',
wgArticlePath: '/w/index.php/$1'
},
// Parsoid-generated <base href> given these MediaWiki config settings:
base: 'http://example.org/w/index.php/'
},
{
// MediaWiki config settings:
// $wgScriptPath = '/w';
// $wgArticlePath = '/wiki/$1';
msg: 'proper short URL',
config: {
wgServer: 'http://example.org',
wgScript: '/w/index.php',
wgArticlePath: '/wiki/$1'
},
// Parsoid-generated <base href> given these MediaWiki config settings:
base: 'http://example.org/wiki/'
msg: 'short URL',
wgArticlePath: mw.config.get( 'wgScriptPath' ) + '/index.php/$1'
}
];
articlePaths.forEach( function ( pathData ) {
// Set up global state (site configuration)
mw.config.set( pathData.config );
const doc = ve.dm.mwExample.createExampleDocumentFromData( [], null, pathData.base );
// toDataElement is called during a converter run, so we need to fake up a bit of state to test it.
// This would normally be done by ve.dm.converter.getModelFromDom.
converter.doc = doc.getHtmlDocument();
converter.targetDoc = doc.getHtmlDocument();
converter.store = doc.getStore();
converter.internalList = doc.getInternalList();
converter.contextStack = [];
converter.fromClipboard = true;
// Generate test cases for this site configuration
const cases = getCases();
for ( let i = 0; i < cases.length; i++ ) {
for ( let i = 0; i < cases.length; i++ ) {
articlePaths.forEach( function ( pathData, j ) {
mw.config.set( 'wgArticlePath', pathData.wgArticlePath );
assert.deepEqual(
ve.dm.MWInternalLinkAnnotation.static.toDataElement( [ cases[ i ].element ], converter ),
cases[ i ].expected,
ve.dm.MWInternalLinkAnnotation.static.toDataElement( [ cases[ i ].element() ], converter ),
Array.isArray( cases[ i ].expected ) ?
cases[ i ].expected[ j ] :
cases[ i ].expected,
cases[ i ].msg + ': ' + pathData.msg
);
}
} );
} );
}
} );
QUnit.test( 'getFragment', ( assert ) => {

View file

@ -223,7 +223,7 @@ ve.dm.mwExample.MWTransclusion.mixedStoreItems[ ve.dm.HashValueStore.prototype.h
$.parseHTML( ve.dm.mwExample.MWTransclusion.mixed );
ve.dm.mwExample.MWInternalLink = {
absoluteHref: new URL( './Foo/Bar', ve.dm.mwExample.baseUri ).toString()
absoluteHref: new URL( '/wiki/Foo/Bar', ve.dm.example.baseUri ).toString()
};
ve.dm.mwExample.MWInternalLink.absoluteOpen = '<a rel="mw:WikiLink" href="' + ve.dm.mwExample.MWInternalLink.absoluteHref + '">';
@ -237,7 +237,7 @@ ve.dm.mwExample.MWInternalLink.absoluteData = {
};
ve.dm.mwExample.MWInternalSectionLink = {
absoluteHref: new URL( './Foo#Bar', ve.dm.mwExample.baseUri ).toString()
absoluteHref: new URL( '/wiki/Foo#Bar', ve.dm.example.baseUri ).toString()
};
ve.dm.mwExample.MWInternalSectionLink.absoluteOpen = '<a rel="mw:WikiLink" href="' + ve.dm.mwExample.MWInternalSectionLink.absoluteHref + '">';
@ -1560,7 +1560,7 @@ ve.dm.mwExample.domToDataCases = {
},
'internal link with absolute path': {
body: '<p>' + ve.dm.mwExample.MWInternalLink.absoluteOpen + 'Foo</a></p>',
base: ve.dm.mwExample.baseUri,
base: ve.dm.example.baseUri,
data: [
{ type: 'paragraph' },
[
@ -1586,7 +1586,7 @@ ve.dm.mwExample.domToDataCases = {
},
'internal link with absolute path and section': {
body: '<p>' + ve.dm.mwExample.MWInternalSectionLink.absoluteOpen + 'Foo</a></p>',
base: ve.dm.mwExample.baseUri,
base: ve.dm.example.baseUri,
data: [
{ type: 'paragraph' },
[
@ -1612,7 +1612,7 @@ ve.dm.mwExample.domToDataCases = {
},
'internal link with href set to ./': {
body: '<p><a rel="mw:WikiLink" href="./">x</a></p>',
base: ve.dm.mwExample.baseUri,
base: 'http://example.com',
data: [
{ type: 'paragraph' },
[
@ -1634,7 +1634,7 @@ ve.dm.mwExample.domToDataCases = {
'internal link with special characters': {
body: '<p><a rel="mw:WikiLink" href="./Foo%3F+%25&Bar">x</a></p>',
ignoreXmlWarnings: true,
base: ve.dm.mwExample.baseUri,
base: 'http://example.com',
data: [
{ type: 'paragraph' },
[
@ -1860,7 +1860,7 @@ ve.dm.mwExample.domToDataCases = {
'<meta property="mw:bar" content="baz" />' + ve.dm.example.commentNodePreview( 'barbaz' ) +
'<link rel="mw:PageProp/Category" href="./Category:Foo_foo#Bar%20baz%23quux" />' +
'<meta typeof="mw:Placeholder" data-parsoid="foobar" />',
base: ve.dm.mwExample.baseUri,
base: 'http://example.com',
data: ve.dm.mwExample.withMeta,
realData: ve.dm.mwExample.withMetaRealData
},
@ -2137,7 +2137,7 @@ ve.dm.mwExample.domToDataCases = {
' foo <a rel="mw:WikiLink" href="./Bar">bar</a> baz' +
'</figcaption>' +
'</figure>',
base: ve.dm.mwExample.baseUri,
base: 'http://example.com',
data: [
{
type: 'mwBlockImage',
@ -2269,7 +2269,7 @@ ve.dm.mwExample.domToDataCases = {
'plain internal links when pasted are converted to link/mwInternal': {
fromClipboard: true,
body: '<a href="' + ve.dm.mwExample.MWInternalLink.absoluteHref + '">ab</a>',
base: ve.dm.mwExample.baseUri,
base: ve.dm.example.baseUri,
data: [
{
type: 'paragraph',

View file

@ -122,38 +122,6 @@ QUnit.test( 'getTargetDataFromHref', ( assert ) => {
isInternal: true
}
},
{
msg: 'Old parser link',
href: '/wiki/Foo',
expected: {
title: 'Foo',
isInternal: true
}
},
{
msg: 'Old parser red link',
href: '/w/index.php?title=Foo&action=edit&redlink=1',
expected: {
title: 'Foo',
isInternal: true
}
},
{
msg: 'Old parser link with fragment',
href: '/wiki/Foo#Bar',
expected: {
title: 'Foo#Bar',
isInternal: true
}
},
{
msg: 'Old parser red link with fragment (old parser does not actually generate links like this, but we recognize them)',
href: '/w/index.php?title=Foo&action=edit&redlink=1#Bar',
expected: {
title: 'Foo#Bar',
isInternal: true
}
},
{
msg: 'Full URL link to current wiki',
href: 'http://example.com/wiki/Foo',

View file

@ -259,9 +259,6 @@ QUnit.test( 'convert', function ( assert ) {
}
];
mw.config.set( {
wgArticlePath: '/wiki/$1'
} );
for ( let i = 0; i < cases.length; i++ ) {
ve.test.utils.runWikitextStringHandlerTest(
assert, this.server, cases[ i ].pasteString, cases[ i ].pasteType, cases[ i ].parsoidResponse,