Merge "Centralize Parsoid resource name parsing logic"

This commit is contained in:
jenkins-bot 2018-02-28 15:09:20 +00:00 committed by Gerrit Code Review
commit 9840316ee9
7 changed files with 58 additions and 31 deletions

View file

@ -102,7 +102,7 @@ ve.dm.MWInternalLinkAnnotation.static.newFromTitle = function ( title, rawTitle
* True if the href pointed to the local wiki, false if href is external
*/
ve.dm.MWInternalLinkAnnotation.static.getTargetDataFromHref = function ( href, doc ) {
var relativeBase, relativeBaseRegex, relativeHref, isInternal, matches;
var relativeBase, relativeBaseRegex, relativeHref, isInternal, matches, data;
function regexEscape( str ) {
return str.replace( /([.?*+^$[\]\\(){}|-])/g, '\\$1' );
@ -124,18 +124,14 @@ ve.dm.MWInternalLinkAnnotation.static.getTargetDataFromHref = function ( href, d
isInternal = true;
}
// The href is simply the title, unless we're dealing with a page that has slashes in its name
// in which case it's preceded by one or more instances of "./" or "../", so strip those
matches = href.match( /^((?:\.\.?\/)*)(.*)$/ );
// This href doesn't necessarily come from Parsoid (and it might not have the "./" prefix), but
// this method will work fine.
data = ve.parseParsoidResourceName( href );
// Percent-encoded characters are forbidden in titles... but if we're
// copy/pasting URLs around, they're likely to wind up encoded at this
// point. So decode them, otherwise this is going to cause failures
// elsewhere.
return {
title: ve.decodeURIComponentIntoArticleTitle( matches[ 2 ] ),
rawTitle: matches[ 2 ],
hrefPrefix: matches[ 1 ],
title: data.title,
rawTitle: data.rawTitle,
hrefPrefix: data.hrefPrefix,
isInternal: isInternal
};
};

View file

@ -34,16 +34,17 @@ ve.dm.MWCategoryMetaItem.static.matchRdfaTypes = [ 'mw:PageProp/Category' ];
ve.dm.MWCategoryMetaItem.static.toDataElement = function ( domElements ) {
var href = domElements[ 0 ].getAttribute( 'href' ),
matches = href.match( /^((?:\.\.?\/)*)(.*?)(?:#(.*))?$/ ),
rawSortkey = matches[ 3 ] || '';
data = ve.parseParsoidResourceName( href ),
rawTitleAndFragment = data.rawTitle.match( /^(.*?)(?:#(.*))?$/ ),
titleAndFragment = data.title.match( /^(.*?)(?:#(.*))?$/ );
return {
type: this.name,
attributes: {
hrefPrefix: matches[ 1 ],
category: ve.decodeURIComponentIntoArticleTitle( matches[ 2 ] ),
origCategory: matches[ 2 ],
sortkey: ve.decodeURIComponentIntoArticleTitle( rawSortkey ),
origSortkey: rawSortkey
hrefPrefix: data.hrefPrefix,
category: titleAndFragment[ 1 ],
origCategory: rawTitleAndFragment[ 1 ],
sortkey: titleAndFragment[ 2 ] || '',
origSortkey: rawTitleAndFragment[ 2 ] || ''
}
};
};

View file

@ -179,7 +179,7 @@ ve.dm.MWImageModel.static.newFromImageAttributes = function ( attrs, parentDoc )
imgModel.cacheOriginalImageAttributes( attrs );
imgModel.setImageSource( attrs.src );
imgModel.setFilename( new mw.Title( attrs.resource.replace( /^(\.\.?\/)*/, '' ) ).getMainText() );
imgModel.setFilename( new mw.Title( ve.normalizeParsoidResourceName( attrs.resource ) ).getMainText() );
imgModel.setImageHref( attrs.href );
// Set bounding box
@ -283,7 +283,7 @@ ve.dm.MWImageModel.prototype.changeImageSource = function ( attrs, APIinfo ) {
}
if ( attrs.resource ) {
this.setImageResourceName( attrs.resource );
this.setFilename( new mw.Title( attrs.resource.replace( /^(\.\.?\/)*/, '' ) ).getMainText() );
this.setFilename( new mw.Title( ve.normalizeParsoidResourceName( attrs.resource ) ).getMainText() );
}
if ( attrs.src ) {
@ -1132,7 +1132,7 @@ ve.dm.MWImageModel.prototype.getImageHref = function () {
* @param {ve.dm.Scalable} scalable Scalable object
*/
ve.dm.MWImageModel.prototype.attachScalable = function ( scalable ) {
var imageName = this.getResourceName().replace( /^(\.\.?\/)*/, '' ),
var imageName = ve.normalizeParsoidResourceName( this.getResourceName() ),
imageModel = this;
if ( this.scalable instanceof ve.dm.Scalable ) {
@ -1173,7 +1173,7 @@ ve.dm.MWImageModel.prototype.attachScalable = function ( scalable ) {
/**
* Set the filename of the current image
*
* @param {string} filename Image filename
* @param {string} filename Image filename (without namespace)
*/
ve.dm.MWImageModel.prototype.setFilename = function ( filename ) {
this.filename = filename;
@ -1182,7 +1182,7 @@ ve.dm.MWImageModel.prototype.setFilename = function ( filename ) {
/**
* Get the filename of the current image
*
* @return {string} filename Image filename
* @return {string} filename Image filename (without namespace)
*/
ve.dm.MWImageModel.prototype.getFilename = function () {
return this.filename;

View file

@ -25,7 +25,7 @@ ve.dm.MWTemplateModel = function VeDmMWTemplateModel( transclusion, target ) {
this.target = target;
// TODO: Either here or in uses of this constructor we need to validate the title
this.title = ( target.href && target.href.replace( /^(\.\.?\/)*/, '' ) ) || null;
this.title = target.href ? ve.normalizeParsoidResourceName( target.href ) : null;
this.sequence = null;
this.params = {};
this.spec = new ve.dm.MWTemplateSpecModel( this );

View file

@ -296,14 +296,10 @@ ve.dm.MWImageNode.prototype.onAttributeChange = function ( key, from, to ) {
/**
* Get the normalised filename of the image
*
* @return {string} Filename
* @return {string} Filename (including namespace)
*/
ve.dm.MWImageNode.prototype.getFilename = function () {
// Strip ./ stuff and decode URI encoding
var resource = this.getAttribute( 'resource' ) || '',
filename = resource.replace( /^(\.\.?\/)*/, '' );
return ve.decodeURIComponentIntoArticleTitle( filename, true );
return ve.normalizeParsoidResourceName( this.getAttribute( 'resource' ) || '' );
};
/**

View file

@ -391,7 +391,7 @@ ve.dm.MWTransclusionNode.prototype.getPartsList = function () {
part = content.parts[ i ];
if ( part.template ) {
href = part.template.target.href;
page = href ? ve.decodeURIComponentIntoArticleTitle( href.replace( /^(\.\.?\/)*/, '' ) ) : null;
page = href ? ve.normalizeParsoidResourceName( href ) : null;
this.partsList.push( {
template: part.template.target.wt,
templatePage: page

View file

@ -95,3 +95,37 @@ ve.expandModuleNames = function ( moduleNames ) {
} );
return modules;
};
/**
 * Break a Parsoid resource name apart into its relative href prefix and the page title after it.
 *
 * Parsoid always puts './' in front of resource names so that a MediaWiki namespace cannot be
 * mistaken for a URL protocol (consider e.g. 'href="./File:Foo.png"'). When the resource name
 * came from a page that is a subpage, it additionally carries the appropriate number of '../'.
 * Input without any prefix is accepted too, so plain page titles can be passed in as well.
 *
 * @param {string} resourceName Resource name, from a `href` or `resource` attribute
 * @return {Object} Object with the following properties:
 * @return {string} return.title Full page title in text form (with namespace, and spaces instead of underscores)
 * @return {string} return.hrefPrefix Href prefix like './' or '../'
 * @return {string} return.rawTitle Everything following `hrefPrefix` in input, unprocessed
 */
ve.parseParsoidResourceName = function ( resourceName ) {
	// Group 1 collects every leading './' or '../'; group 2 is whatever remains.
	var parts = resourceName.match( /^((?:\.\.?\/)*)(.*)$/ ),
		prefix = parts[ 1 ],
		remainder = parts[ 2 ],
		// '%' and '?' are valid in page titles, but normally arrive URI-encoded, so decode them.
		// Decoding also changes underscores to spaces.
		decodedTitle = ve.decodeURIComponentIntoArticleTitle( remainder );

	return {
		title: decodedTitle,
		rawTitle: remainder,
		hrefPrefix: prefix
	};
};
/**
 * Get just the page title out of a Parsoid resource name, discarding the href prefix.
 *
 * This is a convenience wrapper for callers that only need the `title` property of the
 * object returned by ve.parseParsoidResourceName.
 *
 * @param {string} resourceName Resource name, from a `href` or `resource` attribute
 * @return {string} Full page title in text form (with namespace, and spaces instead of underscores)
 */
ve.normalizeParsoidResourceName = function ( resourceName ) {
	var parsed = ve.parseParsoidResourceName( resourceName );
	return parsed.title;
};