Extract rendering/parsing mediawiki responses into separate class

Page Previews should be able to consume the HTML response generated by MediaWiki. As a first step, move the plain-text crunching out of renderer.js and model.js. The MediaWiki and RESTBase gateways now have to parse/htmlize the plain text into HTML themselves. Bug: T165018 Change-Id: I5d7e9f610bb809aa9fb035a4a9f96e9e8796c9d8
2024-11-23 23:24:39 +00:00 · 2017-06-08 02:58:30 +02:00 · 2017-06-08 02:58:30 +02:00 · ef283c2509
parent b16a6fe735
commit ef283c2509
12 changed files with 272 additions and 221 deletions
--- a/resources/dist/index.js
+++ b/resources/dist/index.js
--- a/resources/dist/index.js.map
+++ b/resources/dist/index.js.map
--- a/src/formatter.js
+++ b/src/formatter.js
@ -0,0 +1,124 @@
+var $ = jQuery,
+	mw = window.mediaWiki;
+
+/**
+ * Improves the plain text extracts
+ * @param {String} plainTextExtract
+ * @param {String} title
+ * @returns {Array}
+ */
+function htmlize( plainTextExtract, title ) {
+	var extract = plainTextExtract;
+	if ( plainTextExtract === undefined ) {
+		return [];
+	}
+	extract = removeParentheticals( extract );
+	extract = removeEllipsis( extract );
+	extract = makeTitleInExtractBold( extract, title );
+	return extract;
+}
+
+/**
+ * Converts the extract into a list of elements, which correspond to fragments
+ * of the extract. The first case-insensitive occurrence of the title is
+ * wrapped in a `<b>` element; every other fragment becomes a text node.
+ *
+ * The title only matches at the very start of the extract or directly after
+ * whitespace. Nothing is required after it (the last group of the pattern has
+ * an empty alternative), so the title "Train" also matches inside "trains".
+ *
+ * Using the bolded elements of the extract of the page directly is covered by
+ * [T141651](https://phabricator.wikimedia.org/T141651).
+ *
+ * Extracted from `mw.popups.renderer.article.getProcessedElements`.
+ *
+ * @param {String} extract
+ * @param {String} title
+ * @return {Array} DOM text nodes and jQuery-wrapped `<b>` elements, in the
+ *  order in which they appear in the extract
+ */
+function makeTitleInExtractBold( extract, title ) {
+	var regExp, escapedTitle,
+		elements = [],
+		boldIdentifier = '<bi-' + Math.random() + '>',
+		snip = '<snip-' + Math.random() + '>';
+
+	title = title.replace( /\s+/g, ' ' ).trim(); // Remove extra white spaces
+	escapedTitle = mw.RegExp.escape( title ); // Escape RegExp elements
+	regExp = new RegExp( '(^|\\s)(' + escapedTitle + ')(|$)', 'i' );
+
+	// NOTE(review): unlike the title clean-up above, this pattern has no `g` flag
+	extract = extract.replace( /\s+/, ' ' ); // Collapses the first whitespace run only
+
+	// Surround the matched title with `snip` markers and prefix it with
+	// `boldIdentifier`, so that splitting on `snip` isolates it as its own part.
+	// The markers contain Math.random() output; assumed absent from the extract.
+	extract = extract.replace( regExp, '$1' + snip + boldIdentifier + '$2' + snip + '$3' );
+	extract = extract.split( snip );
+
+	$.each( extract, function ( index, part ) {
+		if ( part.indexOf( boldIdentifier ) === 0 ) {
+			elements.push( $( '<b>' ).text( part.substring( boldIdentifier.length ) ) );
+		} else {
+			elements.push( document.createTextNode( part ) );
+		}
+	} );
+
+	return elements;
+}
+
+/**
+ * Removes the trailing ellipsis from the extract, if it's there.
+ *
+ * Only three ASCII full stops at the very end of the string are removed, and
+ * only once: "Extract..." becomes "Extract", while "Extract." and
+ * "..Extract.." are returned unchanged.
+ *
+ * This function was extracted from
+ * `mw.popups.renderer.article#removeEllipsis`.
+ *
+ * @param {String} extract
+ * @return {String} The extract without the trailing "..."
+ */
+function removeEllipsis( extract ) {
+	return extract.replace( /\.\.\.$/, '' );
+}
+
+/**
+ * Removes parentheticals from the extract.
+ *
+ * The extract is scanned one character at a time while tracking the nesting
+ * depth of round brackets. Characters are only kept at depth zero, so nested
+ * parentheticals are removed together with their contents. A space that
+ * directly precedes an opening parenthesis at depth zero is dropped as well,
+ * e.g. "Foo (Bar) baz" becomes "Foo baz".
+ *
+ * If the parenthesis are unbalanced or out of order, then the extract is
+ * returned without further processing.
+ *
+ * This function was extracted from
+ * `mw.popups.renderer.article#removeParensFromText`.
+ *
+ * @param {String} extract
+ * @return {String} The extract without parentheticals, or the unmodified
+ *  extract if its parentheses don't pair up
+ */
+function removeParentheticals( extract ) {
+	var
+		ch,
+		result = '',
+		level = 0,
+		i = 0;
+
+	for ( i; i < extract.length; i++ ) {
+		ch = extract.charAt( i );
+
+		if ( ch === ')' && level === 0 ) {
+			return extract;
+		}
+		if ( ch === '(' ) {
+			level++;
+			continue;
+		} else if ( ch === ')' ) {
+			level--;
+			continue;
+		}
+		if ( level === 0 ) {
+			// Skip a space that directly precedes an opening parenthesis
+			if ( ch === ' ' && extract.charAt( i + 1 ) === '(' ) {
+				continue;
+			}
+			result += ch;
+		}
+	}
+
+	return ( level === 0 ) ? result : extract;
+}
+
+module.exports = {
+	htmlize: htmlize
+};
--- a/src/gateway/mediawiki.js
+++ b/src/gateway/mediawiki.js
@ -13,7 +13,9 @@
 //
 // FIXME: Move this to src/constants.js.
 var CACHE_LIFETIME = 300,
-	createModel = require( '../preview/model' ).createModel;
+	createModel = require( '../preview/model' ).createModel,
+	plainTextHTMLizer = require( '../formatter' ).htmlize,
+	$ = jQuery;

 /**
 * Creates an instance of the MediaWiki API gateway.
@ -69,6 +71,7 @@ module.exports = function createMediaWikiApiGateway( api, config ) {
 	function getPageSummary( title ) {
 		return fetch( title )
 			.then( extractPageFromResponse )
+			.then( htmlize )
 			.then( convertPageToModel );
 	}

@ -76,7 +79,8 @@ module.exports = function createMediaWikiApiGateway( api, config ) {
 		fetch: fetch,
 		extractPageFromResponse: extractPageFromResponse,
 		convertPageToModel: convertPageToModel,
-		getPageSummary: getPageSummary
+		getPageSummary: getPageSummary,
+		htmlize: htmlize
 	};
 };

@ -102,6 +106,20 @@ function extractPageFromResponse( data ) {
 	throw new Error( 'API response `query.pages` is empty.' );
 }

+/**
+ * HTMLize plain text response
+ *
+ * Returns a shallow copy of the given page data whose `extract` has been
+ * replaced by the result of passing the original `extract` and `title` to the
+ * formatter's `htmlize`. The object passed in is left untouched.
+ *
+ * @function
+ * @name MediaWikiGateway#htmlize
+ * @param {Object} data The response
+ * @returns {Object} A copy of `data` with the processed `extract`
+ */
+function htmlize( data ) {
+	var result = $.extend( {}, data );
+	result.extract = plainTextHTMLizer( data.extract, data.title );
+	return result;
+}
+
 /**
 * Converts the API response to a preview model.
 *
--- a/src/gateway/rest.js
+++ b/src/gateway/rest.js
@ -5,6 +5,7 @@
 var RESTBASE_ENDPOINT = '/api/rest_v1/page/summary/',
 	RESTBASE_PROFILE = 'https://www.mediawiki.org/wiki/Specs/Summary/1.2.0',
 	createModel = require( '../preview/model' ).createModel,
+	plainTextHTMLizer = require( '../formatter' ).htmlize,
 	mw = window.mediaWiki,
 	$ = jQuery;

@ -155,7 +156,7 @@ function convertPageToModel( page, thumbSize ) {
 		new mw.Title( page.title ).getUrl(),
 		page.lang,
 		page.dir,
-		page.extract,
+		plainTextHTMLizer( page.extract, page.title ),
 		page.thumbnail ? generateThumbnailData( page.thumbnail, page.originalimage, thumbSize ) : undefined
 	);
 }
--- a/src/preview/model.js
+++ b/src/preview/model.js
@ -21,7 +21,7 @@ exports.TYPE_PAGE = TYPE_PAGE;
 * @property {String} url The canonical URL of the page being previewed
 * @property {String} languageCode
 * @property {String} languageDirection Either "ltr" or "rtl"
- * @property {?String} extract `undefined` if the extract isn't
+ * @property {?Array} extract `undefined` if the extract isn't
 *  viable, e.g. if it's empty after having ellipsis and parentheticals
 *  removed
 * @property {String} type Either "EXTRACT" or "GENERIC"
@ -37,7 +37,7 @@ exports.TYPE_PAGE = TYPE_PAGE;
 * @param {String} url The canonical URL of the page being previewed
 * @param {String} languageCode
 * @param {String} languageDirection Either "ltr" or "rtl"
- * @param {String} extract
+ * @param {?Array} extract
 * @param {?Object} thumbnail
 * @return {PreviewModel}
 */
@ -67,79 +67,14 @@ exports.createModel = function createModel(
 * module.
 *
 * If the extract is `undefined`, `null`, or empty, then `undefined` is
- * returned. Otherwise, parentheticals and trailing ellipsis are removed. If
- * after processing the extract is empty, then `undefined` is returned.
+ * returned.
 *
- * @param {?String} extract
+ * @param {?Array} extract
- * @return {?String}
+ * @return {?Array}
 */
 function processExtract( extract ) {
-	var result;
-
-	if ( extract === undefined || extract === '' ) {
+	if ( extract === undefined || extract.length === 0 ) {
 		return undefined;
 	}
-
-	result = extract;
-	result = removeParentheticals( result );
-	result = removeEllipsis( result );
-
-	return result.length > 0 ? result : undefined;
-}
-
-/**
- * Removes the trailing ellipsis from the extract, if it's there.
- *
- * This function was extracted from
- * `mw.popups.renderer.article#removeEllipsis`.
- *
- * @param {String} extract
- * @return {String}
- */
-function removeEllipsis( extract ) {
-	return extract.replace( /\.\.\.$/, '' );
-}
-
-/**
- * Removes parentheticals from the extract.
- *
- * If the parenthesis are unbalanced or out of order, then the extract is
- * returned without further processing.
- *
- * This function was extracted from
- * `mw.popups.renderer.article#removeParensFromText`.
- *
- * @param {String} extract
- * @return {String}
- */
-function removeParentheticals( extract ) {
-	var
-		ch,
-		result = '',
-		level = 0,
-		i = 0;
-
-	for ( i; i < extract.length; i++ ) {
-		ch = extract.charAt( i );
-
-		if ( ch === ')' && level === 0 ) {
-			return extract;
-		}
-		if ( ch === '(' ) {
-			level++;
-			continue;
-		} else if ( ch === ')' ) {
-			level--;
-			continue;
-		}
-		if ( level === 0 ) {
-			// Remove leading spaces before brackets
-			if ( ch === ' ' && extract.charAt( i + 1 ) === '(' ) {
-				continue;
-			}
-			result += ch;
-		}
-	}
-
-	return ( level === 0 ) ? result : extract;
+	return extract;
 }
--- a/src/renderer.js
+++ b/src/renderer.js
@ -140,12 +140,7 @@ function createPreview( model ) {
 	var templateData,
 		thumbnail = createThumbnail( model.thumbnail ),
 		hasThumbnail = thumbnail !== null,
-
-		// FIXME: This should probably be moved into the gateway as we'll soon be
-		// fetching HTML from the API. See
-		// https://phabricator.wikimedia.org/T141651 for more detail.
-		extract = renderExtract( model.extract, model.title ),
-
+		extract = model.extract,
 		$el;

 	templateData = $.extend( {}, model, {
@ -158,8 +153,7 @@ function createPreview( model ) {
 	if ( hasThumbnail ) {
 		$el.find( '.mwe-popups-discreet' ).append( thumbnail.el );
 	}
-
-	if ( extract.length ) {
+	if ( extract ) {
 		$el.find( '.mwe-popups-extract' ).append( extract );
 	}

@ -201,50 +195,6 @@ function createEmptyPreview( model ) {
 	};
 }

-/**
- * Converts the extract into a list of elements, which correspond to fragments
- * of the extract. Fragements that match the title verbatim are wrapped in a
- * `<b>` element.
- *
- * Using the bolded elements of the extract of the page directly is covered by
- * [T141651](https://phabricator.wikimedia.org/T141651).
- *
- * Extracted from `mw.popups.renderer.article.getProcessedElements`.
- *
- * @param {String} extract
- * @param {String} title
- * @return {Array}
- */
-function renderExtract( extract, title ) {
-	var regExp, escapedTitle,
-		elements = [],
-		boldIdentifier = '<bi-' + Math.random() + '>',
-		snip = '<snip-' + Math.random() + '>';
-
-	title = title.replace( /\s+/g, ' ' ).trim(); // Remove extra white spaces
-	escapedTitle = mw.RegExp.escape( title ); // Escape RegExp elements
-	regExp = new RegExp( '(^|\\s)(' + escapedTitle + ')(|$)', 'i' );
-
-	// Remove text in parentheses along with the parentheses
-	extract = extract.replace( /\s+/, ' ' ); // Remove extra white spaces
-
-	// Make title bold in the extract text
-	// As the extract is html escaped there can be no such string in it
-	// Also, the title is escaped of RegExp elements thus can't have "*"
-	extract = extract.replace( regExp, '$1' + snip + boldIdentifier + '$2' + snip + '$3' );
-	extract = extract.split( snip );
-
-	$.each( extract, function ( index, part ) {
-		if ( part.indexOf( boldIdentifier ) === 0 ) {
-			elements.push( $( '<b>' ).text( part.substring( boldIdentifier.length ) ) );
-		} else {
-			elements.push( document.createTextNode( part ) );
-		}
-	} );
-
-	return elements;
-}
-
 /**
 * Shows the preview.
 *
@ -746,7 +696,6 @@ module.exports = {
 	hide: hide,
 	createThumbnail: createThumbnail,
 	createThumbnailElement: createThumbnailElement,
-	renderExtract: renderExtract,
 	createLayout: createLayout,
 	getClasses: getClasses,
 	layoutPreview: layoutPreview,
--- a/tests/node-qunit/formatter.test.js
+++ b/tests/node-qunit/formatter.test.js
@ -0,0 +1,94 @@
+var $ = jQuery,
+	formatter = require( '../../src/formatter' );
+
+QUnit.module( 'ext.popups.formatter', {
+	beforeEach: function () {
+		window.mediaWiki.RegExp = {
+			escape: this.sandbox.spy( function ( str ) {
+				return str.replace( /([\\{}()|.?*+\-\^$\[\]])/g, '\\$1' );
+			} )
+		};
+	},
+	afterEach: function () {
+		window.mediaWiki.RegExp = null;
+	}
+} );
+
+QUnit.test( 'Title is bold', function ( assert ) {
+	var cases = [
+		[
+			'Isaac Newton was born in', 'Isaac Newton',
+			'<b>Isaac Newton</b> was born in',
+			'Title as first word'
+		],
+		[
+			'The C* language not to be confused with C# or C', 'C*',
+			'The <b>C*</b> language not to be confused with C# or C',
+			'Title containing *'
+		],
+		[
+			'I like trains', 'Train',
+			'I like <b>train</b>s',
+			'Make the simple plural bold'
+		],
+		[
+			'Foo\'s pub is a pub in Bar', 'Foo\'s pub',
+			'<b>Foo\'s pub</b> is a pub in Bar',
+			'Correct escaping'
+		],
+		[
+			'\"Heroes\" is a David Bowie album', '\"Heroes\"',
+			'<b>\"Heroes\"</b> is a David Bowie album',
+			'Quotes in title'
+		],
+		[
+			'*Testing if Things are correctly identified', 'Things',
+			'*Testing if <b>Things</b> are correctly identified',
+			'Article that begins with asterisk'
+		],
+		[
+			'Testing if repeated words are not matched when repeated', 'Repeated',
+			'Testing if <b>repeated</b> words are not matched when repeated',
+			'Repeated title'
+		]
+	];
+
+	function test( extract, title, expected, msg ) {
+		var $div = $( '<div>' ).append(
+			formatter.htmlize( extract, title )
+		);
+		assert.equal( $div.html(), expected, msg );
+	}
+
+	cases.forEach( function ( case_ ) {
+		test( case_[ 0 ], case_[ 1 ], case_[ 2 ], case_[ 3 ] );
+	} );
+} );
+
+QUnit.test( 'it strips ellipsis and parentheticals', function ( assert ) {
+	var i, testCase, cases = [
+		// removeEllipsis
+		[ 'Extract...', 'Extract' ],
+		[ 'Extract.', 'Extract.' ],
+		[ '..Extract..', '..Extract..' ],
+		[ '...', '' ],
+
+		// removeParentheticals
+		[ 'Foo', 'Foo' ],
+		[ 'Foo (', 'Foo (' ],
+		[ 'Foo (Bar)', 'Foo' ],
+		[ 'Foo (Bar))', 'Foo (Bar))' ],
+		[ 'Foo )(Bar)', 'Foo )(Bar)' ],
+		[ '(Bar)', '' ]
+		], $div;
+
+	for ( i = 0; i < cases.length; i++ ) {
+		testCase = cases[ i ];
+
+		$div = $( '<div>' ).append(
+			formatter.htmlize( testCase[ 0 ], 'Test' )
+		);
+
+		assert.equal( $div.html(), testCase[ 1 ] );
+	}
+} );
--- a/tests/node-qunit/gateway/mediawiki.test.js
+++ b/tests/node-qunit/gateway/mediawiki.test.js
@ -39,7 +39,7 @@ var createModel = require( '../../../src/preview/model' ).createModel,
 		'https://en.wikipedia.org/wiki/Rick_Astley',
 		'en',
 		'ltr',
-		'Richard Paul "Rick" Astley is an English singer, songwriter, musician, and radio personality. His 1987 song, "Never Gonna Give You Up" was a No. 1 hit single in 25 countries. By the time of his retirement in 1993, Astley had sold approximately 40 million records worldwide.\nAstley made a comeback in 2007, becoming an Internet phenomenon when his video "Never Gonna Give You Up" became integral to the meme known as "rickrolling". Astley was voted "Best Act Ever" by Internet users at the',
+		[ document.createTextNode( 'Richard Paul "Rick" Astley is an English singer, songwriter, musician, and radio personality. His 1987 song, "Never Gonna Give You Up" was a No. 1 hit single in 25 countries. By the time of his retirement in 1993, Astley had sold approximately 40 million records worldwide.\nAstley made a comeback in 2007, becoming an Internet phenomenon when his video "Never Gonna Give You Up" became integral to the meme known as "rickrolling". Astley was voted "Best Act Ever" by Internet users at the' ) ],
 		{
 			height: 300,
 			source: 'https://upload.wikimedia.org/wikipedia/commons/thumb/6/6d/Rick_Astley_-_Pepsifest_2009.jpg/200px-Rick_Astley_-_Pepsifest_2009.jpg',
@ -47,7 +47,18 @@ var createModel = require( '../../../src/preview/model' ).createModel,
 		}
 	);

-QUnit.module( 'ext.popups/gateway/mediawiki' );
+QUnit.module( 'ext.popups/gateway/mediawiki', {
+	beforeEach: function () {
+		window.mediaWiki.RegExp = {
+			escape: this.sandbox.spy( function ( str ) {
+				return str.replace( /([\\{}()|.?*+\-\^$\[\]])/g, '\\$1' );
+			} )
+		};
+	},
+	afterEach: function () {
+		window.mediaWiki.RegExp = null;
+	}
+} );

 QUnit.test( 'MediaWiki API gateway is called with correct arguments', function ( assert ) {
 	var spy = this.sandbox.spy(),
@ -135,17 +146,7 @@ QUnit.test( 'MediaWiki API gateway is correctly converting the page data to a mo
 		page = gateway.extractPageFromResponse( MEDIAWIKI_API_RESPONSE );

 	assert.deepEqual(
-		gateway.convertPageToModel( page ),
-		MEDIAWIKI_API_RESPONSE_PREVIEW_MODEL
-	);
-} );
-
-QUnit.test( 'banana', function ( assert ) {
-	var gateway = createMediaWikiApiGateway(),
-		page = gateway.extractPageFromResponse( MEDIAWIKI_API_RESPONSE );
-
-	assert.deepEqual(
-		gateway.convertPageToModel( page ),
+		gateway.convertPageToModel( gateway.htmlize( page ) ),
 		MEDIAWIKI_API_RESPONSE_PREVIEW_MODEL
 	);
 } );
--- a/tests/node-qunit/gateway/rest.test.js
+++ b/tests/node-qunit/gateway/rest.test.js
@ -97,7 +97,7 @@ var createModel = require( '../../../src/preview/model' ).createModel,
 		'url/Barack Obama', // Generated in the stub below
 		'en',
 		'ltr',
-		'Barack Hussein Obama II born August 4, 1961) ...',
+		[ document.createTextNode( 'Barack Hussein Obama II born August 4, 1961) ' ) ],
 		{
 			source: 'https://upload.wikimedia.org/wikipedia/commons/thumb/8/8d/President_Barack_Obama.jpg/409px-President_Barack_Obama.jpg',
 			width: 409,
@ -107,12 +107,18 @@ var createModel = require( '../../../src/preview/model' ).createModel,

 QUnit.module( 'gateway/rest', {
 	beforeEach: function () {
-		mediaWiki.Title = function ( title ) {
+		window.mediaWiki.RegExp = {
+			escape: this.sandbox.spy( function ( str ) {
+				return str.replace( /([\\{}()|.?*+\-\^$\[\]])/g, '\\$1' );
+			} )
+		};
+		window.mediaWiki.Title = function ( title ) {
 			this.getUrl = function () { return 'url/' + title; };
 		};
 	},
 	afterEach: function () {
-		mediaWiki.Title = null;
+		window.mediaWiki.RegExp = null;
+		window.mediaWiki.Title = null;
 	}
 } );

--- a/tests/node-qunit/preview/model.test.js
+++ b/tests/node-qunit/preview/model.test.js
@ -23,23 +23,7 @@ QUnit.test( 'it should copy the basic properties', function ( assert ) {
 	assert.strictEqual( model.thumbnail, thumbnail );
 } );

-QUnit.test( 'it computes the extract property', function ( assert ) {
-	var i, testCase, cases = [
-			// removeEllipsis
-			[ '', undefined ],
-			[ 'Extract...', 'Extract' ],
-			[ 'Extract.', 'Extract.' ],
-			[ '...', undefined ],
-
-			// removeParentheticals
-			[ 'Foo', 'Foo' ],
-			[ 'Foo (', 'Foo (' ],
-			[ 'Foo (Bar)', 'Foo' ],
-			[ 'Foo (Bar))', 'Foo (Bar))' ],
-			[ 'Foo )(Bar)', 'Foo )(Bar)' ],
-			[ '(Bar)', undefined ]
-		];
-
+QUnit.test( 'it computes the type property', function ( assert ) {
 	function createModelWithExtract( extract ) {
 		return createModel(
 			'Foo',
@ -50,16 +34,6 @@ QUnit.test( 'it computes the extract property', function ( assert ) {
 		);
 	}

-	for ( i = 0; i < cases.length; i++ ) {
-		testCase = cases[ i ];
-		model = createModelWithExtract( testCase[ 0 ] );
-
-		assert.strictEqual( model.extract, testCase[ 1 ] );
-	}
-
-	// ---
-	// It computes the type property...
-
 	model = createModelWithExtract( 'Foo' );

 	assert.strictEqual(
--- a/tests/node-qunit/renderer.js
+++ b/tests/node-qunit/renderer.js
@ -628,57 +628,6 @@ QUnit.test( 'createThumbnailElement', function ( assert ) {

 } );

-QUnit.test( 'getProcessedElements', function ( assert ) {
-	var cases = [
-		[
-			'Isaac Newton was born in', 'Isaac Newton',
-			'<b>Isaac Newton</b> was born in',
-			'Title as first word'
-		],
-		[
-			'The C* language not to be confused with C# or C', 'C*',
-			'The <b>C*</b> language not to be confused with C# or C',
-			'Title containing *'
-		],
-		[
-			'I like trains', 'Train',
-			'I like <b>train</b>s',
-			'Make the simple plural bold'
-		],
-		[
-			'Foo\'s pub is a pub in Bar', 'Foo\'s pub',
-			'<b>Foo\'s pub</b> is a pub in Bar',
-			'Correct escaping'
-		],
-		[
-			'\"Heroes\" is a David Bowie album', '\"Heroes\"',
-			'<b>\"Heroes\"</b> is a David Bowie album',
-			'Quotes in title'
-		],
-		[
-			'*Testing if Things are correctly identified', 'Things',
-			'*Testing if <b>Things</b> are correctly identified',
-			'Article that begins with asterisk'
-		],
-		[
-			'Testing if repeated words are not matched when repeated', 'Repeated',
-			'Testing if <b>repeated</b> words are not matched when repeated',
-			'Repeated title'
-		]
-	];
-
-	function test( extract, title, expected, msg ) {
-		var $div = $( '<div>' ).append(
-			renderer.renderExtract( extract, title )
-		);
-		assert.equal( $div.html(), expected, msg );
-	}
-
-	cases.forEach( function ( case_ ) {
-		test( case_[ 0 ], case_[ 1 ], case_[ 2 ], case_[ 3 ] );
-	} );
-} );
-
 QUnit.test( '#createLayout - portrait preview, mouse event, link is on the top left of the page', function ( assert ) {
 	var isPreviewTall = false,
 		eventData = {