From f00325d6cc5529080e045923ae8d3bbda5030287 Mon Sep 17 00:00:00 2001 From: Subramanya Sastry Date: Mon, 21 Sep 2020 13:22:54 -0500 Subject: [PATCH] Introduce preprocessing in the HTML -> WT direction * This patch introduces a preprocessing step on the edited DOM. * Existing preprocessing code has been extracted into the preprocessDOM method. Any registered extension preprocessors are invoked on the DOM. So, this assumes that the htmlPreprocess extension listener is only applicable to the edited DOM. If we want to expose the concept of selective serialization through the API, we may want to add an additional interface method / listener to the DOMProcessor class. As of this patch, this is somewhat theoretical since there are no such extension handlers registered on either DOM. Future patches can clarify this better as specific needs arise. * The handler also calls the serializer's custom preprocessing steps. This step is applicable to both the original and the edited DOM (since DOM Diff is impacted by the results). If a need arises, in the future, we may introduce a new extension DOM processor method that applies to both original and edited DOMs. * Right now, only selser strips section tags and non-selser wts doesn't need to. So, preprocessDOM there is empty. Additional selser-only DOM preprocessing will show up in later patches. * Moved a stub HTML->WT preprocessor in the Cite extension to RefProcessor. 
Bug: T254501 Change-Id: I0c12afb2ea82617406d72ad872ac4f33678fa5f2 --- src/Parsoid/Cite.php | 22 ---------------------- src/Parsoid/RefProcessor.php | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/src/Parsoid/Cite.php b/src/Parsoid/Cite.php index 069d33501..875cbb0b0 100644 --- a/src/Parsoid/Cite.php +++ b/src/Parsoid/Cite.php @@ -3,9 +3,7 @@ declare( strict_types = 1 ); namespace Wikimedia\Parsoid\Ext\Cite; -use DOMNode; use Wikimedia\Parsoid\Ext\ExtensionModule; -use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; /** * Native Parsoid implementation of the Cite extension @@ -41,24 +39,4 @@ class Cite implements ExtensionModule { ] ]; } - - /** - * html -> wt DOM PreProcessor - * - * This is to reconstitute page-level information from local annotations - * left behind by editing clients. - * - * Editing clients add inserted: true or deleted: true properties to a <ref>'s - * data-mw object. These are no-ops for non-named <ref>s. For named <ref>s, - * - for inserted refs, we might want to de-duplicate refs. - * - for deleted refs, if the primary ref was deleted, we have to transfer - * the primary ref designation to another instance of the named ref. - * - * @param ParsoidExtensionAPI $extApi - * @param DOMNode $body - * @suppress PhanEmptyPrivateMethod - */ - private static function html2wtPreProcessor( ParsoidExtensionAPI $extApi, DOMNode $body ) { - // TODO - } } diff --git a/src/Parsoid/RefProcessor.php b/src/Parsoid/RefProcessor.php index 407d80978..0323c557e 100644 --- a/src/Parsoid/RefProcessor.php +++ b/src/Parsoid/RefProcessor.php @@ -3,6 +3,7 @@ declare( strict_types = 1 ); namespace Wikimedia\Parsoid\Ext\Cite; +use DOMElement; use DOMNode; use Wikimedia\Parsoid\Ext\DOMProcessor; use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; @@ -25,5 +26,18 @@ class RefProcessor extends DOMProcessor { } } - // FIXME: should implement an htmlPreprocess method as well. + /** + * html -> wt DOM PreProcessor + * + * Nothing to do right now. 
+ * + * But, for example, as part of some future functionality, this could be used to + * reconstitute page-level information from local annotations left behind by editing clients. + * + * @param ParsoidExtensionAPI $extApi + * @param DOMElement $root + */ + public function htmlPreprocess( ParsoidExtensionAPI $extApi, DOMElement $root ): void { + // TODO + } }