From aceea5b6231422e9d2b284b8116c6e8b2624c5c2 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Tue, 15 Dec 2020 18:58:48 -0500 Subject: [PATCH] ApiVisualEditorEdit: the 'html' parameter should be raw to avoid normalization Although wikitext is (expected to be) in Unicode Normalization Form C, the output HTML may not be, due to the presence of explicit entities in the wikitext representing non-NFC codepoints. Bug: T266140 Depends-On: I2e78e660ba1867744e34eda7d00ea527ec016b71 Change-Id: I0d34c9a01f1132c2616ed3392ea40d8b73e15325 --- includes/ApiVisualEditorEdit.php | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/includes/ApiVisualEditorEdit.php b/includes/ApiVisualEditorEdit.php index 7ccd51546a..2cb1cdf90c 100644 --- a/includes/ApiVisualEditorEdit.php +++ b/includes/ApiVisualEditorEdit.php @@ -480,7 +480,14 @@ class ApiVisualEditorEdit extends ApiBase { 'minor' => null, 'watchlist' => null, 'html' => [ - ParamValidator::PARAM_TYPE => 'text', + // Use the 'raw' type to avoid Unicode NFC normalization. + // This makes the parameter binary safe, so that (a) if + // we use client-side compression it is not mangled, and/or + // (b) deprecated Unicode sequences explicitly encoded in + // wikitext (ie, &#x2001;) are not mangled. Wikitext is + // in Unicode Normal Form C, but because of explicit entities + // the output HTML is not guaranteed to be. + ParamValidator::PARAM_TYPE => 'raw', ParamValidator::PARAM_DEFAULT => null, ], 'etag' => null,