mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-09-26 19:56:49 +00:00
Merge "First pass implementing a general tag minimization routine"
This commit is contained in:
commit
befd223476
|
@ -7,6 +7,7 @@ var events = require('events'),
|
|||
// Stand-in for the DOM `Node` interface: only the nodeType constants that
// this file compares against are defined here.
// Reference: https://developer.mozilla.org/en/nodeType
var Node = {};
Node.ELEMENT_NODE = 1;
Node.TEXT_NODE = 3;
Node.COMMENT_NODE = 8;
|
||||
|
@ -15,6 +16,237 @@ var isElementContentWhitespace = function ( e ) {
|
|||
return (e.data.match(/^[ \r\n\t]*$/) !== null);
|
||||
};
|
||||
|
||||
/**
 * Minimize the number of inline formatting tags below `root` by hoisting
 * tags that are shared by runs of adjacent siblings into a common wrapper.
 *
 * Example (with 'b' and 'i' rewriteable):
 *   <b><i>x</i></b><b>y</b>   ==>   <b><i>x</i>y</b>
 *
 * The tree is modified in place. Elements are compared by lower-cased
 * nodeName only; when several siblings share a tag, the element instance
 * from the leftmost sibling is the one that is kept.
 *
 * Any exception raised while rewriting is caught and logged to the console
 * (best-effort behaviour); it is not propagated to the caller.
 *
 * @param {Node} root Root of the subtree to process.
 * @param {string[]} rewriteable_nodes Tag names (any case) that may be
 *   reordered / merged, e.g. ['b', 'u', 'i', 's'].
 */
function minimize_inline_tags(root, rewriteable_nodes) {
	// Lower-cased tag name -> true. Prototype-less so that tag names such as
	// "constructor" cannot be mistaken for entries.
	var rewriteable_node_map = null;

	// Last element of an array.
	function tail(a) {
		return a[a.length - 1];
	}

	function remove_all_children(node) {
		while (node.hasChildNodes()) node.removeChild(node.firstChild);
	}

	function add_children(node, children) {
		for (var i = 0, n = children.length; i < n; i++) node.appendChild(children[i]);
	}

	function init() {
		rewriteable_node_map = Object.create(null);
		for (var i = 0, n = rewriteable_nodes.length; i < n; i++) {
			rewriteable_node_map[rewriteable_nodes[i].toLowerCase()] = true;
		}
	}

	// node_name must already be lower-cased.
	function is_rewriteable_node(node_name) {
		return rewriteable_node_map[node_name] === true;
	}

	// Main routine
	function rewrite(node) {
		var children = node.childNodes;
		var n = children.length;

		// If we have a single node, no restructuring is possible at this level
		// Descend ...
		if (n === 1) {
			var sole_node = children[0];
			if (sole_node.nodeType === Node.ELEMENT_NODE) rewrite(sole_node);
			return;
		}

		// * Collect longest linear paths for all children
		// * Process subtrees attached to the end of those paths
		// * Restructure the list of linear paths (and reattach processed subtrees at the tips).

		var P = [];
		for (var i = 0; i < n; i++) {
			var s = children[i];
			if (s.nodeType === Node.ELEMENT_NODE) {
				// The path always contains at least 's' itself, so it is never empty.
				var p = longest_linear_path(s);
				var p_tail = tail(p);

				// process subtree (depth-first)
				rewrite(p_tail);

				// collect the restructured p_tail subtree (children); copy into a
				// plain array because childNodes changes as nodes are moved around
				var child_nodes = p_tail.childNodes;
				var new_children = [];
				for (var j = 0, n2 = child_nodes.length; j < n2; j++) {
					new_children.push(child_nodes[j]);
				}

				P.push({path: p, orig_parent: p_tail, children: new_children});
			} else {
				// Non-element child (text, comment, ...): empty path, node is its own subtree
				P.push({path: [], orig_parent: node, children: [s]});
			}
		}

		// Rewrite paths in 'P'
		if (P.length > 0) rewrite_paths(node, P);
	}

	// Starting at element 'node', follow the chain of only-children for as
	// long as they are elements. Returns the chain, including 'node' itself.
	function longest_linear_path(node) {
		var children, path = [];
		while (node.nodeType === Node.ELEMENT_NODE) {
			path.push(node);
			children = node.childNodes;
			if ((children.length === 0) || (children.length > 1)) return path;
			node = children[0];
		}

		return path;
	}

	// Rebuild the children of 'parent_node' from the path records in 'P'
	// ('P' must be non-empty).
	function rewrite_paths(parent_node, P) {
		// 1. Split P into maximal sublists where each sublist has a non-null path intersection.
		// 2. Process each sublist separately and accumulate the result.
		//
		// lcs = longest common sublist

		remove_all_children(parent_node);

		var sublists = split_into_disjoint_sublists(P);
		for (var i = 0, num_sublists = sublists.length; i < num_sublists; i++) {
			var s = sublists[i];
			var lcs = s.lcs;

			if (lcs.length > 0) {
				// Connect up LCS: chain the shared elements into a single linear path
				var prev = lcs[0];
				for (var k = 1, lcs_len = lcs.length; k < lcs_len; k++) {
					var curr = lcs[k];
					// SSS FIXME: this add/remove can be optimized
					remove_all_children(prev);
					prev.appendChild(curr);
					prev = curr;
				}

				// Lastly, attach lcs to the incoming parent
				parent_node.appendChild(lcs[0]);
			}

			var paths = s.paths;
			var num_paths = paths.length;
			if (num_paths === 1) {
				// Nothing more to do!  Stitch things up
				// two possible scenarios:
				// (a) we have an empty path    ==> attach the children to parent_node
				// (b) we have a non-empty path ==> attach the children to the end of the path
				var p = paths[0].path;
				var children = paths[0].children;
				if (p.length > 0) {
					var p_tail = tail(p);
					remove_all_children(p_tail);
					add_children(p_tail, children);
				} else {
					add_children(parent_node, children);
				}
			} else {
				// Process the sublist: a sublist with several paths always has a
				// non-empty lcs, so whatever remains of each path hangs off its tail
				rewrite_paths(tail(lcs), strip_lcs(paths, lcs));
			}
		}
	}

	// Elements of 'old' whose tag is rewriteable and also occurs in 'new_path'.
	// Order and element instances are taken from 'old'.
	function common_path(old, new_path) {
		var i, n;
		var hash = Object.create(null);
		for (i = 0, n = new_path.length; i < n; i++) {
			var e = new_path[i].nodeName.toLowerCase();
			if (is_rewriteable_node(e)) hash[e] = new_path[i];
		}

		var cp = [];
		for (i = 0, n = old.length; i < n; i++) {
			var hit = hash[old[i].nodeName.toLowerCase()];
			// Add old path element always.  This effectively picks elements from the leftmost path.
			if (hit) cp.push(old[i]);
		}

		return cp;
	}

	// For each 'p' in 'paths', eliminate 'lcs' from 'p'
	function strip_lcs(paths, lcs) {
		// SSS FIXME: Implicit assumption: there are no duplicate elements in lcs or path!
		// Ex: in <b><i><b>BIB</b></i></b> every <b> on the path matches an lcs
		// entry named 'b', so both get stripped.
		// Fix this to be more robust

		// Match by tag name, exactly as common_path does. Using the node object
		// itself as a property key would stringify it and make unrelated
		// elements collide on the same key.
		var i, n;
		var lcs_map = Object.create(null);
		for (i = 0, n = lcs.length; i < n; i++) {
			lcs_map[lcs[i].nodeName.toLowerCase()] = true;
		}

		function not_in_lcs(e) {
			return lcs_map[e.nodeName.toLowerCase()] !== true;
		}

		for (i = 0, n = paths.length; i < n; i++) {
			paths[i].path = paths[i].path.filter(not_in_lcs);
		}

		return paths;
	}

	// Split 'P' into sublists where each sublist has the property that
	// the elements of the sublist have an intersection that is non-zero
	// Ex: [BIUS, SB, BUS, IU, I, U, US, B, I] will get split into 5 sublists
	// - (lcs: BS, paths: [BIUS, SB, BUS])
	// - (lcs: I,  paths: [IU, I])
	// - (lcs: U,  paths: [U, US])
	// - (lcs: B,  paths: [B])
	// - (lcs: I,  paths: [I])
	//
	// 'P' must be non-empty. A sublist holding a single path uses that path
	// itself as its lcs.
	function split_into_disjoint_sublists(P) {
		var sublists = [];
		var lcs = P[0].path;
		var curr = [P[0]];

		for (var i = 1, n = P.length; i < n; i++) {
			var p = P[i];
			var new_lcs = common_path(lcs, p.path);
			if (new_lcs.length === 0) {
				// Nothing in common with the running sublist: close it, start a new one
				sublists.push({lcs: lcs, paths: curr});
				lcs = p.path;
				curr = [p];
			} else {
				lcs = new_lcs;
				curr.push(p);
			}
		}

		sublists.push({lcs: lcs, paths: curr});
		return sublists;
	}

	// Init
	init();

	// Kick it off
	try {
		rewrite(root);
	} catch (e) {
		console.log("------- error errrror errrrrror! ----------");
		console.log(e.stack);
	}
}
|
||||
|
||||
// [..., T1[T2[*x]], T2[*y], ...] ==> [..., T2[T1[*x], *y], ...]
|
||||
// [..., T2[*x], T1[T2[*y]], ...] ==> [..., T2[*x, T1[*y]], ...]
|
||||
// where T1 and T2 are different and can be one of [i, b]
|
||||
|
@ -166,7 +398,8 @@ var normalize_subtree = function(node, rewrite_rules) {
|
|||
};
|
||||
|
||||
/**
 * Normalize the inline formatting markup of a document in place.
 *
 * Runs the general tag-minimization pass over document.body for the
 * b/u/i/s tags. This pass takes the place of the earlier pairwise rewrite
 * (normalize_subtree with rewrite_nested_tag_pairs over the pairs
 * b/i, b/u and i/u), which is no longer invoked from here.
 *
 * @param {Document} document Document whose body is rewritten.
 */
var normalize_document = function(document) {
	minimize_inline_tags(document.body, ['b','u','i','s']);
};
|
||||
|
||||
// Wrap all top-level inline elements in paragraphs. This should also be
|
||||
|
|
Loading…
Reference in a new issue