mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-15 10:35:48 +00:00
Merge "First pass implementing a general tag minimization routine"
This commit is contained in:
commit
befd223476
|
@ -7,6 +7,7 @@ var events = require('events'),
|
||||||
// Quick HACK: define Node constants
|
// Quick HACK: define Node constants
|
||||||
// https://developer.mozilla.org/en/nodeType
|
// https://developer.mozilla.org/en/nodeType
|
||||||
var Node = {
|
var Node = {
|
||||||
|
ELEMENT_NODE: 1,
|
||||||
TEXT_NODE: 3,
|
TEXT_NODE: 3,
|
||||||
COMMENT_NODE: 8
|
COMMENT_NODE: 8
|
||||||
};
|
};
|
||||||
|
@ -15,6 +16,237 @@ var isElementContentWhitespace = function ( e ) {
|
||||||
return (e.data.match(/^[ \r\n\t]*$/) !== null);
|
return (e.data.match(/^[ \r\n\t]*$/) !== null);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
function minimize_inline_tags(root, rewriteable_nodes) {
|
||||||
|
var rewriteable_node_map = null;
|
||||||
|
|
||||||
|
// Return the last element of the array `a`; yields undefined when `a` is empty.
function tail(a) {
    var last_index = a.length - 1;
    return a[last_index];
}
|
||||||
|
|
||||||
|
// Detach children from `node` one at a time (always the current first child)
// until `node.hasChildNodes()` reports that none are left.
function remove_all_children(node) {
    for (;;) {
        if (!node.hasChildNodes()) {
            return;
        }
        var first = node.firstChild;
        node.removeChild(first);
    }
}
|
||||||
|
|
||||||
|
// Append each entry of `children` to `node`, in order.
// The number of entries is read once up front, before anything is appended.
function add_children(node, children) {
    var count = children.length;
    var idx = 0;
    while (idx < count) {
        node.appendChild(children[idx]);
        idx += 1;
    }
}
|
||||||
|
|
||||||
|
// Rebuild `rewriteable_node_map` from `rewriteable_nodes`: every listed name
// is lower-cased and stored as a key whose value is `true`.
function init() {
    rewriteable_node_map = {};

    var total = rewriteable_nodes.length;
    var idx = 0;
    while (idx < total) {
        var key = rewriteable_nodes[idx].toLowerCase();
        rewriteable_node_map[key] = true;
        idx++;
    }
}
|
||||||
|
|
||||||
|
// Tell whether `node_name` (expected to be already lower-cased by the caller)
// is one of the tag names registered in `rewriteable_node_map`.
//
// Returns a strict boolean. Only the map's own keys count, so names that
// happen to match inherited Object.prototype members (e.g. "constructor",
// "__proto__") are not reported as rewriteable.
function is_rewriteable_node(node_name) {
    return Object.prototype.hasOwnProperty.call(rewriteable_node_map, node_name);
}
|
||||||
|
|
||||||
|
// Main routine
//
// Restructure the subtree below `node`, depth-first:
//  * collect the longest linear path hanging off every element child,
//  * rewrite the subtree attached to the end of each of those paths,
//  * hand the collected records to rewrite_paths(), which restructures the
//    paths and reattaches the processed subtrees at their tips.
//
// Each record has the shape {path, orig_parent, children}.
function rewrite(node) {
    var kids = node.childNodes;
    var kid_count = kids.length;

    // A single child leaves nothing to restructure at this level: descend
    // into it when it is an element, then stop.
    if (kid_count === 1) {
        var only_child = kids[0];
        if (only_child.nodeType === Node.ELEMENT_NODE) {
            rewrite(only_child);
        }
        return;
    }

    var entries = [];
    for (var idx = 0; idx < kid_count; idx++) {
        var child = kids[idx];

        // Non-element children are carried along unchanged with an empty path.
        if (child.nodeType !== Node.ELEMENT_NODE) {
            entries.push({path: [], orig_parent: node, children: [child]});
            continue;
        }

        var chain = longest_linear_path(child);
        if (chain.length === 0) {
            rewrite(child);
            entries.push({path: [], orig_parent: node, children: [child]});
            continue;
        }

        // Process the subtree at the tip of the path first (depth-first) ...
        var chain_end = tail(chain);
        rewrite(chain_end);

        // ... then snapshot the tip's (possibly restructured) children.
        var rewritten = chain_end.childNodes;
        var collected = [];
        var total = rewritten.length;
        for (var k = 0; k < total; k++) {
            collected.push(rewritten[k]);
        }

        entries.push({path: chain, orig_parent: chain_end, children: collected});
    }

    // Rewrite the collected paths
    if (entries.length > 0) {
        rewrite_paths(node, entries);
    }
}
|
||||||
|
|
||||||
|
// Collect the chain of elements that starts at `node` and keeps following
// the only child for as long as the current node is an element with exactly
// one child.
//
// Returns the visited elements in top-down order. The chain ends at the
// first element that has zero or several children, or just before a sole
// child that is not an element. The result is empty when `node` itself is
// not an element.
function longest_linear_path(node) {
    var path = [];
    var curr = node;

    while (curr.nodeType === Node.ELEMENT_NODE) {
        path.push(curr);

        var children = curr.childNodes;
        if (children.length !== 1) {
            break;
        }
        curr = children[0];
    }

    return path;
}
|
||||||
|
|
||||||
|
// Rebuild the children of `parent_node` from the path records in `P`.
//
// 1. Split P into maximal sublists where each sublist has a non-empty path
//    intersection.
// 2. Process each sublist separately and accumulate the result under
//    `parent_node`.
//
// lcs = longest common sublist
//
// `P` holds {path, orig_parent, children} records. It is consumed by
// split_into_disjoint_sublists(), and the `path` arrays of records in
// multi-path sublists are modified by strip_lcs().
function rewrite_paths(parent_node, P) {
    remove_all_children(parent_node);

    var sublists = split_into_disjoint_sublists(P);
    for (var i = 0, num_sublists = sublists.length; i < num_sublists; i++) {
        var s = sublists[i];
        var lcs = s.lcs;

        if (lcs.length > 0) {
            // Connect up the LCS: chain its elements so that each one becomes
            // the sole child of the previous one.
            // `k` and `lcs_len` are declared here; they used to be implicit
            // globals (a ReferenceError in strict mode / ES modules).
            var prev = lcs[0];
            for (var k = 1, lcs_len = lcs.length; k < lcs_len; k++) {
                var curr = lcs[k];
                // SSS FIXME: this add/remove can be optimized
                remove_all_children(prev);
                prev.appendChild(curr);
                prev = curr;
            }

            // Lastly, attach the lcs chain to the incoming parent
            parent_node.appendChild(lcs[0]);
        }

        var paths = s.paths;
        var num_paths = paths.length;
        if (num_paths === 1) {
            // Nothing more to do! Stitch things up.
            // Two possible scenarios:
            // (a) we have an empty path ==> attach the children to parent_node
            // (b) we have a non-empty path ==> attach the children to the end of the path
            var p = paths[0].path;
            var children = paths[0].children;
            if (p.length > 0) {
                var p_tail = tail(p);
                remove_all_children(p_tail);
                add_children(p_tail, children);
            } else {
                add_children(parent_node, children);
            }
        } else {
            // Several paths share the lcs: strip it from each of them and
            // rebuild what is left underneath the last lcs element.
            rewrite_paths(tail(lcs), strip_lcs(paths, lcs));
        }
    }
}
|
||||||
|
|
||||||
|
// Compute the elements of `old` whose tag name is rewriteable and also
// occurs in `new_path`.
//
// Tag names are compared case-insensitively (via nodeName.toLowerCase()).
// The result keeps the order of `old` and contains the node objects from
// `old`, which effectively picks elements from the leftmost path.
function common_path(old, new_path) {
    // Prototype-less map: tag names that collide with Object.prototype
    // members (e.g. "constructor") must not register as hits.
    var seen = Object.create(null);
    var i, n, name;

    for (i = 0, n = new_path.length; i < n; i++) {
        name = new_path[i].nodeName.toLowerCase();
        if (is_rewriteable_node(name)) {
            seen[name] = true;
        }
    }

    var cp = [];
    for (i = 0, n = old.length; i < n; i++) {
        // Keep the old path element only when its tag also occurs in new_path.
        if (seen[old[i].nodeName.toLowerCase()]) {
            cp.push(old[i]);
        }
    }

    return cp;
}
|
||||||
|
|
||||||
|
// For each 'p' in 'paths', eliminate 'lcs' from 'p'
//
// Every element of a record's `path` whose tag name (compared
// case-insensitively, the same way common_path() compares) occurs in `lcs`
// is removed. The `path` arrays are modified in place and the same `paths`
// array is returned.
function strip_lcs(paths, lcs) {
    // SSS FIXME: Implicit assumption: there are no duplicate elements in lcs or path!
    // Ex: a path with a repeated tag such as <b><i><b>BIB</b></i></b> breaks
    // this assumption (every occurrence of the tag is stripped).
    // Fix this to be more robust

    var i, n, lcs_names = Object.create(null);

    // Key by tag name. Keying by the node object itself relies on how nodes
    // stringify and makes unrelated nodes collide on a single key.
    for (i = 0, n = lcs.length; i < n; i++) {
        lcs_names[lcs[i].nodeName.toLowerCase()] = true;
    }

    for (i = 0, n = paths.length; i < n; i++) {
        var p = paths[i].path;
        // Walk backwards so that splicing never shifts an unvisited index.
        for (var j = p.length - 1; j >= 0; j--) {
            if (lcs_names[p[j].nodeName.toLowerCase()]) {
                p.splice(j, 1);
            }
        }
    }

    return paths;
}
|
||||||
|
|
||||||
|
// Split 'P' into sublists of consecutive records where each sublist has the
// property that the paths of its records have a non-empty intersection
// (as computed by common_path()).
// Ex: [BIUS, SB, BUS, IU, I, U, US, B, I] will get split into 5 sublists
// - (lcs: BS, paths: [BIUS, SB, BUS])
// - (lcs: I,  paths: [IU, I])
// - (lcs: U,  paths: [U, US])
// - (lcs: B,  paths: [B])
// - (lcs: I,  paths: [I])
//
// Returns an array of {lcs, paths} objects. For a sublist holding a single
// record, `lcs` is that record's own `path` array. `P` is consumed: it is
// empty when this function returns. An empty `P` yields an empty result.
function split_into_disjoint_sublists(P) {
    var sublists = [];

    while (P.length > 0) {
        var first = P.shift();
        var lcs = first.path;
        var curr = [first];

        // Keep absorbing the following records for as long as they still
        // share something with the running intersection.
        while (P.length > 0) {
            var new_lcs = common_path(lcs, P[0].path);
            if (new_lcs.length === 0) {
                break;
            }
            lcs = new_lcs;
            curr.push(P.shift());
        }

        sublists.push({lcs: lcs, paths: curr});
    }

    return sublists;
}
|
||||||
|
|
||||||
|
// Init
|
||||||
|
init();
|
||||||
|
|
||||||
|
// Kick it off
|
||||||
|
try {
|
||||||
|
rewrite(root);
|
||||||
|
} catch (e) {
|
||||||
|
console.log("------- error errrror errrrrror! ----------");
|
||||||
|
console.log(e.stack);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// [..., T1[T2[*x]], T2[*y], ...] ==> [..., T2[T1[*x], *y], ...]
|
// [..., T1[T2[*x]], T2[*y], ...] ==> [..., T2[T1[*x], *y], ...]
|
||||||
// [..., T2[*x], T1[T2[*y]], ...] ==> [..., T2[*x, T1[*y]], ...]
|
// [..., T2[*x], T1[T2[*y]], ...] ==> [..., T2[*x, T1[*y]], ...]
|
||||||
// where T1 and T2 are different and can be one of [i, b]
|
// where T1 and T2 are different and can be one of [i, b]
|
||||||
|
@ -166,7 +398,8 @@ var normalize_subtree = function(node, rewrite_rules) {
|
||||||
};
|
};
|
||||||
|
|
||||||
var normalize_document = function(document) {
|
var normalize_document = function(document) {
|
||||||
normalize_subtree(document.body, [rewrite_nested_tag_pairs.bind(null, document, [['b','i'], ['b','u'], ['i','u']])]);
|
// normalize_subtree(document.body, [rewrite_nested_tag_pairs.bind(null, document, [['b','i'], ['b','u'], ['i','u']])]);
|
||||||
|
minimize_inline_tags(document.body, ['b','u','i','s']);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Wrap all top-level inline elements in paragraphs. This should also be
|
// Wrap all top-level inline elements in paragraphs. This should also be
|
||||||
|
|
Loading…
Reference in a new issue