/**
* DomParser.js
*
* Released under LGPL License.
* Copyright (c) 1999-2015 Ephox Corp. All rights reserved
*
* License: http://www.tinymce.com/license
* Contributing: http://www.tinymce.com/contributing
*/
/**
* This class parses HTML code into a DOM like structure of nodes it will remove redundant whitespace and make
* sure that the node tree is valid according to the specified schema.
* So for example: <p>a<p>b</p>c</p> will become <p>a</p><p>b</p><p>c</p>
*
* @example
* var parser = new tinymce.html.DomParser({validate: true}, schema);
* var rootNode = parser.parse('<h1>content</h1>');
*
* @class tinymce.html.DomParser
* @version 3.4
*/
define("tinymce/html/DomParser", [
"tinymce/html/Node",
"tinymce/html/Schema",
"tinymce/html/SaxParser",
"tinymce/util/Tools"
], function(Node, Schema, SaxParser, Tools) {
var makeMap = Tools.makeMap, each = Tools.each, explode = Tools.explode, extend = Tools.extend;
/**
* Constructs a new DomParser instance.
*
* @constructor
* @method DomParser
* @param {Object} settings Name/value collection of settings. comment, cdata, text, start and end are callbacks.
* @param {tinymce.html.Schema} schema HTML Schema class to use when parsing.
*/
return function(settings, schema) {
var self = this, nodeFilters = {}, attributeFilters = [], matchedNodes = {}, matchedAttributes = {};
settings = settings || {};
settings.validate = "validate" in settings ? settings.validate : true;
settings.root_name = settings.root_name || 'body';
self.schema = schema = schema || new Schema();
function fixInvalidChildren(nodes) {
var ni, node, parent, parents, newParent, currentNode, tempNode, childNode, i;
var nonEmptyElements, nonSplitableElements, textBlockElements, specialElements, sibling, nextNode;
nonSplitableElements = makeMap('tr,td,th,tbody,thead,tfoot,table');
nonEmptyElements = schema.getNonEmptyElements();
textBlockElements = schema.getTextBlockElements();
specialElements = schema.getSpecialElements();
for (ni = 0; ni < nodes.length; ni++) {
node = nodes[ni];
// Already removed or fixed
if (!node.parent || node.fixed) {
continue;
}
// If the invalid element is a text block and the text block is within a parent LI element
// Then unwrap the first text block and convert other sibling text blocks to LI elements similar to Word/Open Office
if (textBlockElements[node.name] && node.parent.name == 'li') {
// Move sibling text blocks after LI element
sibling = node.next;
while (sibling) {
if (textBlockElements[sibling.name]) {
sibling.name = 'li';
sibling.fixed = true;
node.parent.insert(sibling, node.parent);
} else {
break;
}
sibling = sibling.next;
}
// Unwrap current text block
node.unwrap(node);
continue;
}
// Get list of all parent nodes until we find a valid parent to stick the child into
parents = [node];
for (parent = node.parent; parent && !schema.isValidChild(parent.name, node.name) &&
!nonSplitableElements[parent.name]; parent = parent.parent) {
parents.push(parent);
}
// Found a suitable parent
if (parent && parents.length > 1) {
// Reverse the array since it makes looping easier
parents.reverse();
// Clone the related parent and insert that after the moved node
newParent = currentNode = self.filterNode(parents[0].clone());
// Start cloning and moving children on the left side of the target node
for (i = 0; i < parents.length - 1; i++) {
if (schema.isValidChild(currentNode.name, parents[i].name)) {
tempNode = self.filterNode(parents[i].clone());
currentNode.append(tempNode);
} else {
tempNode = currentNode;
}
for (childNode = parents[i].firstChild; childNode && childNode != parents[i + 1];) {
nextNode = childNode.next;
tempNode.append(childNode);
childNode = nextNode;
}
currentNode = tempNode;
}
if (!newParent.isEmpty(nonEmptyElements)) {
parent.insert(newParent, parents[0], true);
parent.insert(node, newParent);
} else {
parent.insert(node, parents[0], true);
}
// Check if the element is empty by looking through it's contents and special treatment for <p><br /></p>
parent = parents[0];
if (parent.isEmpty(nonEmptyElements) || parent.firstChild === parent.lastChild && parent.firstChild.name === 'br') {
parent.empty().remove();
}
} else if (node.parent) {
// If it's an LI try to find a UL/OL for it or wrap it
if (node.name === 'li') {
sibling = node.prev;
if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) {
sibling.append(node);
continue;
}
sibling = node.next;
if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) {
sibling.insert(node, sibling.firstChild, true);
continue;
}
node.wrap(self.filterNode(new Node('ul', 1)));
continue;
}
// Try wrapping the element in a DIV
if (schema.isValidChild(node.parent.name, 'div') && schema.isValidChild('div', node.name)) {
node.wrap(self.filterNode(new Node('div', 1)));
} else {
// We failed wrapping it, then remove or unwrap it
if (specialElements[node.name]) {
node.empty().remove();
} else {
node.unwrap();
}
}
}
}
}
/**
* Runs the specified node though the element and attributes filters.
*
* @method filterNode
* @param {tinymce.html.Node} Node the node to run filters on.
* @return {tinymce.html.Node} The passed in node.
*/
self.filterNode = function(node) {
var i, name, list;
// Run element filters
if (name in nodeFilters) {
list = matchedNodes[name];
if (list) {
list.push(node);
} else {
matchedNodes[name] = [node];
}
}
// Run attribute filters
i = attributeFilters.length;
while (i--) {
name = attributeFilters[i].name;
if (name in node.attributes.map) {
list = matchedAttributes[name];
if (list) {
list.push(node);
} else {
matchedAttributes[name] = [node];
}
}
}
return node;
};
/**
* Adds a node filter function to the parser, the parser will collect the specified nodes by name
* and then execute the callback ones it has finished parsing the document.
*
* @example
* parser.addNodeFilter('p,h1', function(nodes, name) {
* for (var i = 0; i < nodes.length; i++) {
* console.log(nodes[i].name);
* }
* });
* @method addNodeFilter
* @method {String} name Comma separated list of nodes to collect.
* @param {function} callback Callback function to execute once it has collected nodes.
*/
self.addNodeFilter = function(name, callback) {
each(explode(name), function(name) {
var list = nodeFilters[name];
if (!list) {
nodeFilters[name] = list = [];
}
list.push(callback);
});
};
/**
* Adds a attribute filter function to the parser, the parser will collect nodes that has the specified attributes
* and then execute the callback ones it has finished parsing the document.
*
* @example
* parser.addAttributeFilter('src,href', function(nodes, name) {
* for (var i = 0; i < nodes.length; i++) {
* console.log(nodes[i].name);
* }
* });
* @method addAttributeFilter
* @method {String} name Comma separated list of nodes to collect.
* @param {function} callback Callback function to execute once it has collected nodes.
*/
self.addAttributeFilter = function(name, callback) {
each(explode(name), function(name) {
var i;
for (i = 0; i < attributeFilters.length; i++) {
if (attributeFilters[i].name === name) {
attributeFilters[i].callbacks.push(callback);
return;
}
}
attributeFilters.push({name: name, callbacks: [callback]});
});
};
/**
* Parses the specified HTML string into a DOM like node tree and returns the result.
*
* @example
* var rootNode = new DomParser({...}).parse('<b>text</b>');
* @method parse
* @param {String} html Html string to sax parse.
* @param {Object} args Optional args object that gets passed to all filter functions.
* @return {tinymce.html.Node} Root node containing the tree.
*/
self.parse = function(html, args) {
var parser, rootNode, node, nodes, i, l, fi, fl, list, name, validate;
var blockElements, startWhiteSpaceRegExp, invalidChildren = [], isInWhiteSpacePreservedElement;
var endWhiteSpaceRegExp, allWhiteSpaceRegExp, isAllWhiteSpaceRegExp, whiteSpaceElements;
var children, nonEmptyElements, rootBlockName;
args = args || {};
matchedNodes = {};
matchedAttributes = {};
blockElements = extend(makeMap('script,style,head,html,body,title,meta,param'), schema.getBlockElements());
nonEmptyElements = schema.getNonEmptyElements();
children = schema.children;
validate = settings.validate;
rootBlockName = "forced_root_block" in args ? args.forced_root_block : settings.forced_root_block;
whiteSpaceElements = schema.getWhiteSpaceElements();
startWhiteSpaceRegExp = /^[ \t\r\n]+/;
endWhiteSpaceRegExp = /[ \t\r\n]+$/;
allWhiteSpaceRegExp = /[ \t\r\n]+/g;
isAllWhiteSpaceRegExp = /^[ \t\r\n]+$/;
function addRootBlocks() {
var node = rootNode.firstChild, next, rootBlockNode;
// Removes whitespace at beginning and end of block so:
// <p> x </p> -> <p>x</p>
function trim(rootBlockNode) {
if (rootBlockNode) {
node = rootBlockNode.firstChild;
if (node && node.type == 3) {
node.value = node.value.replace(startWhiteSpaceRegExp, '');
}
node = rootBlockNode.lastChild;
if (node && node.type == 3) {
node.value = node.value.replace(endWhiteSpaceRegExp, '');
}
}
}
// Check if rootBlock is valid within rootNode for example if P is valid in H1 if H1 is the contentEditabe root
if (!schema.isValidChild(rootNode.name, rootBlockName.toLowerCase())) {
return;
}
while (node) {
next = node.next;
if (node.type == 3 || (node.type == 1 && node.name !== 'p' &&
!blockElements[node.name] && !node.attr('data-mce-type'))) {
if (!rootBlockNode) {
// Create a new root block element
rootBlockNode = createNode(rootBlockName, 1);
rootBlockNode.attr(settings.forced_root_block_attrs);
rootNode.insert(rootBlockNode, node);
rootBlockNode.append(node);
} else {
rootBlockNode.append(node);
}
} else {
trim(rootBlockNode);
rootBlockNode = null;
}
node = next;
}
trim(rootBlockNode);
}
function createNode(name, type) {
var node = new Node(name, type), list;
if (name in nodeFilters) {
list = matchedNodes[name];
if (list) {
list.push(node);
} else {
matchedNodes[name] = [node];
}
}
return node;
}
function removeWhitespaceBefore(node) {
var textNode, textNodeNext, textVal, sibling, blockElements = schema.getBlockElements();
for (textNode = node.prev; textNode && textNode.type === 3;) {
textVal = textNode.value.replace(endWhiteSpaceRegExp, '');
// Found a text node with non whitespace then trim that and break
if (textVal.length > 0) {
textNode.value = textVal;
return;
}
textNodeNext = textNode.next;
// Fix for bug #7543 where bogus nodes would produce empty
// text nodes and these would be removed if a nested list was before it
if (textNodeNext) {
if (textNodeNext.type == 3 && textNodeNext.value.length) {
textNode = textNode.prev;
continue;
}
if (!blockElements[textNodeNext.name] && textNodeNext.name != 'script' && textNodeNext.name != 'style') {
textNode = textNode.prev;
continue;
}
}
sibling = textNode.prev;
textNode.remove();
textNode = sibling;
}
}
function cloneAndExcludeBlocks(input) {
var name, output = {};
for (name in input) {
if (name !== 'li' && name != 'p') {
output[name] = input[name];
}
}
return output;
}
parser = new SaxParser({
validate: validate,
allow_script_urls: settings.allow_script_urls,
allow_conditional_comments: settings.allow_conditional_comments,
// Exclude P and LI from DOM parsing since it's treated better by the DOM parser
self_closing_elements: cloneAndExcludeBlocks(schema.getSelfClosingElements()),
cdata: function(text) {
node.append(createNode('#cdata', 4)).value = text;
},
text: function(text, raw) {
var textNode;
// Trim all redundant whitespace on non white space elements
if (!isInWhiteSpacePreservedElement) {
text = text.replace(allWhiteSpaceRegExp, ' ');
if (node.lastChild && blockElements[node.lastChild.name]) {
text = text.replace(startWhiteSpaceRegExp, '');
}
}
// Do we need to create the node
if (text.length !== 0) {
textNode = createNode('#text', 3);
textNode.raw = !!raw;
node.append(textNode).value = text;
}
},
comment: function(text) {
node.append(createNode('#comment', 8)).value = text;
},
pi: function(name, text) {
node.append(createNode(name, 7)).value = text;
removeWhitespaceBefore(node);
},
doctype: function(text) {
var newNode;
newNode = node.append(createNode('#doctype', 10));
newNode.value = text;
removeWhitespaceBefore(node);
},
start: function(name, attrs, empty) {
var newNode, attrFiltersLen, elementRule, attrName, parent;
elementRule = validate ? schema.getElementRule(name) : {};
if (elementRule) {
newNode = createNode(elementRule.outputName || name, 1);
newNode.attributes = attrs;
newNode.shortEnded = empty;
node.append(newNode);
// Check if node is valid child of the parent node is the child is
// unknown we don't collect it since it's probably a custom element
parent = children[node.name];
if (parent && children[newNode.name] && !parent[newNode.name]) {
invalidChildren.push(newNode);
}
attrFiltersLen = attributeFilters.length;
while (attrFiltersLen--) {
attrName = attributeFilters[attrFiltersLen].name;
if (attrName in attrs.map) {
list = matchedAttributes[attrName];
if (list) {
list.push(newNode);
} else {
matchedAttributes[attrName] = [newNode];
}
}
}
// Trim whitespace before block
if (blockElements[name]) {
removeWhitespaceBefore(newNode);
}
// Change current node if the element wasn't empty i.e not <br /> or <img />
if (!empty) {
node = newNode;
}
// Check if we are inside a whitespace preserved element
if (!isInWhiteSpacePreservedElement && whiteSpaceElements[name]) {
isInWhiteSpacePreservedElement = true;
}
}
},
end: function(name) {
var textNode, elementRule, text, sibling, tempNode;
elementRule = validate ? schema.getElementRule(name) : {};
if (elementRule) {
if (blockElements[name]) {
if (!isInWhiteSpacePreservedElement) {
// Trim whitespace of the first node in a block
textNode = node.firstChild;
if (textNode && textNode.type === 3) {
text = textNode.value.replace(startWhiteSpaceRegExp, '');
// Any characters left after trim or should we remove it
if (text.length > 0) {
textNode.value = text;
textNode = textNode.next;
} else {
sibling = textNode.next;
textNode.remove();
textNode = sibling;
// Remove any pure whitespace siblings
while (textNode && textNode.type === 3) {
text = textNode.value;
sibling = textNode.next;
if (text.length === 0 || isAllWhiteSpaceRegExp.test(text)) {
textNode.remove();
textNode = sibling;
}
textNode = sibling;
}
}
}
// Trim whitespace of the last node in a block
textNode = node.lastChild;
if (textNode && textNode.type === 3) {
text = textNode.value.replace(endWhiteSpaceRegExp, '');
// Any characters left after trim or should we remove it
if (text.length > 0) {
textNode.value = text;
textNode = textNode.prev;
} else {
sibling = textNode.prev;
textNode.remove();
textNode = sibling;
// Remove any pure whitespace siblings
while (textNode && textNode.type === 3) {
text = textNode.value;
sibling = textNode.prev;
if (text.length === 0 || isAllWhiteSpaceRegExp.test(text)) {
textNode.remove();
textNode = sibling;
}
textNode = sibling;
}
}
}
}
// Trim start white space
// Removed due to: #5424
/*textNode = node.prev;
if (textNode && textNode.type === 3) {
text = textNode.value.replace(startWhiteSpaceRegExp, '');
if (text.length > 0)
textNode.value = text;
else
textNode.remove();
}*/
}
// Check if we exited a whitespace preserved element
if (isInWhiteSpacePreservedElement && whiteSpaceElements[name]) {
isInWhiteSpacePreservedElement = false;
}
// Handle empty nodes
if (elementRule.removeEmpty || elementRule.paddEmpty) {
if (node.isEmpty(nonEmptyElements)) {
if (elementRule.paddEmpty) {
node.empty().append(new Node('#text', '3')).value = '\u00a0';
} else {
// Leave nodes that have a name like <a name="name">
if (!node.attributes.map.name && !node.attributes.map.id) {
tempNode = node.parent;
if (blockElements[node.name]) {
node.empty().remove();
} else {
node.unwrap();
}
node = tempNode;
return;
}
}
}
}
node = node.parent;
}
}
}, schema);
rootNode = node = new Node(args.context || settings.root_name, 11);
parser.parse(html);
// Fix invalid children or report invalid children in a contextual parsing
if (validate && invalidChildren.length) {
if (!args.context) {
fixInvalidChildren(invalidChildren);
} else {
args.invalid = true;
}
}
// Wrap nodes in the root into block elements if the root is body
if (rootBlockName && (rootNode.name == 'body' || args.isRootContent)) {
addRootBlocks();
}
// Run filters only when the contents is valid
if (!args.invalid) {
// Run node filters
for (name in matchedNodes) {
list = nodeFilters[name];
nodes = matchedNodes[name];
// Remove already removed children
fi = nodes.length;
while (fi--) {
if (!nodes[fi].parent) {
nodes.splice(fi, 1);
}
}
for (i = 0, l = list.length; i < l; i++) {
list[i](nodes, name, args);
}
}
// Run attribute filters
for (i = 0, l = attributeFilters.length; i < l; i++) {
list = attributeFilters[i];
if (list.name in matchedAttributes) {
nodes = matchedAttributes[list.name];
// Remove already removed children
fi = nodes.length;
while (fi--) {
if (!nodes[fi].parent) {
nodes.splice(fi, 1);
}
}
for (fi = 0, fl = list.callbacks.length; fi < fl; fi++) {
list.callbacks[fi](nodes, list.name, args);
}
}
}
}
return rootNode;
};
// Remove <br> at end of block elements Gecko and WebKit injects BR elements to
// make it possible to place the caret inside empty blocks. This logic tries to remove
// these elements and keep br elements that where intended to be there intact
if (settings.remove_trailing_brs) {
self.addNodeFilter('br', function(nodes) {
var i, l = nodes.length, node, blockElements = extend({}, schema.getBlockElements());
var nonEmptyElements = schema.getNonEmptyElements(), parent, lastParent, prev, prevName;
var elementRule, textNode;
// Remove brs from body element as well
blockElements.body = 1;
// Must loop forwards since it will otherwise remove all brs in <p>a<br><br><br></p>
for (i = 0; i < l; i++) {
node = nodes[i];
parent = node.parent;
if (blockElements[node.parent.name] && node === parent.lastChild) {
// Loop all nodes to the left of the current node and check for other BR elements
// excluding bookmarks since they are invisible
prev = node.prev;
while (prev) {
prevName = prev.name;
// Ignore bookmarks
if (prevName !== "span" || prev.attr('data-mce-type') !== 'bookmark') {
// Found a non BR element
if (prevName !== "br") {
break;
}
// Found another br it's a <br><br> structure then don't remove anything
if (prevName === 'br') {
node = null;
break;
}
}
prev = prev.prev;
}
if (node) {
node.remove();
// Is the parent to be considered empty after we removed the BR
if (parent.isEmpty(nonEmptyElements)) {
elementRule = schema.getElementRule(parent.name);
// Remove or padd the element depending on schema rule
if (elementRule) {
if (elementRule.removeEmpty) {
parent.remove();
} else if (elementRule.paddEmpty) {
parent.empty().append(new Node('#text', 3)).value = '\u00a0';
}
}
}
}
} else {
// Replaces BR elements inside inline elements like <p><b><i><br></i></b></p>
// so they become <p><b><i> </i></b></p>
lastParent = node;
while (parent && parent.firstChild === lastParent && parent.lastChild === lastParent) {
lastParent = parent;
if (blockElements[parent.name]) {
break;
}
parent = parent.parent;
}
if (lastParent === parent) {
textNode = new Node('#text', 3);
textNode.value = '\u00a0';
node.replace(textNode);
}
}
}
});
}
// Force anchor names closed, unless the setting "allow_html_in_named_anchor" is explicitly included.
if (!settings.allow_html_in_named_anchor) {
self.addAttributeFilter('id,name', function(nodes) {
var i = nodes.length, sibling, prevSibling, parent, node;
while (i--) {
node = nodes[i];
if (node.name === 'a' && node.firstChild && !node.attr('href')) {
parent = node.parent;
// Move children after current node
sibling = node.lastChild;
do {
prevSibling = sibling.prev;
parent.insert(sibling, node);
sibling = prevSibling;
} while (sibling);
}
}
});
}
if (settings.validate && schema.getValidClasses()) {
self.addAttributeFilter('class', function(nodes) {
var i = nodes.length, node, classList, ci, className, classValue;
var validClasses = schema.getValidClasses(), validClassesMap, valid;
while (i--) {
node = nodes[i];
classList = node.attr('class').split(' ');
classValue = '';
for (ci = 0; ci < classList.length; ci++) {
className = classList[ci];
valid = false;
validClassesMap = validClasses['*'];
if (validClassesMap && validClassesMap[className]) {
valid = true;
}
validClassesMap = validClasses[node.name];
if (!valid && validClassesMap && validClassesMap[className]) {
valid = true;
}
if (valid) {
if (classValue) {
classValue += ' ';
}
classValue += className;
}
}
if (!classValue.length) {
classValue = null;
}
node.attr('class', classValue);
}
});
}
};
});
|