On May 27, 12:58 am, "David" <n...@none.com> wrote:
Hi All,
I am working on a script that is theoretically simple, but I cannot get it to
work completely. I am dealing with a page spit out by .NET that leaves empty
tags in the markup. I need a JavaScript solution to go behind it and do a
cleanup after the page loads.
The .NET will leave behind any combination of nested tags. Here is an
example below. Even though the <span> tags are not empty, as they contain
<em> tags, they also need to be removed.
<p>
<span><em></em></span>
<span><em></em></span>
<span><em></em></span>
</p>
Here is a simple test page of what I have done so far. It does remove some
of the tags but always leaves behind some empty tags... http://mysite.verizon.net/res8xvny/removeTags.html
Have you considered going down the DOM and removing any in-line element
whose textContent or innerText is empty? That way you don't have to
descend into nested empty nodes; they will be removed as soon as you reach
and remove their highest empty ancestor.
/**
 * Return the text contained in an element.
 *
 * The native `textContent` property is used when it is a string, then
 * `innerText`; only when neither exists are the child nodes walked by hand.
 * Note that the hand-rolled walk collapses every run of whitespace to a
 * single space, whereas the native properties are returned untouched.
 *
 * @param {Object|string} el - The element, or the id of an element to be
 *     looked up with document.getElementById.
 * @returns {string} The text of the element and its descendants.
 */
function getText(el)
{
  var root = (typeof el === 'string') ? document.getElementById(el) : el;

  // Try DOM 3 textContent property first
  if (typeof root.textContent === 'string') { return root.textContent; }

  // Try MS innerText property
  if (typeof root.innerText === 'string') { return root.innerText; }

  return collect(root);

  // Recurse over child nodes, gathering the data of every text node.
  function collect(parent) {
    // Compare nodeType against the numeric values rather than against
    // parent.TEXT_NODE / parent.ELEMENT_NODE: those constants are missing
    // on nodes in some older browsers, and comparing with undefined would
    // make every node look like neither text nor element.
    var TEXT_NODE = 3;
    var ELEMENT_NODE = 1;
    var kids = parent.childNodes;
    var parts = [];
    var kid;

    for (var i = 0, len = kids.length; i < len; ++i) {
      kid = kids[i];
      if (kid.nodeType === TEXT_NODE) {
        parts.push(kid.data);
      } else if (kid.nodeType === ELEMENT_NODE) {
        parts.push(collect(kid));
      }
      // Comments, processing instructions, etc. contribute no text.
    }
    return parts.join('').replace(/\s+/g, ' ');
  }
}
/**
 * Remove from the document every element that contains no text at all.
 *
 * An element is kept when any of the following holds:
 *   - its tag is one that is meaningful without text (see keepWhenEmpty);
 *   - getText() returns something other than '' for it (whitespace-only
 *     text therefore counts as content);
 *   - one of its descendants is a keepWhenEmpty element, e.g. a link that
 *     wraps only an image.
 *
 * Nested empty elements such as <span><em></em></span> go in one step:
 * the outermost empty element is removed together with its subtree.
 */
function removeEmptyNodes() {
  // Elements that are allowed to be empty. 'img' is the real tag name;
  // 'image' is kept only because earlier versions of this list had it.
  var keepWhenEmpty = {
    area: 1, base: 1, basefont: 1, body: 1, br: 1, col: 1, embed: 1,
    frame: 1, head: 1, hr: 1, html: 1, iframe: 1, image: 1, img: 1,
    input: 1, isindex: 1, link: 1, meta: 1, object: 1, param: 1,
    script: 1, style: 1, textarea: 1, title: 1
  };

  // True when el's tag is in keepWhenEmpty (tag names compare case-insensitively).
  function isKept(el) {
    var tag = String(el.tagName).toLowerCase();
    return Object.prototype.hasOwnProperty.call(keepWhenEmpty, tag);
  }

  // True when any descendant element of el is in keepWhenEmpty.
  function containsKept(el) {
    var kids = el.childNodes || [];
    var kid;
    for (var k = 0; k < kids.length; k++) {
      kid = kids[k];
      if (kid.nodeType === 1 && (isKept(kid) || containsKept(kid))) {
        return true;
      }
    }
    return false;
  }

  var nodes = document.getElementsByTagName('*');
  var node;

  // The collection is live: removing a node (and with it its descendants)
  // shortens it, so length is re-read on every pass.
  for (var i = 0; i < nodes.length; i++) {
    node = nodes[i];

    // Only removes nodes where the text is exactly ''; this could be
    // extended to nodes whose text matches /^\s*$/.
    if (isKept(node) || getText(node) !== '' || containsKept(node)) {
      continue;
    }

    node.parentNode.removeChild(node);
    // The node at index i is gone and its successor now sits at i, so back up.
    --i;
  }
}
--
Rob