当字符串包含html实体时，在Javascript中设置文本节点的nodeValue

发布于 2024-07-13 08:43:07 字数 137 浏览 14 评论 0原文

当我用下面的方式设置文本节点的值时：

node.nodeValue="string with &#xxxx; sort of characters"

＆符号会被转义。有没有简单的方法可以做到这一点？

原文

When I set a value of a text node with

node.nodeValue="string with &#xxxx; sort of characters"

ampersand gets escaped. Is there an easy way to do this?

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

凉宸 2024-07-20 08:43:07

您需要对 Unicode 字符使用 Javascript 转义：

node.nodeValue="string with \uxxxx sort of characters"

You need to use Javascript escapes for the Unicode characters:

node.nodeValue="string with \uxxxx sort of characters"

回复收藏 0 原文

思慕 2024-07-20 08:43:07

来自 http://code.google.com/p/jslibs/wiki/JavascriptTips：

（同时转换实体引用和数字实体）

// Lookup table mapping HTML named character entities (the name without the
// leading "&" and trailing ";") to the numeric code of the character they
// stand for. `__proto__: null` creates the object without a prototype, so a
// lookup only ever sees the entries listed here.
const entityToCode = { __proto__: null,
apos:0x0027,quot:0x0022,amp:0x0026,lt:0x003C,gt:0x003E,nbsp:0x00A0,iexcl:0x00A1,cent:0x00A2,pound:0x00A3,
curren:0x00A4,yen:0x00A5,brvbar:0x00A6,sect:0x00A7,uml:0x00A8,copy:0x00A9,ordf:0x00AA,laquo:0x00AB,
not:0x00AC,shy:0x00AD,reg:0x00AE,macr:0x00AF,deg:0x00B0,plusmn:0x00B1,sup2:0x00B2,sup3:0x00B3,
acute:0x00B4,micro:0x00B5,para:0x00B6,middot:0x00B7,cedil:0x00B8,sup1:0x00B9,ordm:0x00BA,raquo:0x00BB,
frac14:0x00BC,frac12:0x00BD,frac34:0x00BE,iquest:0x00BF,Agrave:0x00C0,Aacute:0x00C1,Acirc:0x00C2,Atilde:0x00C3,
Auml:0x00C4,Aring:0x00C5,AElig:0x00C6,Ccedil:0x00C7,Egrave:0x00C8,Eacute:0x00C9,Ecirc:0x00CA,Euml:0x00CB,
Igrave:0x00CC,Iacute:0x00CD,Icirc:0x00CE,Iuml:0x00CF,ETH:0x00D0,Ntilde:0x00D1,Ograve:0x00D2,Oacute:0x00D3,
Ocirc:0x00D4,Otilde:0x00D5,Ouml:0x00D6,times:0x00D7,Oslash:0x00D8,Ugrave:0x00D9,Uacute:0x00DA,Ucirc:0x00DB,
Uuml:0x00DC,Yacute:0x00DD,THORN:0x00DE,szlig:0x00DF,agrave:0x00E0,aacute:0x00E1,acirc:0x00E2,atilde:0x00E3,
auml:0x00E4,aring:0x00E5,aelig:0x00E6,ccedil:0x00E7,egrave:0x00E8,eacute:0x00E9,ecirc:0x00EA,euml:0x00EB,
igrave:0x00EC,iacute:0x00ED,icirc:0x00EE,iuml:0x00EF,eth:0x00F0,ntilde:0x00F1,ograve:0x00F2,oacute:0x00F3,
ocirc:0x00F4,otilde:0x00F5,ouml:0x00F6,divide:0x00F7,oslash:0x00F8,ugrave:0x00F9,uacute:0x00FA,ucirc:0x00FB,
uuml:0x00FC,yacute:0x00FD,thorn:0x00FE,yuml:0x00FF,OElig:0x0152,oelig:0x0153,Scaron:0x0160,scaron:0x0161,
Yuml:0x0178,fnof:0x0192,circ:0x02C6,tilde:0x02DC,Alpha:0x0391,Beta:0x0392,Gamma:0x0393,Delta:0x0394,
Epsilon:0x0395,Zeta:0x0396,Eta:0x0397,Theta:0x0398,Iota:0x0399,Kappa:0x039A,Lambda:0x039B,Mu:0x039C,
Nu:0x039D,Xi:0x039E,Omicron:0x039F,Pi:0x03A0,Rho:0x03A1,Sigma:0x03A3,Tau:0x03A4,Upsilon:0x03A5,
Phi:0x03A6,Chi:0x03A7,Psi:0x03A8,Omega:0x03A9,alpha:0x03B1,beta:0x03B2,gamma:0x03B3,delta:0x03B4,
epsilon:0x03B5,zeta:0x03B6,eta:0x03B7,theta:0x03B8,iota:0x03B9,kappa:0x03BA,lambda:0x03BB,mu:0x03BC,
nu:0x03BD,xi:0x03BE,omicron:0x03BF,pi:0x03C0,rho:0x03C1,sigmaf:0x03C2,sigma:0x03C3,tau:0x03C4,
upsilon:0x03C5,phi:0x03C6,chi:0x03C7,psi:0x03C8,omega:0x03C9,thetasym:0x03D1,upsih:0x03D2,piv:0x03D6,
ensp:0x2002,emsp:0x2003,thinsp:0x2009,zwnj:0x200C,zwj:0x200D,lrm:0x200E,rlm:0x200F,ndash:0x2013,
mdash:0x2014,lsquo:0x2018,rsquo:0x2019,sbquo:0x201A,ldquo:0x201C,rdquo:0x201D,bdquo:0x201E,dagger:0x2020,
Dagger:0x2021,bull:0x2022,hellip:0x2026,permil:0x2030,prime:0x2032,Prime:0x2033,lsaquo:0x2039,rsaquo:0x203A,
oline:0x203E,frasl:0x2044,euro:0x20AC,image:0x2111,weierp:0x2118,real:0x211C,trade:0x2122,alefsym:0x2135,
larr:0x2190,uarr:0x2191,rarr:0x2192,darr:0x2193,harr:0x2194,crarr:0x21B5,lArr:0x21D0,uArr:0x21D1,
rArr:0x21D2,dArr:0x21D3,hArr:0x21D4,forall:0x2200,part:0x2202,exist:0x2203,empty:0x2205,nabla:0x2207,
isin:0x2208,notin:0x2209,ni:0x220B,prod:0x220F,sum:0x2211,minus:0x2212,lowast:0x2217,radic:0x221A,
prop:0x221D,infin:0x221E,ang:0x2220,and:0x2227,or:0x2228,cap:0x2229,cup:0x222A,int:0x222B,
there4:0x2234,sim:0x223C,cong:0x2245,asymp:0x2248,ne:0x2260,equiv:0x2261,le:0x2264,ge:0x2265,
sub:0x2282,sup:0x2283,nsub:0x2284,sube:0x2286,supe:0x2287,oplus:0x2295,otimes:0x2297,perp:0x22A5,
sdot:0x22C5,lceil:0x2308,rceil:0x2309,lfloor:0x230A,rfloor:0x230B,lang:0x2329,rang:0x232A,loz:0x25CA,
spades:0x2660,clubs:0x2663,hearts:0x2665,diams:0x2666
};

// Inverse of entityToCode: maps each character back to the name of the
// entity that produces it.
var charToEntity = {};
for ( var entityName in entityToCode ) {
    const codeUnit = entityToCode[entityName];
    charToEntity[String.fromCharCode(codeUnit)] = entityName;
}

/**
 * Replace characters outside the printable ASCII range (U+0020 to U+007E)
 * with their named HTML entity when `charToEntity` has an entry for them.
 * Characters without a known entity name, and all printable ASCII characters
 * (including `&`, `<` and `>`), are returned unchanged.
 *
 * The original used the Mozilla-only "expression closure" form
 * (`function f(x) expr`), which is a SyntaxError in current engines; this is
 * the same logic written as standard function bodies.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The text with known non-ASCII characters written as `&name;`.
 */
function EscapeEntities(str) {
    return str.replace(/[^\x20-\x7E]/g, function(ch) {
        return charToEntity[ch] ? '&'+charToEntity[ch]+';' : ch;
    });
}

/**
 * Decode HTML character references in a string.
 *
 * Recognizes named references listed in `entityToCode` (e.g. `&copy;`),
 * decimal references (`&#169;`) and hexadecimal references (`&#xA9;` or
 * `&#XA9;`). Text that merely looks similar (such as "A & B; C"), unknown
 * entity names and numbers above U+10FFFF are left exactly as written.
 *
 * @param {string} str - Text that may contain character references.
 * @returns {string} The text with every recognized reference replaced by its character.
 */
function unescapeEntities(str) {
    return str.replace(
     // Only well-formed references match; a bare "&" followed later by ";" does not.
     /&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g,
     function(match, ent) {
        var code;
        if ( ent[0] !== '#' ) {
            code = entityToCode[ent];
        } else if ( ent[1] === 'x' || ent[1] === 'X' ) {
            code = parseInt(ent.slice(2), 16);
        } else {
            code = parseInt(ent.slice(1), 10);
        }
        // Unknown names yield a non-number; oversized numbers are not valid
        // code points. Keep the source text instead of emitting U+0000 or throwing.
        if ( typeof code !== 'number' || code > 0x10FFFF ) {
            return match;
        }
        // fromCodePoint (unlike fromCharCode) handles values above U+FFFF.
        return String.fromCodePoint(code);
     });
}

From http://code.google.com/p/jslibs/wiki/JavascriptTips:

(converts both entity references and numeric entities)

// Lookup table mapping HTML named character entities (the name without the
// leading "&" and trailing ";") to the numeric code of the character they
// stand for. `__proto__: null` creates the object without a prototype, so a
// lookup only ever sees the entries listed here.
const entityToCode = { __proto__: null,
apos:0x0027,quot:0x0022,amp:0x0026,lt:0x003C,gt:0x003E,nbsp:0x00A0,iexcl:0x00A1,cent:0x00A2,pound:0x00A3,
curren:0x00A4,yen:0x00A5,brvbar:0x00A6,sect:0x00A7,uml:0x00A8,copy:0x00A9,ordf:0x00AA,laquo:0x00AB,
not:0x00AC,shy:0x00AD,reg:0x00AE,macr:0x00AF,deg:0x00B0,plusmn:0x00B1,sup2:0x00B2,sup3:0x00B3,
acute:0x00B4,micro:0x00B5,para:0x00B6,middot:0x00B7,cedil:0x00B8,sup1:0x00B9,ordm:0x00BA,raquo:0x00BB,
frac14:0x00BC,frac12:0x00BD,frac34:0x00BE,iquest:0x00BF,Agrave:0x00C0,Aacute:0x00C1,Acirc:0x00C2,Atilde:0x00C3,
Auml:0x00C4,Aring:0x00C5,AElig:0x00C6,Ccedil:0x00C7,Egrave:0x00C8,Eacute:0x00C9,Ecirc:0x00CA,Euml:0x00CB,
Igrave:0x00CC,Iacute:0x00CD,Icirc:0x00CE,Iuml:0x00CF,ETH:0x00D0,Ntilde:0x00D1,Ograve:0x00D2,Oacute:0x00D3,
Ocirc:0x00D4,Otilde:0x00D5,Ouml:0x00D6,times:0x00D7,Oslash:0x00D8,Ugrave:0x00D9,Uacute:0x00DA,Ucirc:0x00DB,
Uuml:0x00DC,Yacute:0x00DD,THORN:0x00DE,szlig:0x00DF,agrave:0x00E0,aacute:0x00E1,acirc:0x00E2,atilde:0x00E3,
auml:0x00E4,aring:0x00E5,aelig:0x00E6,ccedil:0x00E7,egrave:0x00E8,eacute:0x00E9,ecirc:0x00EA,euml:0x00EB,
igrave:0x00EC,iacute:0x00ED,icirc:0x00EE,iuml:0x00EF,eth:0x00F0,ntilde:0x00F1,ograve:0x00F2,oacute:0x00F3,
ocirc:0x00F4,otilde:0x00F5,ouml:0x00F6,divide:0x00F7,oslash:0x00F8,ugrave:0x00F9,uacute:0x00FA,ucirc:0x00FB,
uuml:0x00FC,yacute:0x00FD,thorn:0x00FE,yuml:0x00FF,OElig:0x0152,oelig:0x0153,Scaron:0x0160,scaron:0x0161,
Yuml:0x0178,fnof:0x0192,circ:0x02C6,tilde:0x02DC,Alpha:0x0391,Beta:0x0392,Gamma:0x0393,Delta:0x0394,
Epsilon:0x0395,Zeta:0x0396,Eta:0x0397,Theta:0x0398,Iota:0x0399,Kappa:0x039A,Lambda:0x039B,Mu:0x039C,
Nu:0x039D,Xi:0x039E,Omicron:0x039F,Pi:0x03A0,Rho:0x03A1,Sigma:0x03A3,Tau:0x03A4,Upsilon:0x03A5,
Phi:0x03A6,Chi:0x03A7,Psi:0x03A8,Omega:0x03A9,alpha:0x03B1,beta:0x03B2,gamma:0x03B3,delta:0x03B4,
epsilon:0x03B5,zeta:0x03B6,eta:0x03B7,theta:0x03B8,iota:0x03B9,kappa:0x03BA,lambda:0x03BB,mu:0x03BC,
nu:0x03BD,xi:0x03BE,omicron:0x03BF,pi:0x03C0,rho:0x03C1,sigmaf:0x03C2,sigma:0x03C3,tau:0x03C4,
upsilon:0x03C5,phi:0x03C6,chi:0x03C7,psi:0x03C8,omega:0x03C9,thetasym:0x03D1,upsih:0x03D2,piv:0x03D6,
ensp:0x2002,emsp:0x2003,thinsp:0x2009,zwnj:0x200C,zwj:0x200D,lrm:0x200E,rlm:0x200F,ndash:0x2013,
mdash:0x2014,lsquo:0x2018,rsquo:0x2019,sbquo:0x201A,ldquo:0x201C,rdquo:0x201D,bdquo:0x201E,dagger:0x2020,
Dagger:0x2021,bull:0x2022,hellip:0x2026,permil:0x2030,prime:0x2032,Prime:0x2033,lsaquo:0x2039,rsaquo:0x203A,
oline:0x203E,frasl:0x2044,euro:0x20AC,image:0x2111,weierp:0x2118,real:0x211C,trade:0x2122,alefsym:0x2135,
larr:0x2190,uarr:0x2191,rarr:0x2192,darr:0x2193,harr:0x2194,crarr:0x21B5,lArr:0x21D0,uArr:0x21D1,
rArr:0x21D2,dArr:0x21D3,hArr:0x21D4,forall:0x2200,part:0x2202,exist:0x2203,empty:0x2205,nabla:0x2207,
isin:0x2208,notin:0x2209,ni:0x220B,prod:0x220F,sum:0x2211,minus:0x2212,lowast:0x2217,radic:0x221A,
prop:0x221D,infin:0x221E,ang:0x2220,and:0x2227,or:0x2228,cap:0x2229,cup:0x222A,int:0x222B,
there4:0x2234,sim:0x223C,cong:0x2245,asymp:0x2248,ne:0x2260,equiv:0x2261,le:0x2264,ge:0x2265,
sub:0x2282,sup:0x2283,nsub:0x2284,sube:0x2286,supe:0x2287,oplus:0x2295,otimes:0x2297,perp:0x22A5,
sdot:0x22C5,lceil:0x2308,rceil:0x2309,lfloor:0x230A,rfloor:0x230B,lang:0x2329,rang:0x232A,loz:0x25CA,
spades:0x2660,clubs:0x2663,hearts:0x2665,diams:0x2666
};

// Inverse of entityToCode: maps each character back to the name of the
// entity that produces it.
var charToEntity = {};
for ( var entityName in entityToCode ) {
    const codeUnit = entityToCode[entityName];
    charToEntity[String.fromCharCode(codeUnit)] = entityName;
}

/**
 * Replace characters outside the printable ASCII range (U+0020 to U+007E)
 * with their named HTML entity when `charToEntity` has an entry for them.
 * Characters without a known entity name, and all printable ASCII characters
 * (including `&`, `<` and `>`), are returned unchanged.
 *
 * The original used the Mozilla-only "expression closure" form
 * (`function f(x) expr`), which is a SyntaxError in current engines; this is
 * the same logic written as standard function bodies.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The text with known non-ASCII characters written as `&name;`.
 */
function EscapeEntities(str) {
    return str.replace(/[^\x20-\x7E]/g, function(ch) {
        return charToEntity[ch] ? '&'+charToEntity[ch]+';' : ch;
    });
}

/**
 * Decode HTML character references in a string.
 *
 * Recognizes named references listed in `entityToCode` (e.g. `&copy;`),
 * decimal references (`&#169;`) and hexadecimal references (`&#xA9;` or
 * `&#XA9;`). Text that merely looks similar (such as "A & B; C"), unknown
 * entity names and numbers above U+10FFFF are left exactly as written.
 *
 * @param {string} str - Text that may contain character references.
 * @returns {string} The text with every recognized reference replaced by its character.
 */
function unescapeEntities(str) {
    return str.replace(
     // Only well-formed references match; a bare "&" followed later by ";" does not.
     /&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g,
     function(match, ent) {
        var code;
        if ( ent[0] !== '#' ) {
            code = entityToCode[ent];
        } else if ( ent[1] === 'x' || ent[1] === 'X' ) {
            code = parseInt(ent.slice(2), 16);
        } else {
            code = parseInt(ent.slice(1), 10);
        }
        // Unknown names yield a non-number; oversized numbers are not valid
        // code points. Keep the source text instead of emitting U+0000 or throwing.
        if ( typeof code !== 'number' || code > 0x10FFFF ) {
            return match;
        }
        // fromCodePoint (unlike fromCharCode) handles values above U+FFFF.
        return String.fromCodePoint(code);
     });
}

回复收藏 0 原文

薯片软お妹 2024-07-20 08:43:07

发生这种情况的原因是，字符串中的 & 被浏览器扩展成了 &amp; 实体。要解决这个问题，您需要自己转换实体。

<html>
<body>
    <div id="test"> </div>
</body>

<script type="text/javascript">

// When the page has loaded, decode the sample string and write the result
// into the text node inside the #test element.
window.onload = function()
{
    var node = document.getElementById( 'test' );
    // The sample has to contain literal "&#NNN;" references (187 is »,
    // 171 is «); with the characters already decoded there is nothing for
    // convertEntities to convert.
    node.firstChild.nodeValue = convertEntities( 'Some &#187; entities &#171; and some &#187; more entities &#171;' );
};

/**
 * Replace every decimal numeric character reference ("&#NNN;") in `text`
 * with the character it denotes.
 *
 * Done in a single pass, so text without any reference is returned
 * unchanged (the earlier match-then-loop version threw on a null match) and
 * a character produced by one replacement is never decoded a second time.
 * The debug console output of the earlier version is gone.
 *
 * @param {string} text - Text that may contain "&#NNN;" references.
 * @returns {string} The text with each valid reference replaced by its character.
 */
function convertEntities( text )
{
    return text.replace( /&#(\d+);/g, convertEntity );

    // Turn one matched reference into its character; `digits` is the decimal
    // number captured between "&#" and ";".
    function convertEntity( ent, digits )
    {
        var num = parseInt( digits, 10 );
        // Numbers above U+10FFFF are not code points; leave the text as written.
        if ( num > 0x10FFFF )
        {
            return ent;
        }
        // fromCodePoint (unlike fromCharCode) handles values above U+FFFF.
        return String.fromCodePoint( num );
    }
}

</script>

</html>

The reason this is happening is because the & in your string is being expanded into the ampersand entity by the browser. To get around this, you'll need to convert the entities yourself.

<html>
<body>
    <div id="test"> </div>
</body>

<script type="text/javascript">

// When the page has loaded, decode the sample string and write the result
// into the text node inside the #test element.
window.onload = function()
{
    var node = document.getElementById( 'test' );
    // The sample has to contain literal "&#NNN;" references (187 is »,
    // 171 is «); with the characters already decoded there is nothing for
    // convertEntities to convert.
    node.firstChild.nodeValue = convertEntities( 'Some &#187; entities &#171; and some &#187; more entities &#171;' );
};

/**
 * Replace every decimal numeric character reference ("&#NNN;") in `text`
 * with the character it denotes.
 *
 * Done in a single pass, so text without any reference is returned
 * unchanged (the earlier match-then-loop version threw on a null match) and
 * a character produced by one replacement is never decoded a second time.
 * The debug console output of the earlier version is gone.
 *
 * @param {string} text - Text that may contain "&#NNN;" references.
 * @returns {string} The text with each valid reference replaced by its character.
 */
function convertEntities( text )
{
    return text.replace( /&#(\d+);/g, convertEntity );

    // Turn one matched reference into its character; `digits` is the decimal
    // number captured between "&#" and ";".
    function convertEntity( ent, digits )
    {
        var num = parseInt( digits, 10 );
        // Numbers above U+10FFFF are not code points; leave the text as written.
        if ( num > 0x10FFFF )
        {
            return ent;
        }
        // fromCodePoint (unlike fromCharCode) handles values above U+FFFF.
        return String.fromCodePoint( num );
    }
}

</script>

</html>

回复收藏 0 原文

羅雙樹 2024-07-20 08:43:07

正如其他答案中所述，我需要用 javascript 编码的实体替换 html 编码的实体。从 BaileyP 的回答开始，我已经做了这个：

/**
 * Replace every decimal numeric character reference ("&#NNN;") in `text`
 * with the character it denotes.
 *
 * @param {string} text - Text that may contain "&#NNN;" references.
 * @returns {string} The text with each valid reference replaced by its character.
 */
function convertEntities( text )
{
    return text.replace( /&#(\d+);/g, function ( ent, captureGroup )
    {
        var num = parseInt( captureGroup, 10 );
        // Numbers above U+10FFFF are not code points; leave the text as written.
        if ( num > 0x10FFFF )
        {
            return ent;
        }
        // fromCharCode would truncate values above U+FFFF to 16 bits and
        // yield the wrong character; fromCodePoint handles the full range.
        return String.fromCodePoint( num );
    });
}

As noted in other answers, I need to replace html encoded entities with javascript encoded ones. Starting from BaileyP's answer, I've made this:

/**
 * Replace every decimal numeric character reference ("&#NNN;") in `text`
 * with the character it denotes.
 *
 * @param {string} text - Text that may contain "&#NNN;" references.
 * @returns {string} The text with each valid reference replaced by its character.
 */
function convertEntities( text )
{
    return text.replace( /&#(\d+);/g, function ( ent, captureGroup )
    {
        var num = parseInt( captureGroup, 10 );
        // Numbers above U+10FFFF are not code points; leave the text as written.
        if ( num > 0x10FFFF )
        {
            return ent;
        }
        // fromCharCode would truncate values above U+FFFF to 16 bits and
        // yield the wrong character; fromCodePoint handles the full range.
        return String.fromCodePoint( num );
    });
}

回复收藏 0 原文

小清晰的声音 2024-07-20 08:43:07

OP 有实体/实体引用，并希望它们出现在 DOM 中的文本节点中。

这就是为什么接受的答案和许多其他答案很棒；这些答案将实体转换为其 unicode 等效项 使用 Javascript unicode 转义序列。

但我有不同的需求，我有 unicode 字符，我想将它们作为实体引用放入文本节点中。我特别想要实体引用，以便表示我的文档的 XML 字符串可以用 ASCII 编码（即 encoding="ascii"）。否则，正如 @Bjorn 所说，Unicode 字符将被“解码为垃圾”

这就是我想要的，注意 ASCII 编码：

<?xml version='1.0' encoding='ASCII'?>
<html>
<body>
    &ldquo;Quotes&rdquo;
</body>
</html>

上面的 ASCII 编码的 XML/HTML 在浏览器中看起来不错

（图片 sstatic.net/9oCjC.png：ASCII 编码的 XML/HTML，对双引号使用实体引用 &ldquo; 和 &rdquo;，在浏览器中按预期呈现）

所以我不能使用其他答案，因为它们插入了 unicode 字符（但我想要 ASCII）。

我无法使用 DOM 文本节点 API 插入未转义实体引用。正如OP指出的：如果您使用DOM文本节点API来设置节点的 textContent 或 nodeValue DOM 将始终转义您尝试注入的任何实体...

...因此 & 变为 &amp;
...并且 &ldquo; 变为 &amp;ldquo;

作为已删除的答案建议，您可以尝试直接操作 HTML使用 innerHTML 或 outerHTML，但 Text API 没有这些属性。

即使您正在处理非文本节点（例如 <span>），DOM API
在我的浏览器中也不会让实体保持原样；实体会被“解析”/解引用为对应的 utf-8 字符串，例如 &ldquo; 变成 “

temp1.innerHTML='&ldquo;'
'&ldquo;'  // note how I set the entity reference
temp1.innerHTML;
'“'        // note how the unicode character comes back out; not the entity reference

但我想要我的文档是 ASCII 编码的，我不能使用 DOM 设置的 UTF-8 字符； “ 和 ” 将被“解码为垃圾”，如下所示：

（图片：直接使用字符 “ 和 ” 的 ASCII 编码 XML/HTML 在浏览器中被“解码为垃圾”）

是的，我可以简单地使用 utf-8 编码，因此我不需要实体引用（示例如下所示），但我更喜欢尊重原始编码（恰好是 ASCII）。

因此，如果您只使用 DOM，则没有好方法来放置未转义的内容实体引用到文本节点时，它们要么被转义，要么取消引用为 utf-8 行为。我认为这是设计/预期行为，我很欣赏那...如果您只是操纵 DOM 来更改浏览器中呈现的内容，这可能没有问题。

但就我而言，我使用 DOM 创建和下载 XML 文档，因此我有机会获取 outerHTML 字符串并独立于 DOM API 操作它下载它。

我获取 outerHTML 并运行下面的函数，将非 ASCII 字符转换为等效的实体（C# 中也有类似的方法）。通过用实体引用替换非 ASCII 字符，我的文档可以编码为 ASCII 并被正确读取。

/**
 * Replace every non-ASCII character in `s` with a decimal numeric character
 * reference ("&#NNNN;"), so the result contains only ASCII characters.
 *
 * @param {?string} s - Text to convert; a falsy value is treated as "".
 * @returns {string} The text with each non-ASCII character written as "&#NNNN;".
 */
const replaceNonAsciiWithNumCharRefEntity = (s) => {

    return (s || '')
        .replace(
            // The `u` flag makes the pattern match whole code points. Without
            // it a character above U+FFFF is matched as two separate
            // surrogate halves and comes out as two invalid references.
            /[^\x00-\x7F]/gu,
            (ch) => `&#${ch.codePointAt(0)};`
        );
};

The OP has entities / entity references, and wants them to appear in the DOM in a text node.

That's why the accepted answer and many other answers are great; those answers convert entities to their unicode equivalents using Javascript unicode escape sequences.

But I had a different need, I had unicode characters and I want to put them into the text node as entity references. I want entity references specifically so that the XML string representing my document could be encoded in ASCII (i.e. encoding="ascii"). Otherwise, as @Bjorn said, the Unicode characters would be "decoded as junk"

This is what I want, note the ASCII encoding:

<?xml version='1.0' encoding='ASCII'?>
<html>
<body>
    &ldquo;Quotes&rdquo;
</body>
</html>

The ASCII encoded XML/HTML above looks good in a browser:

So I can't use the other answers because they insert unicode characters (but I want ASCII).

And I can't use the DOM text node API to insert unescaped entity references. As the OP points out: if you use DOM text node API to set the node's textContent or nodeValue DOM will always escape any entities you try to inject...

...so & becomes &amp;
... and &ldquo; becomes &amp;ldquo;

As a deleted answer suggested, you could try to manipulate HTML directly using innerHTML or outerHTML, but the Text API does not have those properties.

Even if you are working on a non-Text node (like a <span>), the DOM API
in my browser won't leave the entities intact; the entities are "parsed"/dereferenced to their UTF-8 strings, e.g. &ldquo; becomes “

temp1.innerHTML='&ldquo;'
'&ldquo;'  // note how I set the entity reference
temp1.innerHTML;
'“'        // note how the unicode character comes back out; not the entity reference

But I want my document to be ASCII encoded, I can't use the UTF-8 characters as set by the DOM; “ and ” will be "decoded as junk" as shown below:

Yes I could simply use utf-8 encoding and therefore I don't need entity references (example shown below), but I prefer to respect the original encoding (which happened to be ASCII).

So if you are only using DOM, there's no good way to put unescaped entity references into the text nodes, they are either escaped or dereferenced to utf-8 behavior. I think this is as-designed/expected behavior, and I appreciate that... If you're only manipulating the DOM to change what renders in your browser, this might be no problem.

But in my case I was using the DOM to create and download an XML document, so I had an opportunity to get the outerHTML string and manipulate it independently of the DOM API before downloading it.

I get the outerHTML and run the function below to convert non-ASCII characters to their entity equivalents (similar approach in C#). By replacing the non-ASCII with entity references, my document could be encoded as ASCII and read without problems.

/**
 * Replace every non-ASCII character in `s` with a decimal numeric character
 * reference ("&#NNNN;"), so the result contains only ASCII characters.
 *
 * @param {?string} s - Text to convert; a falsy value is treated as "".
 * @returns {string} The text with each non-ASCII character written as "&#NNNN;".
 */
const replaceNonAsciiWithNumCharRefEntity = (s) => {

    return (s || '')
        .replace(
            // The `u` flag makes the pattern match whole code points. Without
            // it a character above U+FFFF is matched as two separate
            // surrogate halves and comes out as two invalid references.
            /[^\x00-\x7F]/gu,
            (ch) => `&#${ch.codePointAt(0)};`
        );
};

回复收藏 0 原文

~没有更多了~