当字符串包含html实体时,在Javascript中设置文本节点的nodeValue

发布于 2024-07-13 08:43:07 字数 137 浏览 9 评论 0原文

当我设置带有&符号的文本节点的值时,它

node.nodeValue="string with &#xxxx; sort of characters"

会被转义。 是否有捷径可寻?

When I set a value of a text node with

node.nodeValue="string with &#xxxx; sort of characters"

ampersand gets escaped. Is there an easy way to do this?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(5)

凉宸 2024-07-20 08:43:07

您需要对 Unicode 字符使用 Javascript 转义:

// "xxxx" is a placeholder for the four hex digits of the character (e.g. "\u00BB" for »);
// typed literally, \uxxxx is not a valid JavaScript escape sequence.
node.nodeValue="string with \uxxxx sort of characters"

You need to use Javascript escapes for the Unicode characters:

// "xxxx" is a placeholder for the four hex digits of the character (e.g. "\u00BB" for »);
// typed literally, \uxxxx is not a valid JavaScript escape sequence.
node.nodeValue="string with \uxxxx sort of characters"
思慕 2024-07-20 08:43:07

来自 http://code.google.com/p/jslibs/wiki/JavascriptTips:

(同时转换实体引用和数字实体)

/**
 * Lookup table from a named character entity (the text between "&" and ";")
 * to the Unicode code point it stands for.
 *
 * `__proto__: null` gives the object no prototype, so a lookup such as
 * entityToCode["constructor"] yields undefined instead of an inherited
 * Object.prototype member, and for...in visits only the entries listed here.
 */
const entityToCode = { __proto__: null,
apos:0x0027,quot:0x0022,amp:0x0026,lt:0x003C,gt:0x003E,nbsp:0x00A0,iexcl:0x00A1,cent:0x00A2,pound:0x00A3,
curren:0x00A4,yen:0x00A5,brvbar:0x00A6,sect:0x00A7,uml:0x00A8,copy:0x00A9,ordf:0x00AA,laquo:0x00AB,
not:0x00AC,shy:0x00AD,reg:0x00AE,macr:0x00AF,deg:0x00B0,plusmn:0x00B1,sup2:0x00B2,sup3:0x00B3,
acute:0x00B4,micro:0x00B5,para:0x00B6,middot:0x00B7,cedil:0x00B8,sup1:0x00B9,ordm:0x00BA,raquo:0x00BB,
frac14:0x00BC,frac12:0x00BD,frac34:0x00BE,iquest:0x00BF,Agrave:0x00C0,Aacute:0x00C1,Acirc:0x00C2,Atilde:0x00C3,
Auml:0x00C4,Aring:0x00C5,AElig:0x00C6,Ccedil:0x00C7,Egrave:0x00C8,Eacute:0x00C9,Ecirc:0x00CA,Euml:0x00CB,
Igrave:0x00CC,Iacute:0x00CD,Icirc:0x00CE,Iuml:0x00CF,ETH:0x00D0,Ntilde:0x00D1,Ograve:0x00D2,Oacute:0x00D3,
Ocirc:0x00D4,Otilde:0x00D5,Ouml:0x00D6,times:0x00D7,Oslash:0x00D8,Ugrave:0x00D9,Uacute:0x00DA,Ucirc:0x00DB,
Uuml:0x00DC,Yacute:0x00DD,THORN:0x00DE,szlig:0x00DF,agrave:0x00E0,aacute:0x00E1,acirc:0x00E2,atilde:0x00E3,
auml:0x00E4,aring:0x00E5,aelig:0x00E6,ccedil:0x00E7,egrave:0x00E8,eacute:0x00E9,ecirc:0x00EA,euml:0x00EB,
igrave:0x00EC,iacute:0x00ED,icirc:0x00EE,iuml:0x00EF,eth:0x00F0,ntilde:0x00F1,ograve:0x00F2,oacute:0x00F3,
ocirc:0x00F4,otilde:0x00F5,ouml:0x00F6,divide:0x00F7,oslash:0x00F8,ugrave:0x00F9,uacute:0x00FA,ucirc:0x00FB,
uuml:0x00FC,yacute:0x00FD,thorn:0x00FE,yuml:0x00FF,OElig:0x0152,oelig:0x0153,Scaron:0x0160,scaron:0x0161,
Yuml:0x0178,fnof:0x0192,circ:0x02C6,tilde:0x02DC,Alpha:0x0391,Beta:0x0392,Gamma:0x0393,Delta:0x0394,
Epsilon:0x0395,Zeta:0x0396,Eta:0x0397,Theta:0x0398,Iota:0x0399,Kappa:0x039A,Lambda:0x039B,Mu:0x039C,
Nu:0x039D,Xi:0x039E,Omicron:0x039F,Pi:0x03A0,Rho:0x03A1,Sigma:0x03A3,Tau:0x03A4,Upsilon:0x03A5,
Phi:0x03A6,Chi:0x03A7,Psi:0x03A8,Omega:0x03A9,alpha:0x03B1,beta:0x03B2,gamma:0x03B3,delta:0x03B4,
epsilon:0x03B5,zeta:0x03B6,eta:0x03B7,theta:0x03B8,iota:0x03B9,kappa:0x03BA,lambda:0x03BB,mu:0x03BC,
nu:0x03BD,xi:0x03BE,omicron:0x03BF,pi:0x03C0,rho:0x03C1,sigmaf:0x03C2,sigma:0x03C3,tau:0x03C4,
upsilon:0x03C5,phi:0x03C6,chi:0x03C7,psi:0x03C8,omega:0x03C9,thetasym:0x03D1,upsih:0x03D2,piv:0x03D6,
ensp:0x2002,emsp:0x2003,thinsp:0x2009,zwnj:0x200C,zwj:0x200D,lrm:0x200E,rlm:0x200F,ndash:0x2013,
mdash:0x2014,lsquo:0x2018,rsquo:0x2019,sbquo:0x201A,ldquo:0x201C,rdquo:0x201D,bdquo:0x201E,dagger:0x2020,
Dagger:0x2021,bull:0x2022,hellip:0x2026,permil:0x2030,prime:0x2032,Prime:0x2033,lsaquo:0x2039,rsaquo:0x203A,
oline:0x203E,frasl:0x2044,euro:0x20AC,image:0x2111,weierp:0x2118,real:0x211C,trade:0x2122,alefsym:0x2135,
larr:0x2190,uarr:0x2191,rarr:0x2192,darr:0x2193,harr:0x2194,crarr:0x21B5,lArr:0x21D0,uArr:0x21D1,
rArr:0x21D2,dArr:0x21D3,hArr:0x21D4,forall:0x2200,part:0x2202,exist:0x2203,empty:0x2205,nabla:0x2207,
isin:0x2208,notin:0x2209,ni:0x220B,prod:0x220F,sum:0x2211,minus:0x2212,lowast:0x2217,radic:0x221A,
prop:0x221D,infin:0x221E,ang:0x2220,and:0x2227,or:0x2228,cap:0x2229,cup:0x222A,int:0x222B,
there4:0x2234,sim:0x223C,cong:0x2245,asymp:0x2248,ne:0x2260,equiv:0x2261,le:0x2264,ge:0x2265,
sub:0x2282,sup:0x2283,nsub:0x2284,sube:0x2286,supe:0x2287,oplus:0x2295,otimes:0x2297,perp:0x22A5,
sdot:0x22C5,lceil:0x2308,rceil:0x2309,lfloor:0x230A,rfloor:0x230B,lang:0x2329,rang:0x232A,loz:0x25CA,
spades:0x2660,clubs:0x2663,hearts:0x2665,diams:0x2666
};

// Inverse of entityToCode: keyed by the single character, valued by the entity name.
var charToEntity = {};
for ( var entityName of Object.keys( entityToCode ) ) {
        const codePoint = entityToCode[entityName];
        charToEntity[String.fromCharCode( codePoint )] = entityName;
}

/**
 * Replace characters outside printable ASCII with their named entity.
 *
 * Only characters outside 0x20-0x7E are examined, so "&", "<", ">" and '"'
 * are never escaped. A character with no entry in charToEntity is left
 * unchanged.
 *
 * The original used SpiderMonkey-only "expression closure" syntax
 * (`function f(x) expr`), which is a SyntaxError in standard JavaScript;
 * this is the equivalent standard form.
 *
 * @param {string} str - text to escape
 * @returns {string} text with known characters replaced by "&name;"
 */
function EscapeEntities(str) {
    return str.replace(/[^\x20-\x7E]/g, function (ch) {
        return charToEntity[ch] ? '&' + charToEntity[ch] + ';' : ch;
    });
}
/**
 * Replace character references in `str` with the characters they denote.
 *
 * Handles named entities listed in entityToCode ("&raquo;"), decimal
 * references ("&#187;") and hexadecimal references ("&#xBB;" / "&#XBB;").
 * Anything that is not a well-formed, known reference is left exactly as
 * written rather than being turned into a NUL character.
 *
 * @param {string} str - text containing character references
 * @returns {string} the decoded text
 */
function unescapeEntities(str) {
    // Only well-formed references are matched, so prose like "a & b; c" is not touched.
    return str.replace(
     /&(#\d+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);/g,
     function(match, ent) {
        let code;
        if ( ent.charAt(0) !== '#' ) {
            code = entityToCode[ent];
        } else if ( ent.charAt(1) === 'x' || ent.charAt(1) === 'X' ) {
            code = parseInt(ent.slice(2), 16);
        } else {
            code = parseInt(ent.slice(1), 10);
        }
        // Unknown name, or a value String.fromCodePoint would reject with a RangeError.
        if ( typeof code !== 'number' || code > 0x10FFFF ) {
            return match;
        }
        // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF.
        return String.fromCodePoint(code);
     });
}

From http://code.google.com/p/jslibs/wiki/JavascriptTips:

(converts both entity references and numeric entities)

/**
 * Lookup table from a named character entity (the text between "&" and ";")
 * to the Unicode code point it stands for.
 *
 * `__proto__: null` gives the object no prototype, so a lookup such as
 * entityToCode["constructor"] yields undefined instead of an inherited
 * Object.prototype member, and for...in visits only the entries listed here.
 */
const entityToCode = { __proto__: null,
apos:0x0027,quot:0x0022,amp:0x0026,lt:0x003C,gt:0x003E,nbsp:0x00A0,iexcl:0x00A1,cent:0x00A2,pound:0x00A3,
curren:0x00A4,yen:0x00A5,brvbar:0x00A6,sect:0x00A7,uml:0x00A8,copy:0x00A9,ordf:0x00AA,laquo:0x00AB,
not:0x00AC,shy:0x00AD,reg:0x00AE,macr:0x00AF,deg:0x00B0,plusmn:0x00B1,sup2:0x00B2,sup3:0x00B3,
acute:0x00B4,micro:0x00B5,para:0x00B6,middot:0x00B7,cedil:0x00B8,sup1:0x00B9,ordm:0x00BA,raquo:0x00BB,
frac14:0x00BC,frac12:0x00BD,frac34:0x00BE,iquest:0x00BF,Agrave:0x00C0,Aacute:0x00C1,Acirc:0x00C2,Atilde:0x00C3,
Auml:0x00C4,Aring:0x00C5,AElig:0x00C6,Ccedil:0x00C7,Egrave:0x00C8,Eacute:0x00C9,Ecirc:0x00CA,Euml:0x00CB,
Igrave:0x00CC,Iacute:0x00CD,Icirc:0x00CE,Iuml:0x00CF,ETH:0x00D0,Ntilde:0x00D1,Ograve:0x00D2,Oacute:0x00D3,
Ocirc:0x00D4,Otilde:0x00D5,Ouml:0x00D6,times:0x00D7,Oslash:0x00D8,Ugrave:0x00D9,Uacute:0x00DA,Ucirc:0x00DB,
Uuml:0x00DC,Yacute:0x00DD,THORN:0x00DE,szlig:0x00DF,agrave:0x00E0,aacute:0x00E1,acirc:0x00E2,atilde:0x00E3,
auml:0x00E4,aring:0x00E5,aelig:0x00E6,ccedil:0x00E7,egrave:0x00E8,eacute:0x00E9,ecirc:0x00EA,euml:0x00EB,
igrave:0x00EC,iacute:0x00ED,icirc:0x00EE,iuml:0x00EF,eth:0x00F0,ntilde:0x00F1,ograve:0x00F2,oacute:0x00F3,
ocirc:0x00F4,otilde:0x00F5,ouml:0x00F6,divide:0x00F7,oslash:0x00F8,ugrave:0x00F9,uacute:0x00FA,ucirc:0x00FB,
uuml:0x00FC,yacute:0x00FD,thorn:0x00FE,yuml:0x00FF,OElig:0x0152,oelig:0x0153,Scaron:0x0160,scaron:0x0161,
Yuml:0x0178,fnof:0x0192,circ:0x02C6,tilde:0x02DC,Alpha:0x0391,Beta:0x0392,Gamma:0x0393,Delta:0x0394,
Epsilon:0x0395,Zeta:0x0396,Eta:0x0397,Theta:0x0398,Iota:0x0399,Kappa:0x039A,Lambda:0x039B,Mu:0x039C,
Nu:0x039D,Xi:0x039E,Omicron:0x039F,Pi:0x03A0,Rho:0x03A1,Sigma:0x03A3,Tau:0x03A4,Upsilon:0x03A5,
Phi:0x03A6,Chi:0x03A7,Psi:0x03A8,Omega:0x03A9,alpha:0x03B1,beta:0x03B2,gamma:0x03B3,delta:0x03B4,
epsilon:0x03B5,zeta:0x03B6,eta:0x03B7,theta:0x03B8,iota:0x03B9,kappa:0x03BA,lambda:0x03BB,mu:0x03BC,
nu:0x03BD,xi:0x03BE,omicron:0x03BF,pi:0x03C0,rho:0x03C1,sigmaf:0x03C2,sigma:0x03C3,tau:0x03C4,
upsilon:0x03C5,phi:0x03C6,chi:0x03C7,psi:0x03C8,omega:0x03C9,thetasym:0x03D1,upsih:0x03D2,piv:0x03D6,
ensp:0x2002,emsp:0x2003,thinsp:0x2009,zwnj:0x200C,zwj:0x200D,lrm:0x200E,rlm:0x200F,ndash:0x2013,
mdash:0x2014,lsquo:0x2018,rsquo:0x2019,sbquo:0x201A,ldquo:0x201C,rdquo:0x201D,bdquo:0x201E,dagger:0x2020,
Dagger:0x2021,bull:0x2022,hellip:0x2026,permil:0x2030,prime:0x2032,Prime:0x2033,lsaquo:0x2039,rsaquo:0x203A,
oline:0x203E,frasl:0x2044,euro:0x20AC,image:0x2111,weierp:0x2118,real:0x211C,trade:0x2122,alefsym:0x2135,
larr:0x2190,uarr:0x2191,rarr:0x2192,darr:0x2193,harr:0x2194,crarr:0x21B5,lArr:0x21D0,uArr:0x21D1,
rArr:0x21D2,dArr:0x21D3,hArr:0x21D4,forall:0x2200,part:0x2202,exist:0x2203,empty:0x2205,nabla:0x2207,
isin:0x2208,notin:0x2209,ni:0x220B,prod:0x220F,sum:0x2211,minus:0x2212,lowast:0x2217,radic:0x221A,
prop:0x221D,infin:0x221E,ang:0x2220,and:0x2227,or:0x2228,cap:0x2229,cup:0x222A,int:0x222B,
there4:0x2234,sim:0x223C,cong:0x2245,asymp:0x2248,ne:0x2260,equiv:0x2261,le:0x2264,ge:0x2265,
sub:0x2282,sup:0x2283,nsub:0x2284,sube:0x2286,supe:0x2287,oplus:0x2295,otimes:0x2297,perp:0x22A5,
sdot:0x22C5,lceil:0x2308,rceil:0x2309,lfloor:0x230A,rfloor:0x230B,lang:0x2329,rang:0x232A,loz:0x25CA,
spades:0x2660,clubs:0x2663,hearts:0x2665,diams:0x2666
};

// Inverse of entityToCode: keyed by the single character, valued by the entity name.
var charToEntity = {};
for ( var entityName of Object.keys( entityToCode ) ) {
        const codePoint = entityToCode[entityName];
        charToEntity[String.fromCharCode( codePoint )] = entityName;
}

/**
 * Replace characters outside printable ASCII with their named entity.
 *
 * Only characters outside 0x20-0x7E are examined, so "&", "<", ">" and '"'
 * are never escaped. A character with no entry in charToEntity is left
 * unchanged.
 *
 * The original used SpiderMonkey-only "expression closure" syntax
 * (`function f(x) expr`), which is a SyntaxError in standard JavaScript;
 * this is the equivalent standard form.
 *
 * @param {string} str - text to escape
 * @returns {string} text with known characters replaced by "&name;"
 */
function EscapeEntities(str) {
    return str.replace(/[^\x20-\x7E]/g, function (ch) {
        return charToEntity[ch] ? '&' + charToEntity[ch] + ';' : ch;
    });
}
/**
 * Replace character references in `str` with the characters they denote.
 *
 * Handles named entities listed in entityToCode ("&raquo;"), decimal
 * references ("&#187;") and hexadecimal references ("&#xBB;" / "&#XBB;").
 * Anything that is not a well-formed, known reference is left exactly as
 * written rather than being turned into a NUL character.
 *
 * @param {string} str - text containing character references
 * @returns {string} the decoded text
 */
function unescapeEntities(str) {
    // Only well-formed references are matched, so prose like "a & b; c" is not touched.
    return str.replace(
     /&(#\d+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);/g,
     function(match, ent) {
        let code;
        if ( ent.charAt(0) !== '#' ) {
            code = entityToCode[ent];
        } else if ( ent.charAt(1) === 'x' || ent.charAt(1) === 'X' ) {
            code = parseInt(ent.slice(2), 16);
        } else {
            code = parseInt(ent.slice(1), 10);
        }
        // Unknown name, or a value String.fromCodePoint would reject with a RangeError.
        if ( typeof code !== 'number' || code > 0x10FFFF ) {
            return match;
        }
        // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF.
        return String.fromCodePoint(code);
     });
}
薯片软お妹 2024-07-20 08:43:07

发生这种情况是因为字符串中的 & 被浏览器扩展成了 & 符号实体(&amp;)。 为了解决这个问题,您需要自己转换实体。

<html>
<body>
    <div id="test"> </div>
</body>

<script type="text/javascript">

// Once the page has loaded, decode the numeric character references in the
// sample string and write the result into the text node inside #test.
// The sample must contain the references themselves (&#187; / &#171;), not the
// already-decoded characters, otherwise there is nothing to convert.
onload = function()
{
    var node = document.getElementById( 'test' );
    node.firstChild.nodeValue = convertEntities( 'Some &#187; entities &#171; and some &#187; more entities &#171;' );
};

/**
 * Replace decimal numeric character references ("&#187;") in `text` with the
 * characters they denote.
 *
 * The text is decoded in a single pass, so a character produced by one
 * replacement is never re-read as part of another reference, and input
 * without any references is returned unchanged instead of throwing.
 * The debugging console.log calls of the earlier version were dropped.
 *
 * @param {string} text - text that may contain "&#NNN;" references
 * @returns {string} the decoded text
 */
function convertEntities( text )
{
    return text.replace( /&#(\d+);/g, convertEntity );

    // Decode one reference; `digits` is the decimal code point captured by the regex.
    function convertEntity( ent, digits )
    {
        var num = parseInt( digits, 10 );
        // String.fromCodePoint throws a RangeError above U+10FFFF, so such
        // references are left as written.
        return num > 0x10FFFF ? ent : String.fromCodePoint( num );
    }
}

</script>

</html>

The reason this is happening is because the & in your string is being expanded into the ampersand entity by the browser. To get around this, you'll need to convert the entities yourself.

<html>
<body>
    <div id="test"> </div>
</body>

<script type="text/javascript">

// Once the page has loaded, decode the numeric character references in the
// sample string and write the result into the text node inside #test.
// The sample must contain the references themselves (&#187; / &#171;), not the
// already-decoded characters, otherwise there is nothing to convert.
onload = function()
{
    var node = document.getElementById( 'test' );
    node.firstChild.nodeValue = convertEntities( 'Some &#187; entities &#171; and some &#187; more entities &#171;' );
};

/**
 * Replace decimal numeric character references ("&#187;") in `text` with the
 * characters they denote.
 *
 * The text is decoded in a single pass, so a character produced by one
 * replacement is never re-read as part of another reference, and input
 * without any references is returned unchanged instead of throwing.
 * The debugging console.log calls of the earlier version were dropped.
 *
 * @param {string} text - text that may contain "&#NNN;" references
 * @returns {string} the decoded text
 */
function convertEntities( text )
{
    return text.replace( /&#(\d+);/g, convertEntity );

    // Decode one reference; `digits` is the decimal code point captured by the regex.
    function convertEntity( ent, digits )
    {
        var num = parseInt( digits, 10 );
        // String.fromCodePoint throws a RangeError above U+10FFFF, so such
        // references are left as written.
        return num > 0x10FFFF ? ent : String.fromCodePoint( num );
    }
}

</script>

</html>
羅雙樹 2024-07-20 08:43:07

正如其他答案中所述,我需要用 javascript 编码的实体替换 html 编码的实体。 从 BaileyP 的回答开始,我已经做了这个:

/**
 * Replace decimal numeric character references ("&#187;") in `text` with the
 * characters they denote.
 *
 * @param {string} text - text that may contain "&#NNN;" references
 * @returns {string} the decoded text
 */
function convertEntities( text )
{
    return text.replace( /&#(\d+);/g, function ( ent, captureGroup )
    {
        var num = parseInt( captureGroup, 10 );
        // fromCodePoint keeps code points above U+FFFF intact (fromCharCode
        // truncates them to 16 bits) but throws a RangeError above U+10FFFF,
        // so out-of-range references are left as written.
        return num > 0x10FFFF ? ent : String.fromCodePoint( num );
    });
}

As noted in other answers, I need to replace html encoded entities with javascript encoded ones. Starting from BaileyP's answer, I've made this:

/**
 * Replace decimal numeric character references ("&#187;") in `text` with the
 * characters they denote.
 *
 * @param {string} text - text that may contain "&#NNN;" references
 * @returns {string} the decoded text
 */
function convertEntities( text )
{
    return text.replace( /&#(\d+);/g, function ( ent, captureGroup )
    {
        var num = parseInt( captureGroup, 10 );
        // fromCodePoint keeps code points above U+FFFF intact (fromCharCode
        // truncates them to 16 bits) but throws a RangeError above U+10FFFF,
        // so out-of-range references are left as written.
        return num > 0x10FFFF ? ent : String.fromCodePoint( num );
    });
}
小清晰的声音 2024-07-20 08:43:07

OP 有实体/实体引用,并希望它们出现在 DOM 中的文本节点中。

这就是为什么接受的答案和许多其他答案很棒;这些答案使用 Javascript unicode 转义序列将实体转换为其 unicode 等效项。

但我有不同的需求,我有 unicode 字符,我想将它们作为实体引用放入文本节点中。 我特别想要实体引用,以便表示我的文档的 XML 字符串可以用 ASCII 编码(即 encoding="ascii")。 否则,正如 @Bjorn 所说,Unicode 字符将被“解码为垃圾”

这就是我想要的,注意 ASCII 编码:

<?xml version='1.0' encoding='ASCII'?>
<html>
<body>
    &#8220;Quotes&#8221;
</body>
</html>

上面的 ASCII 编码的 XML/HTML 在浏览器中看起来不错:

(图片 sstatic.net/9oCjC.png)ASCII 编码的 XML/HTML,使用双引号的实体引用 &#8220; 和 &#8221;,在浏览器中按预期呈现

所以我不能使用其他答案,因为它们插入了 unicode 字符(但我想要 ASCII)。

我无法使用 DOM 文本节点 API 插入未转义的实体引用。 正如 OP 指出的:如果您使用 DOM 文本节点 API 来设置节点的 textContent 或 nodeValue,DOM 将始终转义您尝试注入的任何实体……

  • ...因此 & 变为 &amp;
  • ...并且 &#8220; 变为 &amp;#8220;

正如一个已删除的答案所建议的,您可以尝试使用 innerHTML 或 outerHTML 直接操作 HTML,但 Text API 没有这些属性。

即使您正在处理非文本节点(例如 <span>),我的浏览器中的 DOM API
也不会让实体保持原样:实体会被“解析”/解引用为对应的 utf-8 字符串,例如 &#8220; 变成 “

temp1.innerHTML='&#8220;'
'&#8220;'  // note how I set &#8220;
temp1.innerHTML;
'“'        // note how the unicode character comes back out; not the entity reference

但我想要我的文档是 ASCII 编码的,我不能使用 DOM 设置的 UTF-8 字符; 将被“解码为垃圾”,如下所示:

使用字符 “ 和 ” 的 ASCII 编码 XML/HTML 在浏览器中被“解码为垃圾”

是的,我可以简单地使用 utf-8 编码,因此我不需要实体引用(示例如下所示),但我更喜欢尊重原始编码(恰好是 ASCII)

使用字符 “ 和 ” 的 utf-8 编码 XML/HTML;在浏览器中按预期呈现,但我不想要 utf-8 编码!

因此,如果您只使用 DOM,就没有好办法把未转义的实体引用放入文本节点:它们要么被转义,要么被解引用为 utf-8 字符。 我认为这是设计使然/预期行为,我对此表示理解……如果您只是操纵 DOM 来更改浏览器中呈现的内容,这可能没有问题。

但就我而言,我使用 DOM 创建和下载 XML 文档,因此我有机会获取 outerHTML 字符串,并在下载之前独立于 DOM API 对其进行处理。

我获取 outerHTML 并运行下面的函数来转换 非 ASCII 字符与其实体等效项 (C# 中的类似方法)。 通过用实体引用替换非 ASCII,我的文档可以编码为 ASCII 并毫无问题地读取。

/**
 * Replace every non-ASCII character in `s` with a decimal numeric character
 * reference ("&#8220;"), so the result can be stored in an ASCII-encoded
 * document.
 *
 * @param {?string} s - text to convert; a falsy value is treated as ''
 * @returns {string} ASCII-only text
 */
const replaceNonAsciiWithNumCharRefEntity = (s) => {

    return (s || '')
        .replace(
            // The `u` flag makes the regex match whole code points. Without it a
            // character above U+FFFF is matched as two separate surrogate halves
            // and emitted as two invalid references.
            /[^\x00-\x7F]/gu,
            (ch) => `&#${ch.codePointAt(0)};`
        );
};

The OP has entities / entity references, and wants them to appear in the DOM in a text node.

That's why the accepted answer and many other answers are great; those answers convert entities to their unicode equivalents using Javascript unicode escape sequences.

But I had a different need, I had unicode characters and I want to put them into the text node as entity references. I want entity references specifically so that the XML string representing my document could be encoded in ASCII (i.e. encoding="ascii"). Otherwise, as @Bjorn said, the Unicode characters would be "decoded as junk"

This is what I want, note the ASCII encoding:

<?xml version='1.0' encoding='ASCII'?>
<html>
<body>
    &#8220;Quotes&#8221;
</body>
</html>

The ASCII encoded XML/HTML above looks good in a browser:

ASCII encoded XML/HTML using entity references for double quotes “ and ” renders as expected in a browser

So I can't use the other answers because they insert unicode characters (but I want ASCII).

And I can't use the DOM text node API to insert unescaped entity references. As the OP points out: if you use DOM text node API to set the node's textContent or nodeValue DOM will always escape any entities you try to inject...

  • ...so & becomes &amp;
  • ... and &#8220; becomes &amp;#8220;

As a deleted answer suggested, you could try to manipulate HTML directly using innerHTML or outerHTML, but the Text API does not have those properties.

Even if you are working on a non-Text node (like a <span>), the DOM API
in my browser won't leave the entities intact: the entities are "parsed"/dereferenced to their utf-8 strings, e.g. &#8220; becomes “

temp1.innerHTML='&#8220;'
'&#8220;'  // note how I set &#8220;
temp1.innerHTML;
'“'        // note how the unicode character comes back out; not the entity reference

But I want my document to be ASCII encoded, so I can't use the UTF-8 characters as set by the DOM; they will be "decoded as junk" as shown below:

ASCII encoded XML/HTML using characters “ and ”; gets "decoded as junk" in a browser

Yes I could simply use utf-8 encoding and therefore I don't need entity references (example shown below), but I prefer to respect the original encoding (which happened to be ASCII).

utf-8 encoded XML/HTML using characters “ and ”; renders as expected in the browser, but I don't want utf-8 encoding!

So if you are only using DOM, there's no good way to put unescaped entity references into the text nodes, they are either escaped or dereferenced to utf-8 behavior. I think this is as-designed/expected behavior, and I appreciate that... If you're only manipulating the DOM to change what renders in your browser, this might be no problem.

But in my case I was using the DOM to create and download an XML document, so I had an opportunity to get the outerHTML string and manipulate it independently of the DOM API before downloading it.

I get the outerHTML and run the function below to convert non-ASCII characters to their entity equivalents (similar approach in C#). By replacing the non-ASCII with entity references, my document could be encoded as ASCII and read without problems.

/**
 * Replace every non-ASCII character in `s` with a decimal numeric character
 * reference ("&#8220;"), so the result can be stored in an ASCII-encoded
 * document.
 *
 * @param {?string} s - text to convert; a falsy value is treated as ''
 * @returns {string} ASCII-only text
 */
const replaceNonAsciiWithNumCharRefEntity = (s) => {

    return (s || '')
        .replace(
            // The `u` flag makes the regex match whole code points. Without it a
            // character above U+FFFF is matched as two separate surrogate halves
            // and emitted as two invalid references.
            /[^\x00-\x7F]/gu,
            (ch) => `&#${ch.codePointAt(0)};`
        );
};
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文