如何使用此 XSLT 文件解析此 Excel XML 导出文件?

发布于 2024-10-21 18:19:53 字数 8885 浏览 1 评论 0原文

我们可以用这个 XSL 文件很好地解析这个测试 XML 文件:

测试 XML:

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet href="newrows.xsl" type="text/xsl"?>
<Workbook>
    <Worksheet>
        <Table>
            <Row>
                <Cell></Cell>
                <Cell>(info...)</Cell>
                <Cell></Cell>
            </Row>
            <Row>
                <Cell>first name</Cell>
                <Cell>last name</Cell>
                <Cell>age</Cell>
            </Row>
            <Row>
                <Cell>Jim</Cell>
                <Cell>Smith</Cell>
                <Cell>34</Cell>
            </Row>
            <Row>
                <Cell>Roy</Cell>
                <Cell>Rogers</Cell>
                <Cell>22</Cell>
            </Row>
            <Row>
                <Cell>(info...)</Cell>
                <Cell></Cell>
                <Cell>(info...)</Cell>
            </Row>

            <Row>
                <Cell>Sally</Cell>
                <Cell>Cloud</Cell>
                <Cell>26</Cell>
            </Row>

            <Row>
                <Cell>John</Cell>
                <Cell>Randall</Cell>
                <Cell>44</Cell>
            </Row>  

        </Table>
    </Worksheet>
</Workbook>

XSL:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"  version="1.0">

    <xsl:output method="xml" indent="yes"/>

    <xsl:param name="range-1-begin"  select="1"/>
    <xsl:param name="range-1-end"  select="3"/>

    <xsl:param name="range-2-begin"  select="5"/>
    <xsl:param name="range-2-end"  select="6"/>

    <xsl:template match="Table">
        <test>
            <xsl:for-each select="Row">
                <xsl:if test="(position() &gt;= $range-1-begin and position() &lt;= $range-1-end)
                    or (position() &gt;= $range-2-begin and position() &lt;= $range-2-end)">
                    <Row>
                       <xsl:for-each select="Cell">
                            <xsl:if test="position() = 1 or position() = 3">
                                <Cell>
                                    <xsl:value-of select="."/>
                                </Cell>
                            </xsl:if>
                        </xsl:for-each>
                    </Row>
                </xsl:if>
            </xsl:for-each>
        </test>
    </xsl:template>

</xsl:stylesheet>

但是,当我们尝试解析从 Excel 导出的这个类似 XML 文件时,它会导出每个字段的内容没有 XML 元素标签。我们甚至可以输入 kksljflskdjf 而不是 Table,它会输出每个 XML 元素的内容。

我必须在 XML/XSL 文件中更改哪些内容才能使 XSL 文件正确解析 XML?

Excel XML(例外):

<?xml version="1.0"?>
<?xml-stylesheet href="blackbox.xsl" type="text/xsl"?>
<Workbook 
xmlns="urn:schemas-microsoft-com:office:spreadsheet" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:x="urn:schemas-microsoft-com:office:excel" 
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:html="http://www.w3.org/TR/REC-html40">
    <DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
        <Author>MM</Author>
        <LastAuthor>xx</LastAuthor>
        ...
<Worksheet ss:Name="OFFSET Individual">
        <Names>
            <NamedRange ss:Name="_FilterDatabase" ss:RefersTo="='OFFSET Individual'!R3C2:R3C12" ss:Hidden="1"/>
            <NamedRange ss:Name="Print_Area" ss:RefersTo="='OFFSET Individual'!R4C2:R435C15"/>
            <NamedRange ss:Name="Muster" ss:RefersTo="='OFFSET Individual'!C1:C9"/>
            <NamedRange ss:Name="PAP" ss:RefersTo="='OFFSET Individual'!C2"/>
        </Names>
        <Table ss:ExpandedColumnCount="31" ss:ExpandedRowCount="443" x:FullColumns="1" x:FullRows="1" ss:StyleID="s90" ss:DefaultColumnWidth="59" ss:DefaultRowHeight="15">
            <Column ss:StyleID="s416" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="61"/>
            <Column ss:StyleID="s91" ss:AutoFitWidth="0" ss:Width="287"/>
            <Column ss:StyleID="s547" ss:AutoFitWidth="0" ss:Width="216"/>
            <Column ss:StyleID="s91" ss:AutoFitWidth="0" ss:Width="87"/>
            <Column ss:StyleID="s92" ss:AutoFitWidth="0" ss:Width="202"/>
            <Column ss:StyleID="s90" ss:AutoFitWidth="0" ss:Width="87"/>
            <Column ss:StyleID="s101" ss:AutoFitWidth="0" ss:Width="284"/>
            <Column ss:StyleID="s132" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="52"/>
            <Column ss:StyleID="s137" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="47"/>
            <Column ss:StyleID="s90" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="42"/>
            <Column ss:StyleID="s90" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="39"/>
            <Column ss:StyleID="s90" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="37"/>
            <Column ss:StyleID="s113" ss:AutoFitWidth="0" ss:Width="47"/>
            <Column ss:StyleID="s87" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="275"/>
            <Column ss:StyleID="s458" ss:AutoFitWidth="0" ss:Width="89"/>
            <Column ss:StyleID="s179" ss:AutoFitWidth="0" ss:Span="1"/>
            <Column ss:Index="18" ss:StyleID="s168" ss:Hidden="1" ss:AutoFitWidth="0"/>
            <Column ss:StyleID="s90" ss:Hidden="1" ss:AutoFitWidth="0"/>
            <Column ss:StyleID="s377" ss:AutoFitWidth="0" ss:Width="202" ss:Span="2"/>
            <Column ss:Index="23" ss:StyleID="s377" ss:AutoFitWidth="0" ss:Width="203"/>
            <Row ss:AutoFitHeight="0" ss:Height="23">
                <Cell ss:Index="2" ss:StyleID="s142">
                    <Data ss:Type="String">Paper Overview</Data>
                    <NamedCell ss:Name="PAP"/>
                    <NamedCell ss:Name="Muster"/>
                </Cell>
            </Row>
            <Row ss:AutoFitHeight="0">
                <Cell ss:Index="2" ss:StyleID="s141">
                    <Data ss:Type="String">Stand: 10.03.2011; 13:00 Uhr</Data>
                    <NamedCell ss:Name="PAP"/>
                    <NamedCell ss:Name="Muster"/>
                </Cell>
            </Row>
                        ...

以下是生成的“XML”文件的示例:

在此处输入图像描述

附录

这是现在有效的完整解决方案,谢谢@Dimitre!

<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns="urn:schemas-microsoft-com:office:spreadsheet" 
    xmlns:y="urn:schemas-microsoft-com:office:spreadsheet" 
    xmlns:o="urn:schemas-microsoft-com:office:office" 
    xmlns:x="urn:schemas-microsoft-com:office:excel" 
    xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" 
    xmlns:html="http://www.w3.org/TR/REC-html40"
  exclude-result-prefixes="y o x ss html"
 >

 <xsl:strip-space elements="*"/>
    <xsl:output method="xml" indent="yes"/>

    <xsl:param name="range-1-begin"  select="1"/>
    <xsl:param name="range-1-end"  select="3"/>

    <xsl:param name="range-2-begin"  select="5"/>
    <xsl:param name="range-2-end"  select="6"/>

    <xsl:template match="text()"/> 

    <xsl:template match="y:Table">
        <test>
            <xsl:for-each select="y:Row">
                <xsl:if test="(position() &gt;= $range-1-begin and position() &lt;= $range-1-end)
                    or (position() &gt;= $range-2-begin and position() &lt;= $range-2-end)">
                    <Row>
                       <xsl:for-each select="y:Cell">
                            <xsl:if test="position() = 1 or position() = 3">
                                <Cell>
                                    <xsl:value-of select="."/>
                                </Cell>
                            </xsl:if>
                        </xsl:for-each>
                    </Row>
                </xsl:if>
            </xsl:for-each>
        </test>
    </xsl:template>

</xsl:stylesheet>

We can parse this test XML file with this XSL file fine:

Test XML:

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet href="newrows.xsl" type="text/xsl"?>
<Workbook>
    <Worksheet>
        <Table>
            <Row>
                <Cell></Cell>
                <Cell>(info...)</Cell>
                <Cell></Cell>
            </Row>
            <Row>
                <Cell>first name</Cell>
                <Cell>last name</Cell>
                <Cell>age</Cell>
            </Row>
            <Row>
                <Cell>Jim</Cell>
                <Cell>Smith</Cell>
                <Cell>34</Cell>
            </Row>
            <Row>
                <Cell>Roy</Cell>
                <Cell>Rogers</Cell>
                <Cell>22</Cell>
            </Row>
            <Row>
                <Cell>(info...)</Cell>
                <Cell></Cell>
                <Cell>(info...)</Cell>
            </Row>

            <Row>
                <Cell>Sally</Cell>
                <Cell>Cloud</Cell>
                <Cell>26</Cell>
            </Row>

            <Row>
                <Cell>John</Cell>
                <Cell>Randall</Cell>
                <Cell>44</Cell>
            </Row>  

        </Table>
    </Worksheet>
</Workbook>

XSL:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"  version="1.0">

    <xsl:output method="xml" indent="yes"/>

    <xsl:param name="range-1-begin"  select="1"/>
    <xsl:param name="range-1-end"  select="3"/>

    <xsl:param name="range-2-begin"  select="5"/>
    <xsl:param name="range-2-end"  select="6"/>

    <xsl:template match="Table">
        <test>
            <xsl:for-each select="Row">
                <xsl:if test="(position() >= $range-1-begin and position() <= $range-1-end)
                    or (position() >= $range-2-begin and position() <= $range-2-end)">
                    <Row>
                       <xsl:for-each select="Cell">
                            <xsl:if test="position() = 1 or position() = 3">
                                <Cell>
                                    <xsl:value-of select="."/>
                                </Cell>
                            </xsl:if>
                        </xsl:for-each>
                    </Row>
                </xsl:if>
            </xsl:for-each>
        </test>
    </xsl:template>

</xsl:stylesheet>

However, when we try to parse this similar XML file exported from Excel, it exports the content of every field with no XML element tags. We can even type in kksljflskdjf instead of Table and it outputs the content of every XML element.

What do I have to change in the XML/XSL file so that the XSL file correctly parses the XML?

Excel XML (exceprts):

<?xml version="1.0"?>
<?xml-stylesheet href="blackbox.xsl" type="text/xsl"?>
<Workbook 
xmlns="urn:schemas-microsoft-com:office:spreadsheet" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:x="urn:schemas-microsoft-com:office:excel" 
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:html="http://www.w3.org/TR/REC-html40">
    <DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
        <Author>MM</Author>
        <LastAuthor>xx</LastAuthor>
        ...
<Worksheet ss:Name="OFFSET Individual">
        <Names>
            <NamedRange ss:Name="_FilterDatabase" ss:RefersTo="='OFFSET Individual'!R3C2:R3C12" ss:Hidden="1"/>
            <NamedRange ss:Name="Print_Area" ss:RefersTo="='OFFSET Individual'!R4C2:R435C15"/>
            <NamedRange ss:Name="Muster" ss:RefersTo="='OFFSET Individual'!C1:C9"/>
            <NamedRange ss:Name="PAP" ss:RefersTo="='OFFSET Individual'!C2"/>
        </Names>
        <Table ss:ExpandedColumnCount="31" ss:ExpandedRowCount="443" x:FullColumns="1" x:FullRows="1" ss:StyleID="s90" ss:DefaultColumnWidth="59" ss:DefaultRowHeight="15">
            <Column ss:StyleID="s416" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="61"/>
            <Column ss:StyleID="s91" ss:AutoFitWidth="0" ss:Width="287"/>
            <Column ss:StyleID="s547" ss:AutoFitWidth="0" ss:Width="216"/>
            <Column ss:StyleID="s91" ss:AutoFitWidth="0" ss:Width="87"/>
            <Column ss:StyleID="s92" ss:AutoFitWidth="0" ss:Width="202"/>
            <Column ss:StyleID="s90" ss:AutoFitWidth="0" ss:Width="87"/>
            <Column ss:StyleID="s101" ss:AutoFitWidth="0" ss:Width="284"/>
            <Column ss:StyleID="s132" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="52"/>
            <Column ss:StyleID="s137" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="47"/>
            <Column ss:StyleID="s90" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="42"/>
            <Column ss:StyleID="s90" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="39"/>
            <Column ss:StyleID="s90" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="37"/>
            <Column ss:StyleID="s113" ss:AutoFitWidth="0" ss:Width="47"/>
            <Column ss:StyleID="s87" ss:Hidden="1" ss:AutoFitWidth="0" ss:Width="275"/>
            <Column ss:StyleID="s458" ss:AutoFitWidth="0" ss:Width="89"/>
            <Column ss:StyleID="s179" ss:AutoFitWidth="0" ss:Span="1"/>
            <Column ss:Index="18" ss:StyleID="s168" ss:Hidden="1" ss:AutoFitWidth="0"/>
            <Column ss:StyleID="s90" ss:Hidden="1" ss:AutoFitWidth="0"/>
            <Column ss:StyleID="s377" ss:AutoFitWidth="0" ss:Width="202" ss:Span="2"/>
            <Column ss:Index="23" ss:StyleID="s377" ss:AutoFitWidth="0" ss:Width="203"/>
            <Row ss:AutoFitHeight="0" ss:Height="23">
                <Cell ss:Index="2" ss:StyleID="s142">
                    <Data ss:Type="String">Paper Overview</Data>
                    <NamedCell ss:Name="PAP"/>
                    <NamedCell ss:Name="Muster"/>
                </Cell>
            </Row>
            <Row ss:AutoFitHeight="0">
                <Cell ss:Index="2" ss:StyleID="s141">
                    <Data ss:Type="String">Stand: 10.03.2011; 13:00 Uhr</Data>
                    <NamedCell ss:Name="PAP"/>
                    <NamedCell ss:Name="Muster"/>
                </Cell>
            </Row>
                        ...

Here is an example of the resulting "XML" file:

enter image description here

Addendum

This is the full solution which now works, thanks @Dimitre!

<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns="urn:schemas-microsoft-com:office:spreadsheet" 
    xmlns:y="urn:schemas-microsoft-com:office:spreadsheet" 
    xmlns:o="urn:schemas-microsoft-com:office:office" 
    xmlns:x="urn:schemas-microsoft-com:office:excel" 
    xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" 
    xmlns:html="http://www.w3.org/TR/REC-html40"
  exclude-result-prefixes="y o x ss html"
 >

 <xsl:strip-space elements="*"/>
    <xsl:output method="xml" indent="yes"/>

    <xsl:param name="range-1-begin"  select="1"/>
    <xsl:param name="range-1-end"  select="3"/>

    <xsl:param name="range-2-begin"  select="5"/>
    <xsl:param name="range-2-end"  select="6"/>

    <xsl:template match="text()"/> 

    <xsl:template match="y:Table">
        <test>
            <xsl:for-each select="y:Row">
                <xsl:if test="(position() >= $range-1-begin and position() <= $range-1-end)
                    or (position() >= $range-2-begin and position() <= $range-2-end)">
                    <Row>
                       <xsl:for-each select="y:Cell">
                            <xsl:if test="position() = 1 or position() = 3">
                                <Cell>
                                    <xsl:value-of select="."/>
                                </Cell>
                            </xsl:if>
                        </xsl:for-each>
                    </Row>
                </xsl:if>
            </xsl:for-each>
        </test>
    </xsl:template>

</xsl:stylesheet>

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(2

三寸金莲 2024-10-28 18:19:53

我需要改变什么
XML/XSL 文件,以便 XSL 文件
正确解析 XML?

首先,你的术语是很不正确的。 XSLT 转换应用于已解析的 XML 文档。解析(通过 XML 解析器)是能够应用转换的先决条件。

这是关于 XML、XPath 和 XSLT 的常见问题

无法通过第二个文档的名称选择任何元素的原因是因为其中定义了默认命名空间 (xmlns ="urn:schemas-microsoft-com:office:spreadsheet")。

在 XPath 中,任何无前缀的名称都被视为位于“无命名空间”中。因此,匹配 Table 选择 Row 元素的模板将不会匹配/选择任何元素,因为在XML 文档中不存在“无命名空间”中的此类元素。

最易读的解决方案是在 XSLT 样式表中定义相同的命名空间,并在任何 XPath 表达式/匹配模式中使用前缀名称。

因此,在更正后的 XSLT 样式表中,您将拥有

<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:y="urn:schemas-microsoft-com:office:spreadsheet"
 xmlns:o="urn:schemas-microsoft-com:office:office"
 xmlns:x="urn:schemas-microsoft-com:office:excel"
 xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
 xmlns:html="http://www.w3.org/TR/REC-html40"
  exclude-result-prefixes="y o x ss html"
 >
    <xsl:output method="xml" indent="yes"/>

    <xsl:param name="range-1-begin"  select="1"/>
    <xsl:param name="range-1-end"  select="3"/>
    <xsl:param name="range-2-begin"  select="5"/>
    <xsl:param name="range-2-end"  select="6"/>

    <xsl:template match="y:Table">
        <test>
            <xsl:for-each select="y:Row">
                <xsl:if test="(position() >= $range-1-begin and position() <= $range-1-end)                     or (position() >= $range-2-begin and position() <= $range-2-end)">
                    <Row>
                        <xsl:for-each select="Cell">
                            <xsl:if test="position() = 1 or position() = 3">
                                <Cell>
                                    <xsl:value-of select="."/>
                                </Cell>
                            </xsl:if>
                        </xsl:for-each>
                    </Row>
                </xsl:if>
            </xsl:for-each>
        </test>
    </xsl:template>
</xsl:stylesheet>

What do I have to change in the
XML/XSL file so that the XSL file
correctly parses the XML?

First of all, your terminology is quite incorrect. An XSLT transformation is applied on an already parsed XML document. The parsing (by an XML parser) is a prerequisit for being able to apply a transformation.

This is the most FAQ on XML, XPath and in XSLT:

The reason for not being able to select any element by name the second document is because there is a default namesace defined in it (xmlns="urn:schemas-microsoft-com:office:spreadsheet").

In XPath any unprefixed name is considered to be in "no namespace". Therefore the template matching Table and the <xsl:for-each> selecting Row elements will not match/select any element, because in the XML document there are no such elements that are in "no namespace".

The most readable solution is to define the same namespaces in the XSLT stylesheet and to use prefixed names in any XPath expression/match-pattern.

Thus, in the corrected XSLT stylesheet you will have:

<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:y="urn:schemas-microsoft-com:office:spreadsheet"
 xmlns:o="urn:schemas-microsoft-com:office:office"
 xmlns:x="urn:schemas-microsoft-com:office:excel"
 xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
 xmlns:html="http://www.w3.org/TR/REC-html40"
  exclude-result-prefixes="y o x ss html"
 >
    <xsl:output method="xml" indent="yes"/>

    <xsl:param name="range-1-begin"  select="1"/>
    <xsl:param name="range-1-end"  select="3"/>
    <xsl:param name="range-2-begin"  select="5"/>
    <xsl:param name="range-2-end"  select="6"/>

    <xsl:template match="y:Table">
        <test>
            <xsl:for-each select="y:Row">
                <xsl:if test="(position() >= $range-1-begin and position() <= $range-1-end)                     or (position() >= $range-2-begin and position() <= $range-2-end)">
                    <Row>
                        <xsl:for-each select="Cell">
                            <xsl:if test="position() = 1 or position() = 3">
                                <Cell>
                                    <xsl:value-of select="."/>
                                </Cell>
                            </xsl:if>
                        </xsl:for-each>
                    </Row>
                </xsl:if>
            </xsl:for-each>
        </test>
    </xsl:template>
</xsl:stylesheet>
欢你一世 2024-10-28 18:19:53

您的测试 Xml 和 Xsl 不声明和使用任何命名空间,而 Excel Xml 导出定义了各种命名空间:

xmlns="urn:schemas-microsoft-com:office:spreadsheet"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:x="urn:schemas-microsoft-com:office:excel"
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"

Your Test Xml and Xsl do not declare and use any namespaces whereas the Excel Xml export defines various namespaces:

xmlns="urn:schemas-microsoft-com:office:spreadsheet"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:x="urn:schemas-microsoft-com:office:excel"
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文