从 HTML TD 和 Tr 中提取值

发布于 2024-12-25 15:58:05 字数 3574 浏览 1 评论 0 原文

我有一些从选项报价网站获得的 HTML 源代码。 (请参见下文)

根据执行价格提取 tr 中的各种文本值并将其存储在集合中的最佳方法是什么(在本例中为 4700,在 td 中期 4700.00 中可用)

有些人推荐正则表达式,而其他建议使用 html 解析器。我在 VBA 中执行此操作,那么最好的方法是什么?

<!--<td><a href="javascript:popup1('','','1')">Quote</a></td>

<td><a href="javascript:popup1('','','','','CE')"><img src="/images/print3.gif"></a>



</td>-->





                    <td><a href="javascript:chartPopup('NIFTY', 'OPTIDX', '25JAN2012', '4700.00','CE','S&P CNX NIFTY');"><img src="/live_market/resources/images/grficon.gif" /></a></td>

                        <td class="ylwbg"> 2,935,500</td>

                        <td class="ylwbg"> 27,550</td>

                        <td class="ylwbg"> 12,458</td>


                        <td class="ylwbg"> 23.79</td>

                        <!-- End-->

                        <td class="ylwbg">





                            <a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=4700.00&type=CE&expiry=25JAN2012" target="_blank"> 139.25</a>



                        </td>

                        <!--*Net Change*-->



                        <td class="ylwbg" Style="color:Red;"> -7.35</td>



                        <td class="ylwbg"> 200</td>

                        <td class="ylwbg"> 139.15</td>

                        <td class="ylwbg"> 142.45</td>

                        <td class="ylwbg"> 200</td>

                        <td class="grybg"><a href="/live_market/dynaContent/live_watch/option_chain/optionDates.jsp?symbol=NIFTY&instrument=OPTIDX&strike=4700.00"><b>4700.00</b></a></td>

                        <td class="nobg"> 1,300</td>

                        <td class="nobg"> 76.00</td>

                        <td class="nobg"> 79.00</td>

                        <td class="nobg"> 1,350</td>



                        <!--*Net Change*-->



                            <td class="nobg" Style="color:Red;"> -1.55</td>





                        <td class="nobg">



                            <!-- <a href="javascript:popup1('NIFTY','OPTIDX','25JAN2012','4700.00','PE')"> 76.00</a> -->



                            <a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=4700.00&type=PE&expiry=25JAN2012" target="_blank"> 76.00</a>







                        </td>



                        <td class="nobg"> 26.33</td>



                        <td class="nobg"> 32,772</td>

                        <td class="nobg"> 103,700</td>



                        <td class="nobg"> 5,123,300</td>



                        <td><a href="javascript:chartPopup('NIFTY', 'OPTIDX', '25JAN2012', '4700.00','PE','S&P CNX NIFTY');"><img src="/live_market/resources/images/grficon.gif" /></a></td>



<!--<td><a href="javascript:popup1('','','1')">Quote</a></td>

<td><a href="javascript:popup1('','','','','PE')"><img src="/images/print3.gif"></a></td>-->



                    </tr>

I have some HTML source that i get from a website for option quotes. (please see below)

What is the best way to extract the various text values in tr and store in a collection based on the strike price (4700 in this case available in the mid td 4700.00)

Some people recommend regex while other suggest to use a html parser. I'm doing this in VBA so whats the best way?

<!--<td><a href="javascript:popup1('','','1')">Quote</a></td>

<td><a href="javascript:popup1('','','','','CE')"><img src="/images/print3.gif"></a>



</td>-->





                    <td><a href="javascript:chartPopup('NIFTY', 'OPTIDX', '25JAN2012', '4700.00','CE','S&P CNX NIFTY');"><img src="/live_market/resources/images/grficon.gif" /></a></td>

                        <td class="ylwbg"> 2,935,500</td>

                        <td class="ylwbg"> 27,550</td>

                        <td class="ylwbg"> 12,458</td>


                        <td class="ylwbg"> 23.79</td>

                        <!-- End-->

                        <td class="ylwbg">





                            <a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=4700.00&type=CE&expiry=25JAN2012" target="_blank"> 139.25</a>



                        </td>

                        <!--*Net Change*-->



                        <td class="ylwbg" Style="color:Red;"> -7.35</td>



                        <td class="ylwbg"> 200</td>

                        <td class="ylwbg"> 139.15</td>

                        <td class="ylwbg"> 142.45</td>

                        <td class="ylwbg"> 200</td>

                        <td class="grybg"><a href="/live_market/dynaContent/live_watch/option_chain/optionDates.jsp?symbol=NIFTY&instrument=OPTIDX&strike=4700.00"><b>4700.00</b></a></td>

                        <td class="nobg"> 1,300</td>

                        <td class="nobg"> 76.00</td>

                        <td class="nobg"> 79.00</td>

                        <td class="nobg"> 1,350</td>



                        <!--*Net Change*-->



                            <td class="nobg" Style="color:Red;"> -1.55</td>





                        <td class="nobg">



                            <!-- <a href="javascript:popup1('NIFTY','OPTIDX','25JAN2012','4700.00','PE')"> 76.00</a> -->



                            <a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=4700.00&type=PE&expiry=25JAN2012" target="_blank"> 76.00</a>







                        </td>



                        <td class="nobg"> 26.33</td>



                        <td class="nobg"> 32,772</td>

                        <td class="nobg"> 103,700</td>



                        <td class="nobg"> 5,123,300</td>



                        <td><a href="javascript:chartPopup('NIFTY', 'OPTIDX', '25JAN2012', '4700.00','PE','S&P CNX NIFTY');"><img src="/live_market/resources/images/grficon.gif" /></a></td>



<!--<td><a href="javascript:popup1('','','1')">Quote</a></td>

<td><a href="javascript:popup1('','','','','PE')"><img src="/images/print3.gif"></a></td>-->



                    </tr>

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

深府石板幽径 2025-01-01 15:58:05

经过一番摆弄后,我使用 XMLHTTP 导出了一个正则表达式/VBA 解决方案

  1. 来访问该站点(更改 strSite 以适应)
  2. 一个正则表达式来获取所需的数字,
  3. 一个包含 20 条记录的变体数组,然后转储数字到活动工作表

output
查看源 HTML 以查找正则表达式模式

Call 选项有一个共同的起始和结束字符串来分隔 10 个值,但有 3 个不同的字符串 Strings

  1. 1-4,每条记录匹配 7-10 X
  2. 字符串 6 有一个Style(和其他文本)位于 X 之前的 > 之前,
  3. 字符串 5 包含更长的 X

的正则表达式
.Pattern = "((.+?)(<\/td>)"
提取所有需要的字符串,但稍后需要对字符串 5 进行进一步的工作

Put 选项以 开头,因此很高兴这些不会被获得点 1-3 的正则表达式

在此处输入图像描述
实际代码

    Sub GetTxt()
    Dim objXmlHTTP As Object
    Dim objRegex As Object
    Dim objRegMC As Object
    Dim objRegM As Object
    Dim strResponse As String
    Dim strSite As String
    Dim lngCnt As Long
    Dim strTemp As String
    Dim X(1 To 20, 1 To 10)
    X(1, 1) = "OI"
    X(1, 2) = "Chng in vol"
    X(1, 3) = "Volume"
    X(1, 4) = "IV"
    X(1, 5) = "LTP"
    X(1, 6) = "Net Chg"
    X(1, 7) = "Bid Qty"
    X(1, 8) = "Bid Price"
    X(1, 9) = "Ask Price"
    X(1, 10) = "Ask Qnty"

    Set objXmlHTTP = CreateObject("MSXML2.XMLHTTP")
    strSite = "http://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionDates.jsp?symbol=NIFTY&instrument=OPTIDX&strike=4700.00"

    On Error GoTo ErrHandler
    With objXmlHTTP
        .Open "GET", strSite, False
        .Send
        If .Status = 200 Then strResponse = .ResponseText
    End With
    On Error GoTo 0

    Set objRegex = CreateObject("vbscript.regexp")
    With objRegex
        '*cleaning regex* to remove all spaces
        .Pattern = "[\xA0\s]+"
        .Global = True
        strResponse = .Replace(strResponse, vbNullString)
        .Pattern = "(<tdclass=""ylwbg"")(Style.+?){0,1}>(.+?)(<\/td>)"
        If .Test(strResponse) Then
            lngCnt = 20
            Set objRegMC = .Execute(strResponse)
            For Each objRegM In objRegMC
                lngCnt = lngCnt + 1
                If Right$(objRegM.submatches(2), 2) <> "a>" Then
                    X(Int((lngCnt - 1) / 10), IIf(lngCnt Mod 10 > 0, lngCnt Mod 10, 10)) = objRegM.submatches(2)
                Else
                'Get submatches of the form <a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=4700.00&type=CE&expiry=23FEB2012" target="_blank"> 206.40</a>
                    strTemp = Val(Right(objRegM.submatches(2), Len(objRegM.submatches(2)) - InStrRev(objRegM.submatches(2), """") - 1))
                    X(Int((lngCnt - 1) / 10), IIf(lngCnt Mod 10 > 0, lngCnt Mod 10, 10)) = strTemp
                End If
            Next
        Else
            MsgBox "Parsing unsuccessful", vbCritical
        End If
    End With
    Set objRegex = Nothing
    Set objXmlHTTP = Nothing
    [a1].Resize(UBound(X, 1), UBound(X, 2)) = X
    Exit Sub
ErrHandler:
    MsgBox "Site not accessible"
    If Not objXmlHTTP Is Nothing Then Set objXmlHTTP = Nothing
End Sub

After some fiddling I have derived a regex/VBA solution using

  1. XMLHTTP to access the site (change strSite to suit)
  2. a Regexp to get the required numbers
  3. a variant array with 20 records to hold, then dump the numbers to the active sheet

output
Looking at the source HTML to find Regex patterns

The Call options have a common starting and finishing string that delimit the 10 values, but there are three different strings

  1. Strings 1-4,7-10 for each record match <td class="ylwbg">X</td>
  2. String 6 has a Style (and other text) preceding the > before the X
  3. String 5 contains a much longer <a href textX</a>

A regex of
.Pattern = "(<tdclass=""ylwbg"")(Style.+?){0,1}>(.+?)(<\/td>)"
extracts all the needed strings, but further work is needed later on string 5

The Put options start with <td class="nobg" so these are happily not extracted by a regex that gets points 1-3

enter image description here
Actual Code

    Sub GetTxt()
    Dim objXmlHTTP As Object
    Dim objRegex As Object
    Dim objRegMC As Object
    Dim objRegM As Object
    Dim strResponse As String
    Dim strSite As String
    Dim lngCnt As Long
    Dim strTemp As String
    Dim X(1 To 20, 1 To 10)
    X(1, 1) = "OI"
    X(1, 2) = "Chng in vol"
    X(1, 3) = "Volume"
    X(1, 4) = "IV"
    X(1, 5) = "LTP"
    X(1, 6) = "Net Chg"
    X(1, 7) = "Bid Qty"
    X(1, 8) = "Bid Price"
    X(1, 9) = "Ask Price"
    X(1, 10) = "Ask Qnty"

    Set objXmlHTTP = CreateObject("MSXML2.XMLHTTP")
    strSite = "http://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionDates.jsp?symbol=NIFTY&instrument=OPTIDX&strike=4700.00"

    On Error GoTo ErrHandler
    With objXmlHTTP
        .Open "GET", strSite, False
        .Send
        If .Status = 200 Then strResponse = .ResponseText
    End With
    On Error GoTo 0

    Set objRegex = CreateObject("vbscript.regexp")
    With objRegex
        '*cleaning regex* to remove all spaces
        .Pattern = "[\xA0\s]+"
        .Global = True
        strResponse = .Replace(strResponse, vbNullString)
        .Pattern = "(<tdclass=""ylwbg"")(Style.+?){0,1}>(.+?)(<\/td>)"
        If .Test(strResponse) Then
            lngCnt = 20
            Set objRegMC = .Execute(strResponse)
            For Each objRegM In objRegMC
                lngCnt = lngCnt + 1
                If Right$(objRegM.submatches(2), 2) <> "a>" Then
                    X(Int((lngCnt - 1) / 10), IIf(lngCnt Mod 10 > 0, lngCnt Mod 10, 10)) = objRegM.submatches(2)
                Else
                'Get submatches of the form <a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=4700.00&type=CE&expiry=23FEB2012" target="_blank"> 206.40</a>
                    strTemp = Val(Right(objRegM.submatches(2), Len(objRegM.submatches(2)) - InStrRev(objRegM.submatches(2), """") - 1))
                    X(Int((lngCnt - 1) / 10), IIf(lngCnt Mod 10 > 0, lngCnt Mod 10, 10)) = strTemp
                End If
            Next
        Else
            MsgBox "Parsing unsuccessful", vbCritical
        End If
    End With
    Set objRegex = Nothing
    Set objXmlHTTP = Nothing
    [a1].Resize(UBound(X, 1), UBound(X, 2)) = X
    Exit Sub
ErrHandler:
    MsgBox "Site not accessible"
    If Not objXmlHTTP Is Nothing Then Set objXmlHTTP = Nothing
End Sub
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文