如何从 IE DOM 获取最新的 IHTMLDocument2 对象
目前,我使用 MSAA 从 IE HWND 获取 IHTMLDocument2 对象。 然而,对于一些复杂的Web应用程序,这个IHTMLDocument2对象可能包含多个IHTMLDocument2对象,其中一些不属于当前显示的页面,而是属于上一页。
在我看来,IE 有时不会刷新其 DOM 对象,而是不断向其 DOM 添加更多 IHTMLDocument2 对象。 我的问题是如何从 DOM 对象获取当前显示的 IHTMLDocument2 对象。
提前致谢
更新
嗨雷米,
感谢您的回答。
是的,你是对的,我确实使用框架来访问其他 IHTMLDocument2 对象。 我的理解是,我从 HWND 获取的 IHTMLDocument2 对象是其 DOM 中的顶级对象。 IE 有时也会将先前的 IHTMLDocument2 对象放入其中一个框架中。
这是我的代码的一部分。
// Searches spDoc and, recursively, the documents of its frames.
//
// Every entry of spDoc's frames collection is visited in index order and
// GetHTMLText is called on that frame's document; the walk stops as soon as
// m_foundText is set after such a call. When get_document on a frame window
// fails with E_ACCESSDENIED, the document is fetched through the IWebBrowser2
// returned by HtmlWindowToHtmlWebBrowser instead. If m_foundText is still
// unset once the frames have been processed (or the frames collection could
// not be obtained), ReadSearchText is run on spDoc itself.
//
// Parameters:
//   spDoc  - document to search; a null pointer is a no-op.
//   tagNo  - forwarded unchanged to ReadSearchText.
//   schNo  - forwarded unchanged to ReadSearchText.
//
// Returns the result of ReadSearchText(spDoc, ...) when that call is made,
// TRUE otherwise. Results of the recursive per-frame calls are not propagated.
BOOL IESpy::GetHTMLText( CComPtr<IHTMLDocument2> spDoc, int tagNo, int schNo)
{
    if (!spDoc) {
        return TRUE;
    }

    BOOL res = TRUE;

    // Smart pointer so the collection is released on every exit path.
    CComPtr<IHTMLFramesCollection2> spFrames;
    // Keep the HRESULT and the comparison separate; the combined form
    // "hr = (call) == S_OK" stores the boolean, not the HRESULT.
    const HRESULT hrFrames = spDoc->get_frames(&spFrames);
    if (hrFrames == S_OK && spFrames) {
        LONG framesCount = 0;
        if (FAILED(spFrames->get_length(&framesCount))) {
            framesCount = 0;  // treat an unreadable length as "no frames"
        }

        for (long i = 0; i < framesCount; i++) {
            // CComVariant clears itself, including when item() hands back
            // something other than VT_DISPATCH.
            CComVariant varIdx(i);
            CComVariant varResult;
            HRESULT hr = spFrames->item(&varIdx, &varResult);
            if (FAILED(hr) || varResult.vt != VT_DISPATCH) {
                continue;
            }

            CComQIPtr<IHTMLWindow2> pFrameWnd;
            pFrameWnd = varResult.pdispVal;
            if (!pFrameWnd) {
                continue;
            }

            CComQIPtr<IHTMLDocument2> pFrameDoc;
            hr = pFrameWnd->get_document(&pFrameDoc);
            if (!(SUCCEEDED(hr) && pFrameDoc)) {
                if (hr != E_ACCESSDENIED) {
                    continue;
                }
                // NOTE(review): E_ACCESSDENIED is presumably a cross-domain
                // frame - confirm. Fall back to the hosting browser object.
                CComQIPtr<IWebBrowser2> spBrws = HtmlWindowToHtmlWebBrowser(pFrameWnd);
                if (!spBrws) {
                    continue;
                }
                // Get the document object from the IWebBrowser2 object.
                CComQIPtr<IDispatch> spDisp;
                hr = spBrws->get_Document(&spDisp);
                if (hr != S_OK) {
                    continue;
                }
                pFrameDoc = spDisp;
                if (!pFrameDoc) {
                    continue;
                }
            }

            GetHTMLText( pFrameDoc, tagNo, schNo );
            if ( m_foundText ) {
                break;
            }
        }
    }

    // Search this document itself only if no frame produced a result.
    if ( !m_foundText ) {
        res = ReadSearchText(spDoc, tagNo, schNo );
    }
    return res;
}
// Scans every element of spDoc's "all" collection and gathers search text.
//
// For each element, its tag name is passed together with tagNo/schNo to the
// element-level ReadSearchText overload; non-empty results are joined with
// concat_string. The scan stops early once `found` is set and
// m_tagName[tagNo] contains "HTML", "HEAD" or "BODY".
//
// When any text was gathered it is pushed onto m_temp_results if
// m_screenCompare is set; otherwise it is appended to m_result followed by a
// space and m_foundText is set to TRUE.
//
// Parameters:
//   spDoc  - document whose elements are scanned.
//   tagNo  - index into m_tagName; also forwarded to the element overload.
//   schNo  - forwarded to the element overload.
//
// Returns FALSE when spDoc is null or when the collection, its length, or an
// item cannot be retrieved (text gathered so far is then discarded); TRUE
// otherwise.
BOOL IESpy::ReadSearchText(CComPtr<IHTMLDocument2> spDoc, int tagNo, int schNo )
{
    USES_CONVERSION;
    if (!spDoc) {
        return FALSE;
    }

    // Smart pointers release the collection and each item on every exit
    // path; the raw-pointer version leaked them.
    CComPtr<IHTMLElementCollection> spAll;
    HRESULT hr = spDoc->get_all(&spAll);
    if (FAILED(hr) || !spAll) {
        return FALSE;
    }

    long items = 0;
    hr = spAll->get_length(&items);
    if (FAILED(hr)) {
        return FALSE;
    }

    BOOL found = FALSE;
    std::wstring foundText;
    for ( long j = 0; j < items; j++ ) {
        CComVariant index(j);
        CComPtr<IDispatch> spDisp;
        hr = spAll->item( index, index, &spDisp );
        if (FAILED(hr)) {
            return FALSE;
        }

        CComQIPtr<IHTMLElement> spElement(spDisp.p);
        if ( !spElement ) {
            continue;
        }

        CComBSTR bstrTag;
        spElement->get_tagName(&bstrTag);
        wchar_t *wtemp = OLE2W(bstrTag);
        if ( !wtemp ) {
            continue;
        }

        // Pass the raw interface pointer, as the original code did, so the
        // same overload is selected.
        std::wstring text = ReadSearchText(spElement.p, wtemp, tagNo, schNo, found);
        if ( !text.empty() ) {
            if ( !foundText.empty() ) {
                foundText += concat_string;
            }
            foundText += text;
        }

        if ( found ) {
            BOOL stop = FALSE;
            for ( size_t i = 0; i < m_tagName[tagNo]->size(); i++ ) {
                if ( wcscmp(m_tagName[tagNo]->at(i).c_str(), L"HTML") == 0
                    || wcscmp(m_tagName[tagNo]->at(i).c_str(), L"HEAD") == 0
                    || wcscmp(m_tagName[tagNo]->at(i).c_str(), L"BODY") == 0 ) {
                    stop = TRUE;
                    break;
                }
            }
            if ( stop ) {
                break;
            }
        }
    }

    if ( !foundText.empty() ) {
        if ( m_screenCompare ) {
            // long timeStamp = GetHPTimeStamp(spDoc);
            // m_temp_results[timeStamp] = foundText;
            m_temp_results.push_back(foundText);
        } else {
            m_result += foundText;
            m_result += L" ";
            m_foundText = TRUE;
        }
    }
    return TRUE;
}
Currently, I use MSAA to get an IHTMLDocument2 object from an IE HWND. However, with some complicated web applications, this IHTMLDocument2 object may contain several IHTMLDocument2 objects, some of which do not belong to the currently displayed page but to the previous page.
It seems to me that IE sometimes doesn't refresh its DOM object, but keeps adding more IHTMLDocument2 objects to its DOM. My question is: how can I get the currently displayed IHTMLDocument2 object from the DOM object?
Thanks in advance
Update
Hi Remy,
Thanks for your answer.
Yes, you are right, I do use frames to get to other IHTMLDocument2 objects. My understanding is that the IHTMLDocument2 object that I get from an HWND is the top object in its DOM. IE sometimes puts the previous IHTMLDocument2 objects inside one of the frames as well.
Here is part of my code.
// Searches spDoc and, recursively, the documents of its frames.
//
// Every entry of spDoc's frames collection is visited in index order and
// GetHTMLText is called on that frame's document; the walk stops as soon as
// m_foundText is set after such a call. When get_document on a frame window
// fails with E_ACCESSDENIED, the document is fetched through the IWebBrowser2
// returned by HtmlWindowToHtmlWebBrowser instead. If m_foundText is still
// unset once the frames have been processed (or the frames collection could
// not be obtained), ReadSearchText is run on spDoc itself.
//
// Parameters:
//   spDoc  - document to search; a null pointer is a no-op.
//   tagNo  - forwarded unchanged to ReadSearchText.
//   schNo  - forwarded unchanged to ReadSearchText.
//
// Returns the result of ReadSearchText(spDoc, ...) when that call is made,
// TRUE otherwise. Results of the recursive per-frame calls are not propagated.
BOOL IESpy::GetHTMLText( CComPtr<IHTMLDocument2> spDoc, int tagNo, int schNo)
{
    if (!spDoc) {
        return TRUE;
    }

    BOOL res = TRUE;

    // Smart pointer so the collection is released on every exit path.
    CComPtr<IHTMLFramesCollection2> spFrames;
    // Keep the HRESULT and the comparison separate; the combined form
    // "hr = (call) == S_OK" stores the boolean, not the HRESULT.
    const HRESULT hrFrames = spDoc->get_frames(&spFrames);
    if (hrFrames == S_OK && spFrames) {
        LONG framesCount = 0;
        if (FAILED(spFrames->get_length(&framesCount))) {
            framesCount = 0;  // treat an unreadable length as "no frames"
        }

        for (long i = 0; i < framesCount; i++) {
            // CComVariant clears itself, including when item() hands back
            // something other than VT_DISPATCH.
            CComVariant varIdx(i);
            CComVariant varResult;
            HRESULT hr = spFrames->item(&varIdx, &varResult);
            if (FAILED(hr) || varResult.vt != VT_DISPATCH) {
                continue;
            }

            CComQIPtr<IHTMLWindow2> pFrameWnd;
            pFrameWnd = varResult.pdispVal;
            if (!pFrameWnd) {
                continue;
            }

            CComQIPtr<IHTMLDocument2> pFrameDoc;
            hr = pFrameWnd->get_document(&pFrameDoc);
            if (!(SUCCEEDED(hr) && pFrameDoc)) {
                if (hr != E_ACCESSDENIED) {
                    continue;
                }
                // NOTE(review): E_ACCESSDENIED is presumably a cross-domain
                // frame - confirm. Fall back to the hosting browser object.
                CComQIPtr<IWebBrowser2> spBrws = HtmlWindowToHtmlWebBrowser(pFrameWnd);
                if (!spBrws) {
                    continue;
                }
                // Get the document object from the IWebBrowser2 object.
                CComQIPtr<IDispatch> spDisp;
                hr = spBrws->get_Document(&spDisp);
                if (hr != S_OK) {
                    continue;
                }
                pFrameDoc = spDisp;
                if (!pFrameDoc) {
                    continue;
                }
            }

            GetHTMLText( pFrameDoc, tagNo, schNo );
            if ( m_foundText ) {
                break;
            }
        }
    }

    // Search this document itself only if no frame produced a result.
    if ( !m_foundText ) {
        res = ReadSearchText(spDoc, tagNo, schNo );
    }
    return res;
}
// Scans every element of spDoc's "all" collection and gathers search text.
//
// For each element, its tag name is passed together with tagNo/schNo to the
// element-level ReadSearchText overload; non-empty results are joined with
// concat_string. The scan stops early once `found` is set and
// m_tagName[tagNo] contains "HTML", "HEAD" or "BODY".
//
// When any text was gathered it is pushed onto m_temp_results if
// m_screenCompare is set; otherwise it is appended to m_result followed by a
// space and m_foundText is set to TRUE.
//
// Parameters:
//   spDoc  - document whose elements are scanned.
//   tagNo  - index into m_tagName; also forwarded to the element overload.
//   schNo  - forwarded to the element overload.
//
// Returns FALSE when spDoc is null or when the collection, its length, or an
// item cannot be retrieved (text gathered so far is then discarded); TRUE
// otherwise.
BOOL IESpy::ReadSearchText(CComPtr<IHTMLDocument2> spDoc, int tagNo, int schNo )
{
    USES_CONVERSION;
    if (!spDoc) {
        return FALSE;
    }

    // Smart pointers release the collection and each item on every exit
    // path; the raw-pointer version leaked them.
    CComPtr<IHTMLElementCollection> spAll;
    HRESULT hr = spDoc->get_all(&spAll);
    if (FAILED(hr) || !spAll) {
        return FALSE;
    }

    long items = 0;
    hr = spAll->get_length(&items);
    if (FAILED(hr)) {
        return FALSE;
    }

    BOOL found = FALSE;
    std::wstring foundText;
    for ( long j = 0; j < items; j++ ) {
        CComVariant index(j);
        CComPtr<IDispatch> spDisp;
        hr = spAll->item( index, index, &spDisp );
        if (FAILED(hr)) {
            return FALSE;
        }

        CComQIPtr<IHTMLElement> spElement(spDisp.p);
        if ( !spElement ) {
            continue;
        }

        CComBSTR bstrTag;
        spElement->get_tagName(&bstrTag);
        wchar_t *wtemp = OLE2W(bstrTag);
        if ( !wtemp ) {
            continue;
        }

        // Pass the raw interface pointer, as the original code did, so the
        // same overload is selected.
        std::wstring text = ReadSearchText(spElement.p, wtemp, tagNo, schNo, found);
        if ( !text.empty() ) {
            if ( !foundText.empty() ) {
                foundText += concat_string;
            }
            foundText += text;
        }

        if ( found ) {
            BOOL stop = FALSE;
            for ( size_t i = 0; i < m_tagName[tagNo]->size(); i++ ) {
                if ( wcscmp(m_tagName[tagNo]->at(i).c_str(), L"HTML") == 0
                    || wcscmp(m_tagName[tagNo]->at(i).c_str(), L"HEAD") == 0
                    || wcscmp(m_tagName[tagNo]->at(i).c_str(), L"BODY") == 0 ) {
                    stop = TRUE;
                    break;
                }
            }
            if ( stop ) {
                break;
            }
        }
    }

    if ( !foundText.empty() ) {
        if ( m_screenCompare ) {
            // long timeStamp = GetHPTimeStamp(spDoc);
            // m_temp_results[timeStamp] = foundText;
            m_temp_results.push_back(foundText);
        } else {
            m_result += foundText;
            m_result += L" ";
            m_foundText = TRUE;
        }
    }
    return TRUE;
}
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
IHTMLDocument2 不能包含其他 IHTMLDocument2 对象(除非它们属于页面上的框架),当然也不能包含来自先前页面的对象。 你如何准确地确定这一点? 你能展示一些代码吗?
An IHTMLDocument2 cannot contain other IHTMLDocument2 objects (unless they belong to frames on the page), and certainly not from previous pages. How are you determining that exactly? Can you show some code?