Selenium 无法通过标签名称找到元素
我正在尝试从某个网站抓取数据,在某个步骤需要获取某个特定元素。该元素的名称和 ID 是动态生成的,因此 XPATH、CSS_SELECTOR、ID 都无法使用。我尝试使用标签名称获取该元素,但失败了。我先获取父元素(在这种情况下为 li),再对其调用 find_element() 函数以获取第一个子元素(results-card),但它不起作用。我必须获取这个特定元素的原因是,它下面有一个我需要展开的 shadow root。
Python代码:
nb_cv=0
liens_profils=[]
nb_cv_dmd=int(nb_cv_dmd)
while nb_cv<nb_cv_dmd:
#on ouvre le premier shadow root
WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH,'//*[@id="resultslist"]')))
liste_profils_shadow= expand_shadow_element(driver.find_element(by=By.XPATH,value='//*[@id="resultslist"]'))
#on recupere l'élément parent des profils et on récupère tous les profils
parent_profils=liste_profils_shadow.find_element(by=By.ID,value="list")
profils=parent_profils.find_elements(by=By.TAG_NAME,value="li")
for profil in profils:
if nb_cv<nb_cv_dmd:
try:
#WebDriverWait(driver,20).until(EC.presence_of_element_located((By.TAG_NAME,"results-card")))
result_card=profil.find_element(by=By.TAG_NAME,value="results-card")
result_card_shadow=expand_shadow_element(result_card)
html:
<!-- Liste de résultats -->
<results-list class="resultslist"
id="resultslist"
state=[{"idCandidat":17595816,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":14747406,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17816114,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":15634996,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17641294,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17808000,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":16696140,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":13730612,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":16623996,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17435668,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17809379,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":15037379,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17781323,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":10005222,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17805944,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17415932,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":13931906,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":15098652,"isSelected":false,"iscollapsed"
:true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17745872,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17790259,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}}] withPreview>
<template slot="17595816" data-results-card>
<results-card id="person_17595816"
data-candidat-id="17595816"
data-over-url="/Recherche/CvOver?candidateId=17595816&guid=4739f03b-bc68-47e5-ba5c-9a79f1978d44&Position=1&NbResultats=966&isAnonymized=False&IdSource=1&OngletProjet=Candidats&OngletSelectionne=ParDate&Sources=1"
mobility='{
"otherDeps":["95","78","91","94","77","92","75","93"],
"homeDep":"76",
"isForeign":false,
"location":"Saint-Étienne-du-Rouvray",
"mobility":[]
}'
infos='{
"qualification":"3",
"exp":"1",
"posteRecherche":"Développeur web, \u003cspan class=\"highlight\"\u003eDevOps\u003c/span\u003e, Développeur Java JEE",
"sector":["Informatique / Télécom / Multimédia"],
"salary":"À convenir",
"handicap":false,
"dispo":"Immédiate"
}'
data='{
"posteActuel":"Ingénieur d\u0027études et développement informatique",
"name":"Ulrich KENMOGNE KAMGUIA",
"linkUrl": "/Profil?CandidateId=17595816&Position=1&NbResultats=966&NbParPages=11&IdSource=1&Sources=System.Collections.Generic.List%601%5BSystem.Int32%5D&GuidRecherche=4739f03b-bc68-47e5-ba5c-9a79f1978d44&OngletSelectionne=ParDate&CvCurrentPage=1&NbPageCv=0&OngletProjet=Candidats&FromSelection=False&FromNotification=False&StartFrom=0&IndexCandidatListe=0&CurrentPosition=0&IdRecherche=0&SourceOriginePage=Recherche&sources=1",
"sitePicto":"BDM/job",
"sitePictoLabel":"BDM/job",
"date":"21/06/2022 13:08:05",
"isMoreThan12Month":false,
"idCandidat":"17595816"
}'
buttonsData='{
"idCandidat":"17595816",
"idProjet":"",
"name":"Ulrich KENMOGNE KAMGUIA",
"idSource":"1",
"currentPosition":"1",
"currentCandidate":"17595816",
"sources":"1",
"extensionOriginale":"",
"hash":"b625f3af37973803d90b964f04da93d3"
}'
tags='[]'>
exception:
Message: invalid argument: invalid locator
(Session info: chrome=102.0.5005.115)
Stacktrace:
Backtrace:
Ordinal0 [0x0113D953+2414931]
Ordinal0 [0x010CF5E1+1963489]
Ordinal0 [0x00FBC6B8+837304]
Ordinal0 [0x00FE9691+1021585]
Ordinal0 [0x00FE97EB+1021931]
Ordinal0 [0x00FDFD21+982305]
Ordinal0 [0x010044E4+1131748]
Ordinal0 [0x00FDFC74+982132]
Ordinal0 [0x010046B4+1132212]
Ordinal0 [0x01014812+1198098]
Ordinal0 [0x010042B6+1131190]
Ordinal0 [0x00FDE860+976992]
Ordinal0 [0x00FDF756+980822]
GetHandleVerifier [0x013ACC62+2510274]
GetHandleVerifier [0x0139F760+2455744]
GetHandleVerifier [0x011CEABA+551962]
GetHandleVerifier [0x011CD916+547446]
Ordinal0 [0x010D5F3B+1990459]
Ordinal0 [0x010DA898+2009240]
Ordinal0 [0x010DA985+2009477]
Ordinal0 [0x010E3AD1+2046673]
BaseThreadInitThunk [0x76C9FA29+25]
RtlGetAppContainerNamedObjectPath [0x777A7A7E+286]
RtlGetAppContainerNamedObjectPath [0x777A7A4E+238]
我不太清楚为什么,但是当我查看页面源代码时,HTML 似乎与使用开发者工具检查时看到的不一样。
I'm trying to do some data scraping from a website and I need to get a certain element at some point. The element's name and id are dynamically generated, so the XPATH, CSS_SELECTOR, and ID are unusable. I'm trying to use the tag name to get the element but it fails. I'm getting the parent element (li in this case) and applying the find_element() function to get the very first child element (results-card) but it doesn't work. The reason I have to get this specific element is that there is a shadow root stemming from it that I need to expand.
Python code:
nb_cv=0
liens_profils=[]
nb_cv_dmd=int(nb_cv_dmd)
while nb_cv<nb_cv_dmd:
#on ouvre le premier shadow root
WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH,'//*[@id="resultslist"]')))
liste_profils_shadow= expand_shadow_element(driver.find_element(by=By.XPATH,value='//*[@id="resultslist"]'))
#on recupere l'élément parent des profils et on récupère tous les profils
parent_profils=liste_profils_shadow.find_element(by=By.ID,value="list")
profils=parent_profils.find_elements(by=By.TAG_NAME,value="li")
for profil in profils:
if nb_cv<nb_cv_dmd:
try:
#WebDriverWait(driver,20).until(EC.presence_of_element_located((By.TAG_NAME,"results-card")))
result_card=profil.find_element(by=By.TAG_NAME,value="results-card")
result_card_shadow=expand_shadow_element(result_card)
The element I'm trying to select
HTML:
<!-- Liste de résultats -->
<results-list class="resultslist"
id="resultslist"
state=[{"idCandidat":17595816,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":14747406,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17816114,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":15634996,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17641294,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17808000,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":16696140,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":13730612,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":16623996,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17435668,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17809379,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":15037379,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17781323,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":10005222,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17805944,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17415932,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":13931906,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":15098652,"isSelected":false,"iscollapsed"
:true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17745872,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}},{"idCandidat":17790259,"isSelected":false,"iscollapsed":true,"hello":{"helloState":"","helloNeedFeedback":false}}] withPreview>
<template slot="17595816" data-results-card>
<results-card id="person_17595816"
data-candidat-id="17595816"
data-over-url="/Recherche/CvOver?candidateId=17595816&guid=4739f03b-bc68-47e5-ba5c-9a79f1978d44&Position=1&NbResultats=966&isAnonymized=False&IdSource=1&OngletProjet=Candidats&OngletSelectionne=ParDate&Sources=1"
mobility='{
"otherDeps":["95","78","91","94","77","92","75","93"],
"homeDep":"76",
"isForeign":false,
"location":"Saint-Étienne-du-Rouvray",
"mobility":[]
}'
infos='{
"qualification":"3",
"exp":"1",
"posteRecherche":"Développeur web, \u003cspan class=\"highlight\"\u003eDevOps\u003c/span\u003e, Développeur Java JEE",
"sector":["Informatique / Télécom / Multimédia"],
"salary":"À convenir",
"handicap":false,
"dispo":"Immédiate"
}'
data='{
"posteActuel":"Ingénieur d\u0027études et développement informatique",
"name":"Ulrich KENMOGNE KAMGUIA",
"linkUrl": "/Profil?CandidateId=17595816&Position=1&NbResultats=966&NbParPages=11&IdSource=1&Sources=System.Collections.Generic.List%601%5BSystem.Int32%5D&GuidRecherche=4739f03b-bc68-47e5-ba5c-9a79f1978d44&OngletSelectionne=ParDate&CvCurrentPage=1&NbPageCv=0&OngletProjet=Candidats&FromSelection=False&FromNotification=False&StartFrom=0&IndexCandidatListe=0&CurrentPosition=0&IdRecherche=0&SourceOriginePage=Recherche&sources=1",
"sitePicto":"BDM/job",
"sitePictoLabel":"BDM/job",
"date":"21/06/2022 13:08:05",
"isMoreThan12Month":false,
"idCandidat":"17595816"
}'
buttonsData='{
"idCandidat":"17595816",
"idProjet":"",
"name":"Ulrich KENMOGNE KAMGUIA",
"idSource":"1",
"currentPosition":"1",
"currentCandidate":"17595816",
"sources":"1",
"extensionOriginale":"",
"hash":"b625f3af37973803d90b964f04da93d3"
}'
tags='[]'>
exception:
Message: invalid argument: invalid locator
(Session info: chrome=102.0.5005.115)
Stacktrace:
Backtrace:
Ordinal0 [0x0113D953+2414931]
Ordinal0 [0x010CF5E1+1963489]
Ordinal0 [0x00FBC6B8+837304]
Ordinal0 [0x00FE9691+1021585]
Ordinal0 [0x00FE97EB+1021931]
Ordinal0 [0x00FDFD21+982305]
Ordinal0 [0x010044E4+1131748]
Ordinal0 [0x00FDFC74+982132]
Ordinal0 [0x010046B4+1132212]
Ordinal0 [0x01014812+1198098]
Ordinal0 [0x010042B6+1131190]
Ordinal0 [0x00FDE860+976992]
Ordinal0 [0x00FDF756+980822]
GetHandleVerifier [0x013ACC62+2510274]
GetHandleVerifier [0x0139F760+2455744]
GetHandleVerifier [0x011CEABA+551962]
GetHandleVerifier [0x011CD916+547446]
Ordinal0 [0x010D5F3B+1990459]
Ordinal0 [0x010DA898+2009240]
Ordinal0 [0x010DA985+2009477]
Ordinal0 [0x010E3AD1+2046673]
BaseThreadInitThunk [0x76C9FA29+25]
RtlGetAppContainerNamedObjectPath [0x777A7A7E+286]
RtlGetAppContainerNamedObjectPath [0x777A7A4E+238]
I don't really know why but the HTML seems different when I inspect the page's source code than when I inspect it with developer tools.
如果你对这篇内容有疑问,欢迎到本站社区发帖提问、参与讨论,获取更多帮助,或者扫描二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定您的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论