使用python将数据写入excel时索引超出范围异常
从 URL 获取数据时出现错误,数据 [][] 中索引超出范围异常。
import requests
from bs4 import BeautifulSoup
import openpyxl
import uuid
row_number=2
for i in range(1,3):
website_url = "https://www.example.com/job_search?page=1&txtKeyword=IT&keyword=&txtLocation=Thiruvananthapuram%2C&page="+str(i)
res = requests.get(website_url, verify=False)
soup = BeautifulSoup(res.text, 'lxml')
Links = soup.find_all('div', {'class': 'col-md-6 col-sm-12'},)
data1=[]
data2=[]
for div_block in soup.find_all('dl', class_=['description-list'],style=None):
data1.append([line.strip() for line in div_block.stripped_strings])
for div_block in soup.find_all('div', class_=['details full-width'],style=None):
data2.append([line.strip() for line in div_block.stripped_strings])
url = [tag.find('a')['href'] for tag in Links]
wb = openpyxl.Workbook()
# Write a header row
columns = [
("job_listing_id"),#1
("unique_hash"),#2`enter code here`
("status"),#3
("primary_skills"),#4
("secondary_skills"),#5
("title"),#6
("description"),#7
("job_type"),#8
("source"),#9
("experience"),#10
("location"),#11
("company"),#12
("posted_date"),#13
("expiryDate"),#14
("vacancies"),#15
("company_website"),#16
("posted_by")#17
]
ws = wb.active
for col_number, value, in enumerate(columns, start=1) :
ws.cell(column=col_number, row=1, value=value)
row_number=row_number
y=0
id=uuid.uuid4
for x in url:
res = requests.get(f'{x}', verify=False)
soup = BeautifulSoup(res.text, 'lxml')
data = []
for div_block in soup.find_all('div', class_=['content-block-normal','details full-width'], style=None) :
data.append([line.strip() for line in div_block.stripped_strings])
# Write a data row
row = [
row_number-1, # job_listing_id
str(uuid.uuid4()), # Unique Hash
'NEW',#status
data1[y][5],#primary_skills
'',#secondary_skills
data2[y][0], # Job title
'\n'.join(data[2][1:]) ,#description`
'NA',#job_type
'NA', #source
data1[y][1],#EXp
data[1][4],#Location
data2[y][1],#company
data[0][3], # posted on
'', #expiryDate
'',#vacancies
'',#company_website
'example'#posted_by
]
y+=1
for col_number, value in enumerate(row, start=1):
cell = ws.cell(column=col_number, row=row_number, value=value)
row_number += 1
While taking data from the URL I got an error, index out of range exception in the data[][].
import requests
from bs4 import BeautifulSoup
import openpyxl
import uuid
row_number=2
for i in range(1,3):
website_url = "https://www.example.com/job_search?page=1&txtKeyword=IT&keyword=&txtLocation=Thiruvananthapuram%2C&page="+str(i)
res = requests.get(website_url, verify=False)
soup = BeautifulSoup(res.text, 'lxml')
Links = soup.find_all('div', {'class': 'col-md-6 col-sm-12'},)
data1=[]
data2=[]
for div_block in soup.find_all('dl', class_=['description-list'],style=None):
data1.append([line.strip() for line in div_block.stripped_strings])
for div_block in soup.find_all('div', class_=['details full-width'],style=None):
data2.append([line.strip() for line in div_block.stripped_strings])
url = [tag.find('a')['href'] for tag in Links]
wb = openpyxl.Workbook()
# Write a header row
columns = [
("job_listing_id"),#1
("unique_hash"),#2`enter code here`
("status"),#3
("primary_skills"),#4
("secondary_skills"),#5
("title"),#6
("description"),#7
("job_type"),#8
("source"),#9
("experience"),#10
("location"),#11
("company"),#12
("posted_date"),#13
("expiryDate"),#14
("vacancies"),#15
("company_website"),#16
("posted_by")#17
]
ws = wb.active
for col_number, value, in enumerate(columns, start=1) :
ws.cell(column=col_number, row=1, value=value)
row_number=row_number
y=0
id=uuid.uuid4
for x in url:
res = requests.get(f'{x}', verify=False)
soup = BeautifulSoup(res.text, 'lxml')
data = []
for div_block in soup.find_all('div', class_=['content-block-normal','details full-width'], style=None) :
data.append([line.strip() for line in div_block.stripped_strings])
# Write a data row
row = [
row_number-1, # job_listing_id
str(uuid.uuid4()), # Unique Hash
'NEW',#status
data1[y][5],#primary_skills
'',#secondary_skills
data2[y][0], # Job title
'\n'.join(data[2][1:]) ,#description`
'NA',#job_type
'NA', #source
data1[y][1],#EXp
data[1][4],#Location
data2[y][1],#company
data[0][3], # posted on
'', #expiryDate
'',#vacancies
'',#company_website
'example'#posted_by
]
y+=1
for col_number, value in enumerate(row, start=1):
cell = ws.cell(column=col_number, row=row_number, value=value)
row_number += 1
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
对于您在评论中给出的 URL,数据以 JSON 形式返回,可以直接访问,如下所示:
这会将
description
显示为:对于您的其他站点,您将需要调查尝试使用他们的API。以下内容应该可以帮助您开始:
为您提供开始数据:
希望这有帮助
For the URL you have given in the comment, the data is returned as JSON which can be accessed directly as follows:
This would display
description
as:For your other site, you will need to investigate trying to use their API. The following should get you started:
Giving you data starting:
Hope this helps