Store应用最新版本,Python小白求大神指点为什么没有数据出来也没有报错

我想获取这个网站的电气说明书PDF

import requests
import urllib.request
from bs4 import BeautifulSoup
import re
import os

def collect_category(url):
    category_urls = []
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'}
    r = requests.get(url, headers=headers)
    # print(r.status_code)
    soup = BeautifulSoup(r.text, "html.parser")
    urls = soup.select("div.pro_menudldda")
    for i in urls:
        category_urls.append("" + i.get("href"))
    print(category_urls)
    return category_urls

def collect_items(url):
    items_urls = []
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'}
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "html.parser")
    urls = soup.select("#pro_listlia")
    for i in urls:
        items_urls.append("" + i.get("href"))
    return items_urls

def download_pdf(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'}
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "html.parser")
    urls = soup.select("bodydiv:nth-child(10)divdiv:nth-child(3)ullia")
    for i in urls:
        name = i.get_text()
        name = re.sub("/", "", name)
        print(name)
        if os.path.exists('D:/delixi/%s.pdf' % name):
            print("文件已存在")
            continue
        pdf_url = "" + i.get("href")
        print(pdf_url)
        u = urllib.request.urlopen(pdf_url)
        print("进入成功,正在下载……")
        block_sz = 8192
        with open('D:/delixi/%s.pdf' % name, 'wb') as f:
            while True:
                buffer = u.read(block_sz)
                if buffer:
                    f.write(buffer)
                else:
                    print('第%d个文件已下载' % n)
                    break
        print("=====================")

url = ""
category_urls = collect_category(url)
print("目录链接收集完毕")
n = 0
for i in category_urls:
    items_urls = collect_items(i)
    print("准备开始下载PDF")
    for a in items_urls:
        n += 1
        download_pdf(a)
print("全部文件下载完毕")

求大佬帮我看看应该怎么改

代码

headersList = [
    {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}
]

def getHtmlList(url):
    headers = headersList[random.randint(0, len(headersList) - 1)]
    # headers = headersList[3]
    attempts = 0
    success = False
    while attempts < 5 and not success:
        try:
            r = requests.get(url, headers=headers, timeout=100)
            success = True
            return r
        except:
            time.sleep(15)
            print(time.ctime())
            print("失败" + url)
            attempts += 1
            if (attempts == 5):
                break

def get_apple_lastest_version():
    res_apple = getHtmlList('')
    res_apple.encoding = 'utf-8'
    apple_pattern = "版本[2-9].[4-9].[0-9][0-9]?/p"
    apple_pattern_2 = "[2-9].[4-9].[0-9][0-9]?"
    apple_version = re.findall(apple_pattern, res_apple.text, flags=0)
    if (len(apple_version) == 0):
        file_temp = "C:\temp20191023\error.txt"
        data_temp = open(file_temp, 'w+')
        print(res_apple.text, file=data_temp)
        data_temp.close()
        return "0.0.0"
    apple_version = re.findall(apple_pattern_2, apple_version[0], flags=0)
    apple = apple_version[0]
    return apple

每60~120s读取一次。一段时间后get到的错误html:

!DOCTYPEhtmlhtmllang="zh-cn"prefix="og:#"headmetacharset="utf-8"meta"X-UA-Compatible"content="IE=edge"metaname="viewport"content="width=device-width,initial-scale=1,viewport-fit=cover"metaname="web-experience-app/config/environment"content="%7B%22appVersion%22%3A1%2C%22modulePrefix%22%3A%22web-experience-app%22%2C%22environment%22%3A%22production%22%2C%22rootURL%22%3A%22/%22%2C%22locationType%22%3A%22history-hash-router-scroll%22%2C%22historySupportMiddleware%22%3Atrue%2C%22contentSecurityPolicyMeta%22%3Atrue%2C%22contentSecurityPolicy%22%3A%7B%22default-src%22%3A%5B%22%27none%27%22%5D%2C%22img-src%22%3A%5B%22%27self%27%22%2C%22http%3A//*.mzstatic.com%22%2C%22*.mzstatic.com%22%2C%22*.apple.com%22%2C%22*.googleusercontent.com%22%2C%22data%3A%22%5D%2C%22style-src%22%3A%5B%22%27self%27%22%2C%22%27unsafe-inline%27%22%2C%22*.apple.com%22%5D%2C%22font-src%22%3A%5B%22%27self%27%22%2C%22http%3A//*.apple.com%22%2C%22%3A//*.apple.com%22%5D%2C%22medi

到底是啥情况…应该怎么办…?求各位大神给个办法

import requests
from bs4 import BeautifulSoup
import time

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}

def xingbie_sex(class_name):
    if class_name == ['member_boy_ioc']:
        return '男'
    else:
        return '女'

def get_likes(url):
    res = requests.get(url, headers=headers)
    soup = BeautifulSoup(res.text, 'lxml')
    likes = soup.select('#page_listullia')

def get_info(url):
    res = requests.get(url, headers=headers)
    soup = BeautifulSoup(res.text, 'lxml')
    dizhis = soup.select('#page_listulli(1)div.result_btm_con.lodgeunitnamediv.result_introaspan')
    mingzis = soup.select('#floatRightBoxdiv.js_box.clearfixdiv.w_240h6a')
    jiages = soup.select('#scrollPricediv.flspan')
    for dizhi, mingzi, jiage in zip(dizhis, mingzis, jiages):
        data = {'dizhi': dizhi.get_texte().strip(), 'mingzi': mingzi.get_text().strip(), 'jiage': jiage.price.get_text()}
        print(data)

if __name__ == '__main__':
    urls = ['{}-0/'.format(nume) for nume in range(1, 14)]
    for single_url in urls:
        get_likes(single_url)
        time.sleep(2)

发表评论