
"""变量:变化的量 1、变量值 value 2、变量的内存地址 ID 3、变量的数据类型 type"""# import numpy as np# arr = np.arange(10)# print(type(arr))# print(arr)## l1 = [1,2,3,4,5]# print(type(l1))# print(l1)# import requests## header = {# ‘User-Agent‘: ‘Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/76.0.3809.132 Safari/537.36‘# }## res = requests.get(‘https://www.bilibili.com/vIDeo/av68746541/?spm_ID_from=333.334.b_63686965665f7265636f6d6d656e64.16‘,# headers=header)# res.enCoding = res.apparent_enCoding# print(res.text)# for i in range(0,100,25):# print(i)import reprint(re.findall(‘a.*?c‘,‘a123c456dsdadac‘))02模拟浏览器登陆
import requests

# Send a desktop-browser User-Agent along with the request.
header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/76.0.3809.132 Safari/537.36',
}

# requests never times out by default; a timeout keeps the script from
# hanging forever when the server does not answer.
res = requests.get('https://dig.chouti.com/', headers=header, timeout=10)
data = res.text
print(data)

# 03 Scraping Douban again
"""Scrape the Douban Top 250 movie list with requests and a regular expression.

Page 1: https://movie.douban.com/top250?start=0&filter=
Page 2: https://movie.douban.com/top250?start=25&filter=

requests: HTTP request library
re: regular expressions
"""
import re

import requests

# NOTE(review): Douban is reported to reject the default python-requests
# User-Agent, so a browser-like one is sent here -- confirm against the live
# site.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/76.0.3809.132 Safari/537.36',
}

# re.findall(pattern, text, flags) pulls the wanted data out of the page text.
#   .*?    skips over unwanted text until the wanted part appears
#   (.*?)  captures the wanted part
# Captured per movie, in order: link, title, rating, number of ratings.
# re.S lets '.' match newlines too, so one pattern can span several HTML lines.
MOVIE_PATTERN = re.compile(
    r'<div class="item">.*?<a href="(.*?)">'
    r'.*?<span class="title">(.*?)</span>'
    r'.*?<span class="rating_num" property="v:average">(.*?)</span>'
    r'.*?<span>(.*?)人评价</span>',
    re.S,
)

# Output layout for one movie: name, link, rating, number of ratings.
MOVIE_TEMPLATE = '''
电影名称: %s
电影地址: %s
电影评分: %s
评价人数: %s
\n
'''

# Save the data. Mode 'a' appends, so reruns add to the file instead of
# overwriting it. The file is opened once for the whole crawl.
with open('豆瓣.txt', 'a', encoding='utf-8') as f:
    # Build each page URL: the list shows 25 movies per page, so ``start``
    # advances in steps of 25 across all 250 entries.
    for start in range(0, 250, 25):
        page_url = f'https://movie.douban.com/top250?start={start}&filter='
        response = requests.get(page_url, headers=HEADERS, timeout=10)

        # Parse the page text and extract the data for every movie on it.
        for movie_url, name, point, count in MOVIE_PATTERN.findall(response.text):
            movie_data = MOVIE_TEMPLATE % (name, movie_url, point, count)
            print(movie_data)
            f.write(movie_data)

# 04 Scraping Douban politely
import time

import requests
# import re
from bs4 import BeautifulSoup
from openpyxl import Workbook

# NOTE(review): Douban is reported to reject the default python-requests
# User-Agent, so a browser-like one is sent here -- confirm against the live
# site.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/76.0.3809.132 Safari/537.36',
}

wb = Workbook()
sheet = wb.active

# The sheet name and the header row are written once, before any page is
# fetched, instead of being rewritten for every page.
sheet.title = '好评电影'
sheet.append(['序号', '电影名称', '电影评分', '电影链接', '电影图片'])

# Running number of the movie, written to the first column.
count = 0

# Build each page URL: 25 movies per page, first four pages (100 movies).
for start in range(0, 100, 25):
    page_url = f'https://movie.douban.com/top250?start={start}&filter='

    # Send the request to the server and take the response body as text.
    response = requests.get(page_url, headers=HEADERS, timeout=10)

    # Parse the page with the built-in 'html.parser' parser.
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the <ol> tag whose class is "grid_view".
    ol = soup.find(name='ol', attrs={'class': 'grid_view'})
    if ol is None:
        # Without the list there is nothing to read on this page; stop here
        # but still save the rows collected so far.
        print(f'No movie list found at {page_url} (HTTP {response.status_code})')
        break

    # Each <li> inside the list holds one movie.
    for li in ol.find_all(name='li'):
        name = li.find(name='span', attrs={'class': 'title'})
        link = li.find(name='a')
        rat = li.find(name='span', attrs={'class': 'rating_num'})
        img = li.find(name='img')

        count += 1
        sheet.append([count, name.text, rat.text, link['href'], img['src']])

    # Be polite: pause between pages.
    time.sleep(1)

wb.save('好评电影.xlsx')

# 05 Translating with iciba (Kingsoft PowerWord)
import json

import requests


def main(key=""):
    """Look up a translation of *key* through the iciba web endpoint.

    Posts the word to ``http://fy.iciba.com/ajax.php?a=fy`` and decodes the
    JSON reply.

    Args:
        key: The word or phrase to translate.

    Returns:
        ``content['word_mean']`` from the reply when that key is present,
        otherwise ``content['out']``.
        NOTE(review): presumably ``word_mean`` holds dictionary meanings and
        ``out`` a plain translation -- confirm against the service.

    Raises:
        KeyError: If the reply has no ``content`` entry, or ``content`` has
            neither ``word_mean`` nor ``out``.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/76.0.3809.132 Safari/537.36',
    }
    url = 'http://fy.iciba.com/ajax.php?a=fy'
    # NOTE(review): 'f'/'t' look like source/target language selectors and
    # 'w' is the text to translate -- confirm against the service.
    data = {
        'f': 'auto',
        't': 'auto',
        'w': key,
    }

    # Send a POST request carrying the headers and the form data; the timeout
    # keeps the call from hanging forever if the server does not answer.
    res = requests.post(url=url, headers=header, data=data, timeout=10)

    # The response body is JSON text; deserialize it into Python objects.
    data_list = json.loads(res.text)
    content = data_list['content']

    # Only a missing 'word_mean' key triggers the fallback; any other error
    # is a real problem and is allowed to propagate.
    try:
        return content['word_mean']
    except KeyError:
        return content['out']


if __name__ == '__main__':
    # Read the user's input.
    key = input('请输入要翻译的词语:').strip()
    # Handle empty input separately instead of sending an empty query.
    if not key:
        print('输入为空')
    else:
        data = main(key=key)
        print(data)

# Summary
以上是内存溢出为你收集整理的day04 python全部内容,希望文章能够帮你解决day04 python所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
微信扫一扫
支付宝扫一扫
评论列表(0条)