# Request headers sent with every page fetch: a desktop Safari User-Agent
# string and a fixed Referer value.
HEADERS = {
    'User-Agent': (
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) "
        "AppleWebKit/534.50 (KHTML, like Gecko) "
        "Version/5.1 Safari/534.50"
    ),
    'Referer': "http://whois.chinaz.com/dytt8.net",
}
# Get the URL of every movie detail page.
def get_detail_urls(url):
    """Fetch one listing page and collect the detail-page hrefs found on it.

    The page at ``url`` is requested with the module-level ``HEADERS``,
    decoded as GBK (undecodable bytes are ignored), parsed with
    ``etree.HTML`` and queried for the ``href`` attribute of every ``<a>``
    inside a ``<table class="tbspan">``.

    NOTE(review): this excerpt ends right after the commented-out loop
    below. The "list expression" that the original comment refers to, and
    any ``return`` statement, are not visible here, so as written the
    function returns ``None``. Restore the tail from the full source.

    Args:
        url: Address of the listing page to fetch.
    """
    res = requests.get(url, headers=HEADERS)
    # requests decodes with an encoding it guesses by itself and sometimes
    # guesses wrong, so decode the raw bytes explicitly instead.
    text = res.content.decode('gbk', errors='ignore')
    html = etree.HTML(text)  # e.g. <Element html at 0x168712c4540>
    detail_urls = html.xpath("//table[@class='tbspan']//a/@href")
    # The commented-out code below is equivalent to the list expression that
    # followed it in the original source (not present in this excerpt):
    # def abc(url):
    #     return BASE_DOMAIN+url
    # index = 0
    # for detail_url in detail_urls:
    #     detail_url = abc(detail_url)
    #     detail_urls[index] = detail_url
    #     index += 1
def read_list():
    """Print the time and volume columns of every data row in ``temp.csv``.

    The file is read from the current working directory with ``csv.reader``.
    The first row is treated as a header and skipped. For each remaining row
    a dict ``{'time': row[1], 'volumn': row[-2]}`` is printed (the
    ``'volumn'`` key spelling is kept for compatibility with existing
    consumers of the output).

    An empty file prints nothing, and rows with fewer than two fields (for
    example blank lines) are skipped instead of raising ``IndexError``.
    """
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for file objects passed to csv.reader.
    with open('temp.csv', 'r', newline='') as fp:
        # reader is an iterator over the parsed rows.
        reader = csv.reader(fp)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if len(row) < 2:
                continue
            # Named `timestamp` rather than `time` so the local does not
            # shadow the `time` module.
            timestamp = row[1]
            volume = row[-2]
            print({'time': timestamp, 'volumn': volume})
def read_dict():
    """Print every data row of ``temp1.csv`` as a mapping keyed by header.

    The file is read from the current working directory with
    ``csv.DictReader``, which uses the first row as the field names, so the
    header row itself is not among the printed rows.
    """
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for file objects passed to a reader.
    with open('temp1.csv', 'r', newline='') as fp:
        # The rows yielded here do not include the title (header) row.
        reader = csv.DictReader(fp)
        for row in reader:
            print(row)
def find_db(cursor):
    """Print every row of the ``students`` table, one row per line.

    Args:
        cursor: A database cursor providing ``execute`` and ``fetchall``.
            The query runs on whatever connection the cursor belongs to.
    """
    sql = """select * from students"""
    cursor.execute(sql)
    # fetchall() is kept (rather than iterating the cursor directly) so the
    # function relies only on the two cursor methods named above.
    for row in cursor.fetchall():
        print(row)
def update_db(cursor):
    """Set ``score`` to ``'100'`` for every student named ``小龙``.

    Only the UPDATE statement is executed here; this function does not
    commit, so persisting the change is left to the caller.

    Args:
        cursor: A database cursor providing ``execute``.
    """
    sql = """update students set score='100' where name='小龙'"""
    cursor.execute(sql)
def delete_db(cursor):
    """Delete every row of ``students`` whose ``name`` is ``小龙``.

    Only the DELETE statement is executed here; this function does not
    commit, so persisting the change is left to the caller.

    Args:
        cursor: A database cursor providing ``execute``.
    """
    sql = """delete from students where name='小龙'"""
    cursor.execute(sql)
#!/usr/bin/python3 # -*- coding: utf-8 -*- import time import random from selenium import webdriver import xlwt from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC