import time
import urllib
from pathlib import Path
from urllib import parse, request


# HTTP request headers passed to every request built by visit_page.
# The User-Agent value is a Mobile Safari (iPhone, iOS 13.2.3) string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/13.0.3 Mobile/15E148 Safari/604.1"
    ),
}
# Tieba listing URL pattern: the "pn" query parameter advances by 50 per page.
# https://tieba.baidu.com/f?kw=%E6%AE%B5%E5%8F%8B%E4%B9%8B%E5%AE%B6&ie=utf-8&pn=0     # page 1
# https://tieba.baidu.com/f?kw=%E6%AE%B5%E5%8F%8B%E4%B9%8B%E5%AE%B6&ie=utf-8&pn=50    # page 2

def make_url(name, url, start_page, end_page):
    """Download and save every listing page from start_page to end_page.

    For each page number in the inclusive range, the page URL is built by
    appending a ``pn`` offset to ``url``; the page is fetched with
    ``visit_page`` and stored with ``write``.

    Args:
        name: Forum name; becomes part of the output file name and is
            passed on to ``visit_page``.
        url: Base listing URL. It must already contain a query string,
            because ``&pn=<offset>`` is appended to it verbatim.
        start_page: First page number to download (page 1 maps to pn=0).
        end_page: Last page number to download, inclusive.
    """
    pn_step = 50  # the pn offset grows by 50 from one page to the next
    for page_no in range(start_page, end_page + 1):
        offset = pn_step * (page_no - 1)
        page_url = url + f"&pn={offset}"
        target = f"A:/Desktop/TiebaSpider/{name!s}{page_no}页.html"

        # Fetch first, then hand the raw bytes to write() for saving.
        write(page_no, visit_page(name, page_url, target), target)

def visit_page(name, full_url, filename):
    """Fetch one page over HTTP and return its raw body.

    Args:
        name: Forum name, used only in the progress message. Any value
            that can be formatted as text is accepted.
        full_url: Complete URL of the page to request.
        filename: Not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        bytes: The undecoded response body.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the
            request fails.
    """
    # Format rather than concatenate so a non-str name does not raise.
    print("正在访问:", f"{name}吧")
    req = request.Request(full_url, headers=headers)
    # Use the response as a context manager so the underlying connection
    # is closed even if read() raises.
    with request.urlopen(req) as resp:
        return resp.read()


def write(page, html, filename, delay=3):
    """Save one downloaded page to disk, then pause before returning.

    Args:
        page: Page number, used only in the progress message.
        html: Page content as bytes; the file is opened in binary mode.
        filename: Destination path. Missing parent directories are
            created before the file is opened.
        delay: Seconds to sleep after the file has been written.
            Defaults to 3; a value of 0 or less skips the pause.
            Presumably the pause spaces out successive requests --
            confirm before removing it.
    """
    print("正在保存第", page, "页为", filename)
    # open() does not create directories, so make sure the target folder
    # exists instead of failing with FileNotFoundError on a fresh machine.
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(html)
    print("=====下载成功=====")
    # Sleep only after the file is closed so the handle is not held open
    # for the duration of the pause.
    if delay > 0:
        time.sleep(delay)

if __name__ == '__main__':
    # Interactive entry point: ask for the forum name and the inclusive
    # page range, then download every page in that range.
    name = input("输入贴吧名:")
    start_page = int(input("起始页:"))
    end_page = int(input("结束页:"))

    # urlencode percent-encodes the forum name into a "kw=..." query string.
    kw = parse.urlencode({"kw": name})
    url = "https://tieba.baidu.com/f?" + kw
    make_url(name, url, start_page, end_page)

    time.sleep(2)
    print("++++++++++已全部下载完成++++++++++")

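# --- Blog page footer captured together with the script (not program code) ---
# Scan the QR code to read this on your phone!
# Last modified: 2020-04-09, 07:25 PM
# If you found this article useful, feel free to leave a tip.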