好听音乐网歌曲爬虫的首个版本,目前支持爬取热播榜、新曲榜、最新推荐、最新单曲四个排行榜。

import re   #python正则表达式模块
import requests     #外部requests模块
import time
####################################页面规则####################################
# 首页
# http://www.htqyy.com/top/hot
# 第二页
# http://www.htqyy.com/top/musicList/hot?pageIndex=1&pageSize=20
# 第三页
# http://www.htqyy.com/top/musicList/hot?pageIndex=2&pageSize=20
####################################歌曲地址规则####################################
# 歌曲url: http://f2.htqyy.com/play7/20/mp3/2

def make_url(name,page,month):
    """Build the list-page URLs of one ranking and pass each to wash_data.

    Args:
        name: Ranking name typed by the user. Supported values are
            "热播榜", "新曲榜", "最新推荐" and "最新单曲".
        page: Number of list pages to crawl; pageIndex runs from 0 to
            page - 1.
        month: Passed through unchanged to wash_data.

    Returns:
        None. An unsupported name prints a notice and crawls nothing.
    """
    # Ranking name -> path segment of that ranking's list endpoint.
    ranking_paths = {
        "热播榜": 'hot',
        "新曲榜": 'new',
        "最新推荐": 'recommend',
        "最新单曲": 'latest',
    }

    path = ranking_paths.get(name)
    if path is None:
        # Previously an unknown name fell through every branch silently,
        # leaving the user with no hint about why nothing was downloaded.
        print('暂不支持的分类:', name, ',可选分类:', '、'.join(ranking_paths))
        return

    for i in range(0, page):
        url = 'http://www.htqyy.com/top/musicList/{}?pageIndex={}&pageSize=20'.format(path, i)
        wash_data(url, month)

# Seconds to wait on a silent server before a request is abandoned.
_REQUEST_TIMEOUT = 30


def wash_data(url,month):
    """Fetch one ranking list page and download every song found on it.

    Args:
        url: Address of a ranking list page.
        month: Month number entered by the user; forwarded to download(),
            which uses it as the last segment of each song URL.
    """
    view_web = requests.get(url, timeout=_REQUEST_TIMEOUT).text

    # Pull the song ids and titles out of the page markup with regexes.
    songs_sid = re.findall('<input type="checkbox" name="checked" checked="checked" value="(.*?)"><span', view_web)
    songs_title = re.findall('" target="play" title="(.*?)" sid="', view_web)

    download(songs_sid, songs_title, url, month)


def download(songs_sid,songs_title,url,month=4):
    """Download each song as an .mp3 file into A:\\Desktop\\music.

    Args:
        songs_sid: Song ids scraped from a list page.
        songs_title: Song titles, in the same order as songs_sid.
        url: Unused; kept so existing three-argument calls keep working.
        month: Last segment of the song URL. Defaults to 4, the value that
            used to be hard-coded here.
    """
    # zip() stops at the shorter list, so a page where the two regexes
    # matched different counts can no longer raise IndexError.
    for index, (sid, title) in enumerate(zip(songs_sid, songs_title), start=1):
        song_url = 'http://f2.htqyy.com/play7/'+str(sid)+'/mp3/'+str(month)

        print('正在下载第',str(index),'首>>>',title)

        try:
            response = requests.get(song_url, timeout=_REQUEST_TIMEOUT)
            # Without this check an HTTP error page would be saved as .mp3.
            response.raise_for_status()
        except requests.RequestException as err:
            print(title,'下载失败:',err)
            continue

        # Replace characters that Windows does not allow in file names.
        safe_title = re.sub(r'[\\/:*?"<>|]', '_', title)

        with open(r'A:\Desktop\music\{}.mp3'.format(safe_title),'wb') as f:
            f.write(response.content)
        print(title,'下载完成')

        # Zero-length pause between downloads; raise the value to throttle.
        time.sleep(0)

def main():
    """Ask for the ranking, page count and month, then start the crawl."""
    category = input("输入需要爬取的排行榜分类,歌曲名称或歌手:")
    page_count = int(input('输入需要爬取的页数:'))
    current_month = int(input('现在是几月份,输入错误则无法下载:'))
    make_url(category, page_count, current_month)


if __name__ == '__main__':
    main()

扫描二维码,在手机上阅读!
最后修改:2020 年 04 月 26 日 10:23 PM
如果觉得我的文章对你有用,请随意赞赏