#!/usr/bin/env python3
# -*- coding: utf-8 -*-
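# @DESC: Crawl the lyrics of the songs on the first 3 search-result pages for a singer and save each song's lyrics to its own file, named after the song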
# @Date: 2020-05-21
import requests
import re,os
# Output directory for the per-song lyric files.
os.makedirs('music', exist_ok=True)

# Seconds to wait for the server before a request is abandoned; without a
# timeout a stalled connection would block the script forever.
_REQUEST_TIMEOUT = 10

# Matches runs made of CJK ideographs and colons. Compiled once instead of
# once per song.
_WORD_REGEX = re.compile(r'[\u4e00-\u9fa5:]+')

# Characters that cannot appear in a file name on common file systems.
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _extract_lyrics(words):
    """Return the lyric lines found in *words*, one per line.

    *words* is the list of tokens matched by ``_WORD_REGEX`` in the lyric
    response. The heuristic is:

    1. In the first half of the list (skipping index 0), find the first token
       that contains a colon after its first character (a "label:value"
       credit line).
    2. From there, still within the first half, find the first position that
       starts three consecutive colon-free tokens; the lyrics are taken to
       begin at that position.
    3. Keep every colon-free token from that position onwards.

    If no credit line is found, every colon-free token is kept instead of
    failing or reusing the position computed for a previous song.
    """
    half = len(words) // 2
    first = next((i for i in range(1, half) if words[i].find(':') > 0), None)
    if first is None:
        start = 0
    else:
        start = first
        for i in range(first, half):
            if all(':' not in word for word in words[i:i + 3]):
                start = i
                break
    return ''.join(f'{word}\n' for word in words[start:] if ':' not in word)


# Request headers sent with the search request.
headers = {
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
    'origin':'https://y.qq.com',
    'referer':'https://y.qq.com/portal/search.html'
}
url = 'https://c.y.qq.com/soso/fcgi-bin/client_search_cp'
singer = input('请输入歌手的名称:--> ')

# Crawl three pages of search results.
# NOTE(review): `p` is sent as 0, 1, 2 -- confirm whether the search API
# numbers pages from 1; if so these are not the first three pages.
for p in range(3):
    kw = {
        'ct': '24',
        'qqmusic_ver': '1298',
        'new_json': '1',
        'remoteplace': 'txt.yqq.song',
        'searchid': '58441748906438518',
        't': '0',
        'aggr': '1',
        'cr': '1',
        'catZhida': '1',
        'lossless': '0',
        'flag_qc': '0',
        'p': p,
        'n': '10',
        'w': singer,
        'g_tk_new_20200303': '1324537534',
        'g_tk': '1324537534',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0'
    }
    try:
        response = requests.get(url, headers=headers, params=kw,
                                timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print(f'服务器连接失败:{err}')
        continue
    if response.status_code != 200:
        print(f'服务器连接失败,响应码:{response.status_code}')
        continue

    try:
        songs_lst = response.json()['data']['song']['list']
    except (ValueError, KeyError, TypeError) as err:
        # The body was not JSON or did not have the expected layout.
        print(f'解析搜索结果失败:{err!r}')
        continue

    for song in songs_lst:
        song_name = song['name']  # song title
        album_name = song['album']['name']  # album title
        interval = song['interval']  # playing time
        mid = song['mid']  # identifier used in the song page URL
        musicid = song['id']  # identifier used by the lyric endpoint
        sing_song_url = f'https://y.qq.com/n/yqq/song/{mid}.html'
        song_info = f'歌名:{song_name}\n时长:{interval}\n专辑:{album_name}\n歌曲链接:{sing_song_url}\n***\n'
        print(song_info)

        # Same headers as the search request, but the referer is the song's
        # own page.
        h = {**headers, 'referer': sing_song_url}
        url1 = 'https://c.y.qq.com/lyric/fcgi-bin/fcg_query_lyric_yqq.fcg'
        params = {
            'nobase64':'1',
            'musicid': musicid,
            '-':'jsonp1',
            'g_tk_new_20200303':'',
            'g_tk':'1324537534',
            'loginUin':'',
            'hostUin':'0',
            'format':'json',
            'inCharset':'utf8',
            'outCharset':'utf-8',
            'notice':'0',
            'platform':'yqq.json',
            'needNewCode':'0'
        }
        try:
            lyric_response = requests.get(url1, headers=h, params=params,
                                          timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print(f'获取歌词页面失败:{err}')
            continue
        if lyric_response.status_code != 200:
            print(f'获取歌词页面失败,响应码:{lyric_response.status_code}')
            continue

        print(lyric_response.url)
        ch_words = _WORD_REGEX.findall(lyric_response.text)
        print(ch_words)
        lyrics = _extract_lyrics(ch_words)

        # Replace characters that are illegal in file names so that a title
        # such as "A/B" does not make open() fail, and write UTF-8 explicitly
        # so the result does not depend on the platform's default encoding.
        file_name = _INVALID_FILENAME_CHARS.sub('_', song_name)
        lyric_path = os.path.join('music', f'{file_name}.txt')
        with open(lyric_path, 'w', encoding='utf-8') as f:
            f.write(lyrics)

print('完成')