快捷導(dǎo)航

python 爬取B站原視頻的實(shí)例代碼

更新時間：2020年09月09日 08:49:52 作者：崔笑顏

這篇文章主要介紹了python 爬取B站原視頻的實(shí)例代碼，幫助大家更好的理解和使用python 爬蟲,感興趣的朋友可以了解下

B站原視頻爬取，我就不多說直接上代碼。直接運(yùn)行就好。
B站是把視頻和音頻分開。要把2個合并起來使用。這個需要分析才能看出來。然后就是登陸這塊是比較難的。

import os
import re
import argparse
import subprocess
import prettytable
from DecryptLogin import login


'''B站類'''
class Bilibili():
	def __init__(self, username, password, **kwargs):
		self.username = username
		self.password = password
		self.session = Bilibili.login(username, password)
		self.headers = {
						'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
					}
		self.user_info_url = 'http://api.bilibili.com/x/space/acc/info'
		self.submit_videos_url = 'http://space.bilibili.com/ajax/member/getSubmitVideos'
		self.view_url = 'http://api.bilibili.com/x/web-interface/view'
		self.video_player_url = 'http://api.bilibili.com/x/player/playurl'
	'''運(yùn)行主程序'''
	def run(self):
		while True:
			userid = input('請輸入目標(biāo)用戶ID(例:345993405)(我的一個LOL好友凱撒可以關(guān)注他一下 謝謝) ——> ')
			user_info = self.__getUserInfo(userid)
			tb = prettytable.PrettyTable()
			tb.field_names = list(user_info.keys())
			tb.add_row(list(user_info.values()))
			print('獲取的用戶信息如下:')
			print(tb)
			is_download = input('是否下載該用戶的所有視頻(y/n, 默認(rèn): y) ——> ')
			if is_download == 'y' or is_download == 'yes' or not is_download:
				self.__downloadVideos(userid)
	'''根據(jù)userid獲得該用戶基本信息'''
	def __getUserInfo(self, userid):
		params = {'mid': userid, 'jsonp': 'jsonp'}
		res = self.session.get(self.user_info_url, params=params, headers=self.headers)
		res_json = res.json()
		user_info = {
						'用戶名': res_json['data']['name'],
						'性別': res_json['data']['sex'],
						'個性簽名': res_json['data']['sign'],
						'用戶等級': res_json['data']['level'],
						'生日': res_json['data']['birthday']
					}
		return user_info
	'''下載目標(biāo)用戶的所有視頻'''
	def __downloadVideos(self, userid):
		if not os.path.exists(userid):
			os.mkdir(userid)
		# 非會員用戶只能下載到高清1080P
		quality = [('16', '流暢 360P'),
				  ('32', '清晰 480P'),
				  ('64', '高清 720P'),
				  ('74', '高清 720P60'),
				  ('80', '高清 1080P'),
				  ('112', '高清 1080P+'),
				  ('116', '高清 1080P60')][-3]
		# 獲得用戶的視頻基本信息
		video_info = {'aids': [], 'cid_parts': [], 'titles': [], 'links': [], 'down_flags': []}
		params = {'mid': userid, 'pagesize': 30, 'tid': 0, 'page': 1, 'order': 'pubdate'}
		while True:
			res = self.session.get(self.submit_videos_url, headers=self.headers, params=params)
			res_json = res.json()
			for item in res_json['data']['vlist']:
				video_info['aids'].append(item['aid'])
			if len(video_info['aids']) < int(res_json['data']['count']):
				params['page'] += 1
			else:
				break
		for aid in video_info['aids']:
			params = {'aid': aid}
			res = self.session.get(self.view_url, headers=self.headers, params=params)
			cid_part = []
			for page in res.json()['data']['pages']:
				cid_part.append([page['cid'], page['part']])
			video_info['cid_parts'].append(cid_part)
			title = res.json()['data']['title']
			title = re.sub(r"[‘'\/\\\:\*\?\"\<\>\|\s']", ' ', title)
			video_info['titles'].append(title)
		print('共獲取到用戶ID<%s>的<%d>個視頻...' % (userid, len(video_info['titles'])))
		for idx in range(len(video_info['titles'])):
			aid = video_info['aids'][idx]
			cid_part = video_info['cid_parts'][idx]
			link = []
			down_flag = False
			for cid, part in cid_part:
				params = {'avid': aid, 'cid': cid, 'qn': quality, 'otype': 'json', 'fnver': 0, 'fnval': 16}
				res = self.session.get(self.video_player_url, params=params, headers=self.headers)
				res_json = res.json()
				if 'dash' in res_json['data']:
					down_flag = True
					v, a = res_json['data']['dash']['video'][0], res_json['data']['dash']['audio'][0]
					link_v = [v['baseUrl']]
					link_a = [a['baseUrl']]
					if v['backup_url']:
						for item in v['backup_url']:
							link_v.append(item)
					if a['backup_url']:
						for item in a['backup_url']:
							link_a.append(item)
					link = [link_v, link_a]
				else:
					link = [res_json['data']['durl'][-1]['url']]
					if res_json['data']['durl'][-1]['backup_url']:
						for item in res_json['data']['durl'][-1]['backup_url']:
							link.append(item)
				video_info['links'].append(link)
				video_info['down_flags'].append(down_flag)
		# 開始下載
		out_pipe_quiet = subprocess.PIPE
		out_pipe = None
		aria2c_path = os.path.join(os.getcwd(), 'tools/aria2c')
		ffmpeg_path = os.path.join(os.getcwd(), 'tools/ffmpeg')
		for idx in range(len(video_info['titles'])):
			title = video_info['titles'][idx]
			aid = video_info['aids'][idx]
			down_flag = video_info['down_flags'][idx]
			print('正在下載視頻<%s>...' % title)
			if down_flag:
				link_v, link_a = video_info['links'][idx]
				# --視頻
				url = '"{}"'.format('" "'.join(link_v))
				command = '{} -c -k 1M -x {} -d "{}" -o "{}" --referer="https://www.bilibili.com/video/av{}" {} {}'
				command = command.format(aria2c_path, len(link_v), userid, title+'.flv', aid, "", url)
				print(command)
				process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe, shell=True)
				process.wait()
				# --音頻
				url = '"{}"'.format('" "'.join(link_a))
				command = '{} -c -k 1M -x {} -d "{}" -o "{}" --referer="https://www.bilibili.com/video/av{}" {} {}'
				command = command.format(aria2c_path, len(link_v), userid, title+'.aac', aid, "", url)
				print(command)

				process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe, shell=True)
				process.wait()
				# --合并
				command = '{} -i "{}" -i "{}" -c copy -f mp4 -y "{}"'
				command = command.format(ffmpeg_path, os.path.join(userid, title+'.flv'), os.path.join(userid, title+'.aac'), os.path.join(userid, title+'.mp4'))
				print(command)

				process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe_quiet, shell=True)
				process.wait()
				os.remove(os.path.join(userid, title+'.flv'))
				os.remove(os.path.join(userid, title+'.aac'))
			else:
				link = video_info['links'][idx]
				url = '"{}"'.format('" "'.join(link))
				command = '{} -c -k 1M -x {} -d "{}" -o "{}" --referer="https://www.bilibili.com/video/av{}" {} {}'
				command = command.format(aria2c_path, len(link), userid, title+'.flv', aid, "", url)
				process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe, shell=True)
				process.wait()
				os.rename(os.path.join(userid, title+'.flv'), os.path.join(userid, title+'.mp4'))
		print('所有視頻下載完成, 該用戶所有視頻保存在<%s>文件夾中...' % (userid))
	'''借助大佬開源的庫來登錄B站'''
	@staticmethod
	def login(username, password):
		_, session = login.Login().bilibili(username, password)
		return session


'''run'''
if __name__ == '__main__':
	parser = argparse.ArgumentParser(description='下載B站指定用戶的所有視頻(僅支持Windows下使用)')
	parser.add_argument('--username', dest='username', help='xxx', type=str, required=True)
	parser.add_argument('--password', dest='password', help='xxxx', type=str, required=True)
	print(parser)
	args = parser.parse_args(['--password', 'xxxx','--username', 'xxx'])
	# args = parser.parse_args(['--password', 'FOO'])
	print('5')
	bili = Bilibili(args.username, args.password)
	bili.run()

把賬號密碼填上就行。這是我根據(jù)一個微信公眾號Charles大佬的想法寫的。大家可以去關(guān)注他一下。

以上就是python 爬取B站原視頻的實(shí)例代碼的詳細(xì)內(nèi)容，更多關(guān)于python 爬取B站原視頻的資料請關(guān)注腳本之家其它相關(guān)文章！

您可能感興趣的文章: