Added a parse step so nobody can break things by typing garbage at the av and P prompts.
https://gist.github.com/cnbeining/9605757
Code below.
'''
Biligrab 0.74
Beining@ACICFG
cnbeining[at]gmail.com
http://www.cnbeining.com
MIT licence
'''
import sys
import os
from StringIO import StringIO
import gzip
import urllib
import urllib2
import commands
from xml.dom.minidom import parseString
reload(sys)
sys.setdefaultencoding('utf-8')
global vid
global cid
global partname
global title
global videourl
global part_now
def list_del_repeat(list):
"""delete repeating items in a list, and keep the order.
http://www.cnblogs.com/infim/archive/2011/03/10/1979615.html"""
l2 = []
[l2.append(i) for i in list if not i in l2]
return(l2)
#----------------------------------------------------------------------
def find_cid_api(vid, p):
"""find cid and print video detail"""
global cid
global partname
global title
global videourl
cookiepath = './bilicookies'
try:
cookies = open(cookiepath, 'r').readline()
#print(cookies)
except:
print('Cannot read cookie, may affect some videos...')
cookies = ''
cid = 0
title = ''
partname = ''
    if str(p) == '0' or str(p) == '1':
        biliurl = 'http://api.bilibili.tv/view?type=xml&appkey=876fe0ebd0e67a0f&id=' + str(vid)
    else:
        biliurl = 'http://api.bilibili.tv/view?type=xml&appkey=876fe0ebd0e67a0f&id=' + str(vid) + '&page=' + str(p)
videourl = 'http://www.bilibili.tv/video/av'+ str(vid)+'/index_'+ str(p)+'.html'
print('Fetching webpage...')
try:
request = urllib2.Request(biliurl, headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' , 'Cookie': cookies})
response = urllib2.urlopen(request)
data = response.read()
dom = parseString(data)
for node in dom.getElementsByTagName('cid'):
if node.parentNode.tagName == "info":
cid = node.toxml()[5:-6]
print('cid is ' + cid)
break
for node in dom.getElementsByTagName('partname'):
if node.parentNode.tagName == "info":
partname = node.toxml()[10:-11].strip()
print('partname is ' + partname)
break
for node in dom.getElementsByTagName('title'):
if node.parentNode.tagName == "info":
title = node.toxml()[7:-8].strip()
print('Title is ' + title)
except: #If API failed
print('ERROR: Cannot connect to API server!')
#----------------------------------------------------------------------
def find_cid_flvcd(videourl):
""""""
global vid
global cid
global partname
global title
print('Fetching webpage via Flvcd...')
request = urllib2.Request(videourl, headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' })
request.add_header('Accept-encoding', 'gzip')
response = urllib2.urlopen(request)
    if response.info().get('Content-Encoding') == 'gzip':
        buf = StringIO(response.read())
        f = gzip.GzipFile(fileobj=buf)
        data = f.read()
    else:
        data = response.read()
    data_list = data.split('\n')
#Todo: read title
for lines in data_list:
if 'cid=' in lines:
cid = lines.split('&')
cid = cid[0].split('=')
cid = cid[-1]
print('cid is ' + str(cid))
break
#----------------------------------------------------------------------
def find_link_flvcd(videourl):
""""""
request = urllib2.Request('http://www.flvcd.com/parse.php'+?+urllib.urlencode([('kw', videourl)]), headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' })
request.add_header('Accept-encoding', 'gzip')
response = urllib2.urlopen(request)
data = response.read()
data_list = data.split('\n')
for items in data_list:
if 'name' in items and 'inf' in items and 'input' in items:
c = items
rawurlflvcd = c[39:-5]
rawurlflvcd = rawurlflvcd.split('|')
return rawurlflvcd
#----------------------------------------------------------------------
def main(vid, p, oversea):
global cid
global partname
global title
global videourl
global is_first_run
videourl = 'http://www.bilibili.tv/video/av'+ str(vid)+'/index_'+ str(p)+'.html'
output = commands.getstatusoutput('ffmpeg --help')
if str(output[0]) == '32512':
print('FFmpeg does not exist! Trying to get you a binary, need root...')
os.system('sudo curl -o /usr/bin/ffmpeg https://raw.githubusercontent.com/superwbd/ABPlayerHTML5-Py--nix/master/ffmpeg')
output = commands.getstatusoutput('aria2c --help')
if str(output[0]) == '32512':
print('aria2c does not exist! Trying to get you a binary, need root... Thanks for @MartianZ \'s work.')
os.system('sudo curl -o /usr/bin/aria2c https://raw.githubusercontent.com/MartianZ/fakeThunder/master/fakeThunder/aria2c')
find_cid_api(vid, p)
global cid
    if cid == 0:
print('Cannot find cid, trying to do it brutely...')
find_cid_flvcd(videourl)
        if cid == 0:
            is_black3 = str(raw_input('Strange, still cannot find cid... Type y for trying the unpredictable way, or input the cid by yourself, press ENTER to quit.'))
            if 'y' in str(is_black3):
                vid = int(vid) - 1
                p = 1
                find_cid_api(vid, p)
                cid = str(int(cid) + 1)
            elif str(is_black3) == '':
                print('Cannot get cid anyway! Quit.')
                exit()
            else:
                cid = str(is_black3)
#start to make folders...
    if title != '':
        folder = title
    else:
        folder = cid
    if len(partname) != 0:
        filename = partname
    elif title != '':
        filename = title
    else:
        filename = cid
    # In case we create too many folders
folder_to_make = os.getcwd() + '/' + folder
if is_first_run == 0:
if not os.path.exists(folder_to_make):
os.makedirs(folder_to_make)
is_first_run = 1
os.chdir(folder_to_make)
print('Fetching XML...')
    os.system('curl -o "'+filename+'.xml" --compressed http://comment.bilibili.cn/'+cid+'.xml')
    # curl --compressed already stores the decompressed XML, so no extra gunzip step is needed
print('The XML file, ' + filename + '.xml should be ready...enjoy!')
print('Finding video location...')
#try api
if oversea == '1':
try:
request = urllib2.Request('http://interface.bilibili.cn/v_cdn_play?cid='+cid, headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' })
except:
print('ERROR: Cannot connect to CDN API server!')
    elif oversea == '2':
#Force get oriurl
try:
request = urllib2.Request('http://interface.bilibili.com/player?id=cid:'+cid, headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' })
except:
print('ERROR: Cannot connect to original source API server!')
else:
try:
request = urllib2.Request('http://interface.bilibili.tv/playurl?cid='+cid, headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' })
except:
print('ERROR: Cannot connect to normal API server!')
response = urllib2.urlopen(request)
data = response.read()
#print(data_list)
rawurl = []
originalurl = ''
    if oversea == '2':
data = data.split('\n')
for l in data:
if 'oriurl' in l:
originalurl = str(l[8:-9])
print('Original URL is ' + originalurl)
break
        if originalurl != '':
rawurl = find_link_flvcd(originalurl)
else:
print('Cannot get original URL! Using falloff plan...')
pass
else:
dom = parseString(data)
for node in dom.getElementsByTagName('url'):
if node.parentNode.tagName == "durl":
rawurl.append(node.toxml()[14:-9])
#print(str(node.toxml()[14:-9]))
pass
    if not rawurl: #hope this never happens
rawurl = find_link_flvcd(videourl)
#flvcd
#print(rawurl)
vid_num = len(rawurl)
    if vid_num == 0:
print('Cannot get download URL!')
exit()
#print(rawurl)
print(str(vid_num) + ' videos in part ' + str(part_now) + ' to download, fetch yourself a cup of coffee...')
for i in range(vid_num):
print('Downloading ' + str(i+1) + ' of ' + str(vid_num) + ' videos in part ' + str(part_now) + '...')
#print('aria2c -llog.txt -c -s16 -x16 -k1M --out '+str(i)+'.flv "'+rawurl[i]+'"')
os.system('aria2c -c -s16 -x16 -k1M --out '+str(i)+'.flv "'+rawurl[i]+'"')
#os.system('aria2c -larialog.txt -c -s16 -x16 -k1M --out '+str(i)+'.flv "'+rawurl[i]+'"')
#not debugging, not fun.
f = open('ff.txt', 'w')
ff = ''
os.getcwd()
for i in range(vid_num):
ff = ff + 'file \'' + str(os.getcwd()) + '/'+ str(i) + '.flv\'\n'
ff = ff.encode("utf8")
f.write(ff)
f.close()
print('Concating videos...')
os.system('ffmpeg -f concat -i ff.txt -c copy "'+filename+'".mp4')
os.system('rm -r ff.txt')
for i in range(vid_num):
os.system('rm -r '+str(i)+'.flv')
print('Done, enjoy yourself!')
#
vid = str(raw_input('av'))
p_raw = str(raw_input('P'))
oversea = str(raw_input('Source?'))
p_list = []
p_raw = p_raw.split(',')
for item in p_raw:
if '~' in item:
#print(item)
lower = 0
higher = 0
item = item.split('~')
try:
lower = int(item[0])
except:
print('Cannot read lower!')
try:
higher = int(item[1])
except:
print('Cannot read higher!')
if lower == 0 or higher == 0:
if lower == 0 and higher != 0:
lower = higher
elif lower != 0 and higher == 0:
higher = lower
else:
print('Cannot find any higher or lower, ignoring...')
#break
mid = 0
if higher < lower:
mid = higher
higher = lower
lower = mid
p_list.append(lower)
while lower < higher:
lower = lower + 1
p_list.append(lower)
#break
else:
try:
p_list.append(int(item))
except:
            print('Cannot read "'+str(item)+'", abandoning it.')
#break
p_list = list_del_repeat(p_list)
global is_first_run
is_first_run = 0
part_now = '0'
print(p_list)
for p in p_list:
reload(sys)
sys.setdefaultencoding('utf-8')
part_now = str(p)
main(vid, p, oversea)
exit()
'''
data_list = data.split('\r')
for lines in data_list:
lines = str(lines)
if '<url>' in lines:
if 'youku' in lines:
url = lines[17:-9]
elif 'sina' in lines:
url = lines[16:-9]
elif 'qq.com' in lines:
url = lines[17:-9]
elif 'letv.com' in lines:
url = lines[17:-9]
break
elif 'acgvideo' in lines:
url = lines[17:-9]
is_local = 1
rawurl.append(url)
if 'backup_url' in lines and is_local is 1:
break'''
This is great now.
By the way, `oversea = str(raw_input('Source?'))` would be better as `oversea = str(raw_input('Source [0=Auto, 1=Oversea CDN acceleration, 2=Original source]: '))`, so users aren't left guessing what to type, especially since a Source=3 may be added in the future.
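Something along these lines would do it (only a sketch: the exact prompt wording and the retry loop are my suggestion, not what the script currently does):

# Hypothetical prompt with basic validation, so a stray keystroke cannot
# send the script down the wrong code path.
oversea = ''
while oversea not in ('0', '1', '2'):
    oversea = str(raw_input('Source [0=Auto, 1=Oversea CDN acceleration, 2=Original source]: ')).strip()
    if oversea not in ('0', '1', '2'):
        print('Please enter 0, 1 or 2.')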
Actually, once HD parsing is factored in, the number of modes would have to triple. Right now I still don't understand two things: which resolutions q=0, q=2 and q=4 map to, and why the QQ source returns 503 for HD.
I'm not going to worry about how they run things; everything goes through the API as requested. If it can play, it can be downloaded; worst case we fake a user agent later on...
Besides, the CDN isn't necessarily overseas; there are domestic nodes too.
The reason I didn't write it in is that some videos aren't backed up on Bilibili's own CDN, some are backed up on LeTV, and some on their own CDN. This feature can only "request" that address; it cannot guarantee you actually get the one you want, because the client's IP address also plays a part...
If Source=3 gets added, it should force Flvcd parsing, and Source=4 would force flvxz parsing. If Bilibili doesn't expose any other APIs, that's really all there is.
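If it comes to that, the dispatch could look roughly like this (a sketch only: find_link_flvxz is a hypothetical helper that would still have to be written against whatever interface flvxz exposes):

# Hypothetical source dispatcher; only find_link_flvcd exists in the script above,
# find_link_flvxz is a placeholder for a future flvxz-based resolver.
def pick_rawurl(oversea, videourl):
    if oversea == '3':
        return find_link_flvcd(videourl)   # force Flvcd parsing
    elif oversea == '4':
        return find_link_flvxz(videourl)   # force flvxz parsing (not implemented yet)
    return []                              # anything else: keep using the normal API paths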
Unlike some of the "black tech" tools, Biligrab doesn't maintain any servers, so it won't go hunting for self-hosted sources built on those tricks. Those are all meant for new anime episodes. This script doesn't try to download new episodes, only their danmaku; finding the episodes themselves is left to ktxp, and then everything gets thrown into a danmaku player together.
Of course, if you really can't resist watching new episodes, you can always use another script on top of flvcd...
What about HD, then? Both Youku and QQ have HD sources. From packet captures I found it's the q parameter, taking values like 2 and 4. Has anyone looked into this?
What I mean is, printing just "Source?" leaves people with no idea what to type; it would be better to hint that they should enter a number.
As for new episodes, I haven't seen playurl fail yet. Can you give an example?
Just in case. Especially for some of the blocked ones.
Where does this q parameter show up?
Adding a hint is fine too, though admittedly this script was never aimed at complete beginners...
I never use this thing to download new episodes, because I never watch the official translations... I recall the iqiyi source doesn't work.
Sorry, I just re-read my code: it's quality, not q.
The quality parameter appears in playurl.
You know the official Bilibili player has two places to adjust picture quality: the settings panel on the right (Youku) and the bottom-left corner (LeTV, QQ). Once HD is switched on, a quality parameter shows up in that playurl HTTP request.
I still don't quite understand what values this parameter takes: sometimes setting it at random gives you SD, and sometimes it returns an address that answers with a 503.
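If anyone wants to poke at it, something like this could map the values out (a sketch: that quality is a plain query parameter on playurl, and which values are worth trying, are guesses from the captured requests):

# Probe playurl with different quality values and report the HTTP status of the
# returned address; the '&quality=' form is an assumption based on packet captures.
import urllib2
from xml.dom.minidom import parseString

def probe_quality(cid, quality):
    data = urllib2.urlopen('http://interface.bilibili.tv/playurl?cid=' + str(cid) + '&quality=' + str(quality)).read()
    for node in parseString(data).getElementsByTagName('url'):
        if node.parentNode.tagName == 'durl':
            video = node.firstChild.nodeValue
            try:
                code = urllib2.urlopen(video).getcode()
            except urllib2.HTTPError, e:
                code = e.code
            print('quality=%s -> HTTP %s' % (quality, code))

# for q in (0, 1, 2, 3, 4):
#     probe_quality(your_cid_here, q)   # your_cid_here: fill in a real cid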
The iqiyi source can be resolved directly through playurl; I watched SAO2 with it just yesterday...
For some reason I simply cannot leave comments on your blog...
1. You could now add a "force parsing of the original source" option.
2. I sometimes download a whole batch and watch them in one go...
3. I went with the rather old-fashioned approach of just setting up a player for the danmaku... ASS works too, of course; at least it's watchable on mobile.
I can't even get into the mobile side right now...
That's because my blog comments got bloody tied to Google+ by Google... There are pros and cons, but not being able to comment without a G+ account really is awful.
I'll go add the "force original source" feature right away. That's the player?id=cid: endpoint, right?
....This is going to be the death of me.
That's the one, but note that direct uploads and the like have no original source and will throw an error, which is why, as you can see, I added the Flvcd fallback...
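Condensed, that code path is roughly as follows (same tag slicing as in the script above; wrapping it in a helper like this is just for illustration):

# Illustration of the force-original-source path: ask player?id=cid: for oriurl,
# and if none comes back (e.g. a direct upload), fall back to Flvcd on the
# Bilibili page itself. find_link_flvcd is the function from the script above.
import urllib2

def get_original_or_fallback(cid, videourl):
    data = urllib2.urlopen('http://interface.bilibili.com/player?id=cid:' + str(cid)).read()
    originalurl = ''
    for line in data.split('\n'):
        if 'oriurl' in line:
            originalurl = line[8:-9]   # strip <oriurl> and </oriurl>
            break
    if originalurl != '':
        return find_link_flvcd(originalurl)   # parse the original (Youku/QQ/...) page
    return find_link_flvcd(videourl)          # no original source: fall back to the Bilibili page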
Got it.