新版:http://www.cnbeining.com/?p=410 增加了海外源功能。
差不多是最终版了。
自己抓,自己建立文件夹,自己改名,自己下载,自己合并,自己清理,自己弄弹幕。
国内外都友好。(联通你个缺大德的东西,没有你我根本不用写这么多,尼玛死。。。)
API化,不怕封锁。
源地址获取双保险,cid获取双保险。再不行的话,我也没辙了。
需要aria2c,ffmpeg。
我在OSX上写的,并测试成功。Linux也能用。Windows嘛,请用AcDown,然后自己合并合并。
已知问题:
极其极端情况下,下载失败。我也不知道为什么了。。。应该是sina的CDN问题吧。
''' Biligrab 0.2 Beining@ACICFG cnbeining[at]gmail.com MIT licence ''' import sys import os from StringIO import StringIO import gzip import urllib2 import sys reload(sys) sys.setdefaultencoding('utf-8') def main(vid, p): cid = 0 title = '' partname = '' biliurl = 'http://api.bilibili.tv/view?type=xml&appkey=876fe0ebd0e67a0f&id=' + str(vid) + '&page=' + str(p) videourl = 'http://www.bilibili.tv/video/av'+vid+'/index_'+p+'.html' print('Fetching webpage...') request = urllib2.Request(biliurl) response = urllib2.urlopen(request) data = response.read() data_list = data.split('\n') for lines in data_list: if 'cid' in lines: cid = lines[7:-6] print('cid is ' + str(cid)) if 'partname' in lines: partname = lines[12:-11] print('partname is ' + str(partname)) if 'title' in lines: title = lines[9:-8] print('title is ' + str(title)) if cid is 0: print('Cannot find cid, trying to do it brutely...') print('Fetching webpage...') request = urllib2.Request(videourl) request.add_header('Accept-encoding', 'gzip') response = urllib2.urlopen(request) if response.info().get('Content-Encoding') == 'gzip': buf = StringIO( response.read()) f = gzip.GzipFile(fileobj=buf) data = f.read() data_list = data.split('\n') #Todo: read title for lines in data_list: if 'cid=' in lines: cid = lines.split('&') cid = cid[0].split('=') cid = cid[-1] print('cid is ' + str(cid)) break if cid is 0: print('Cannot get cid anyway, you fix it, I am off to get some coffee...') exit() #start to make folders... if title is not '': folder = title else: folder = cid if partname is not '': filename = partname elif title is not '': filename = title else: filename = cid folder_to_make = os.getcwd() + '/' + folder if not os.path.exists(folder_to_make): os.makedirs(folder_to_make) os.chdir(folder_to_make) print('Fetching XML...') os.system('curl -o "'+filename+'.xml" --compressed http://comment.bilibili.tv/'+cid+'.xml') #os.system('gzip -d '+cid+'.xml.gz') print('The XML file, ' + filename + '.xml should be ready...enjoy!') print('Finding video location...') #try api request = urllib2.Request('http://interface.bilibili.tv/playurl?cid='+cid, headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' }) response = urllib2.urlopen(request) data = response.read() data_list = data.split('\r') #print(data_list) rawurl = [] vid_num = 0 for lines in data_list: lines = str(lines) if '<url>' in lines: if 'youku' in lines: url = lines[17:-9] elif 'sina' in lines: url = lines[16:-9] rawurl.append(url) if 'backup_url' in lines: break if rawurl is []: #hope this never happen request = urllib2.Request('http://www.flvcd.com/parse.php?kw='+videourl) request.add_header('Accept-encoding', 'gzip') response = urllib2.urlopen(request) data = response.read() data_list = data.split('\n') for items in data_list: if 'name' in items and 'inf' in items and 'input' in items: c = items rawurl = c[39:-5] rawurl = rawurl.split('|') break #print(rawurl) vid_num = len(rawurl) #print(rawurl) print(str(vid_num) + ' videos to download, fetch yourself a cup of coffee...') for i in range(vid_num): print('Downloading ' + str(i+1) + ' of ' + str(vid_num) + ' videos...') #print('aria2c -llog.txt -c -s16 -x16 -k1M --out '+str(i)+'.flv "'+rawurl[i]+'"') os.system('aria2c -larialog.txt -c -s16 -x16 -k1M --out '+str(i)+'.flv "'+rawurl[i]+'"') f = open('ff.txt', 'w') ff = '' os.getcwd() for i in range(vid_num): ff = ff + 'file \'' + str(os.getcwd()) + '/'+ str(i) + '.flv\'\n' ff = ff.encode("utf8") f.write(ff) f.close() print('Concating videos...') os.system('ffmpeg -f concat -i ff.txt -c copy "'+filename+'".mp4') os.system('rm -r ff.txt') for i in range(vid_num): os.system('rm -r '+str(i)+'.flv') print('Done, enjoy yourself!') exit() vid = str(input('av')) p = str(input('P')) main(vid, p)
这个语言我没用过。
大家做法都一样,唯一可以给的建议是,可以用json,xml格式的数据实在是太麻烦,json好
过了半年来看,其实都一样,只不过我个人喜欢XML看起来规矩一点。。。。。。。。。