https://github.com/cnbeining/Biligrab
其实是不少更新。。。
我说这几天为什么flvcd模块不大舒服呢,原来死了。。。
合并后检验,如果没出MP4(我还真不知道,也不想冒险检验文件长度),就转成flv,还不行就报错不删文件。够仁义了吧。。。
代码下面。
''' Biligrab 0.81 Beining@ACICFG cnbeining[at]gmail.com http://www.cnbeining.com MIT licence ''' import sys import os from StringIO import StringIO import gzip import urllib import urllib2 import sys import commands import hashlib from xml.dom.minidom import parse, parseString import xml.dom.minidom reload(sys) sys.setdefaultencoding('utf-8') global vid global cid global partname global title global videourl global part_now global appkey global secretkey appkey='85eb6835b0a1034e'; secretkey = '2ad42749773c441109bdc0191257a664' def list_del_repeat(list): """delete repeating items in a list, and keep the order. http://www.cnblogs.com/infim/archive/2011/03/10/1979615.html""" l2 = [] [l2.append(i) for i in list if not i in l2] return(l2) #---------------------------------------------------------------------- def find_cid_api(vid, p): """find cid and print video detail""" global cid global partname global title global videourl cookiepath = './bilicookies' try: cookies = open(cookiepath, 'r').readline() #print(cookies) except: print('Cannot read cookie, may affect some videos...') cookies = '' cid = 0 title = '' partname = '' if str(p) is '0' or str(p) is '1': str2Hash = 'appkey=85eb6835b0a1034e&id=' + str(vid) + '&type=xml2ad42749773c441109bdc0191257a664' sign_this = hashlib.md5(str2Hash.encode('utf-8')).hexdigest() biliurl = 'https://api.bilibili.com/view?appkey=85eb6835b0a1034e&id=' + str(vid) + '&type=xml&sign=' + sign_this print(biliurl) else: str2Hash = 'appkey=85eb6835b0a1034e&id=' + str(vid) + '&page=' + str(p) + '&type=xml2ad42749773c441109bdc0191257a664' sign_this = hashlib.md5(str2Hash.encode('utf-8')).hexdigest() biliurl = 'https://api.bilibili.com/view?appkey=85eb6835b0a1034e&id=' + str(vid) + '&page=' + str(p) + '&type=xml&sign=' + sign_this #print(biliurl) videourl = 'http://www.bilibili.com/video/av'+ str(vid)+'/index_'+ str(p)+'.html' print('Fetching webpage...') try: request = urllib2.Request(biliurl, headers={ 'User-Agent' : 'Biligrab /0.8 (cnbeining@gmail.com)', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' , 'Cookie': cookies}) response = urllib2.urlopen(request) data = response.read() dom = parseString(data) for node in dom.getElementsByTagName('cid'): if node.parentNode.tagName == "info": cid = node.toxml()[5:-6] print('cid is ' + cid) break for node in dom.getElementsByTagName('partname'): if node.parentNode.tagName == "info": partname = node.toxml()[10:-11].strip() print('partname is ' + partname) break for node in dom.getElementsByTagName('title'): if node.parentNode.tagName == "info": title = node.toxml()[7:-8].strip() print('Title is ' + title) except: #If API failed print('ERROR: Cannot connect to API server!') #---------------------------------------------------------------------- def find_cid_flvcd(videourl): """""" global vid global cid global partname global title print('Fetching webpage via Flvcd...') request = urllib2.Request(videourl, headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' }) request.add_header('Accept-encoding', 'gzip') response = urllib2.urlopen(request) if response.info().get('Content-Encoding') == 'gzip': buf = StringIO( response.read()) f = gzip.GzipFile(fileobj=buf) data = f.read() data_list = data.split('\n') #Todo: read title for lines in data_list: if 'cid=' in lines: cid = lines.split('&') cid = cid[0].split('=') cid = cid[-1] print('cid is ' + str(cid)) break #---------------------------------------------------------------------- def find_link_flvcd(videourl): """""" request = urllib2.Request('http://www.flvcd.com/parse.php?'+urllib.urlencode([('kw', videourl)]), headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' }) request.add_header('Accept-encoding', 'gzip') response = urllib2.urlopen(request) data = response.read() data_list = data.split('\n') for items in data_list: if 'name' in items and 'inf' in items and 'input' in items: c = items rawurlflvcd = c[59:-5] rawurlflvcd = rawurlflvcd.split('|') return rawurlflvcd #---------------------------------------------------------------------- def main(vid, p, oversea): global cid global partname global title global videourl global is_first_run videourl = 'http://www.bilibili.com/video/av'+ str(vid)+'/index_'+ str(p)+'.html' output = commands.getstatusoutput('ffmpeg --help') if str(output[0]) == '32512': print('FFmpeg does not exist! Trying to get you a binary, need root...') os.system('sudo curl -o /usr/bin/ffmpeg https://raw.githubusercontent.com/superwbd/ABPlayerHTML5-Py--nix/master/ffmpeg') output = commands.getstatusoutput('aria2c --help') if str(output[0]) == '32512': print('aria2c does not exist! Trying to get you a binary, need root... Thanks for @MartianZ \'s work.') os.system('sudo curl -o /usr/bin/aria2c https://raw.githubusercontent.com/MartianZ/fakeThunder/master/fakeThunder/aria2c') find_cid_api(vid, p) global cid if cid is 0: print('Cannot find cid, trying to do it brutely...') find_cid_flvcd(videourl) if cid is 0: is_black3 = str(raw_input('Strange, still cannot find cid... Type y for trying the unpredictable way, or input the cid by yourself, press ENTER to quit.')) if 'y' in str(is_black3): vid = vid - 1 p = 1 find_cid_api(vid-1, p) cid = cid + 1 elif str(is_black3) is '': print('Cannot get cid anyway! Quit.') exit() else: cid = str(is_black3) #start to make folders... if title is not '': folder = title else: folder = cid if len(partname) is not 0: filename = partname elif title is not '': filename = title else: filename = cid # In case make too much folders folder_to_make = os.getcwd() + '/' + folder if is_first_run == 0: if not os.path.exists(folder_to_make): os.makedirs(folder_to_make) is_first_run = 1 os.chdir(folder_to_make) print('Fetching XML...') os.system('curl -o "'+filename+'.xml" --compressed http://comment.bilibili.com/'+cid+'.xml') os.system('gzip -d '+cid+'.xml.gz') print('The XML file, ' + filename + '.xml should be ready...enjoy!') print('Finding video location...') #try api sign_this = hashlib.md5('appkey=' + appkey + '&cid=' + cid + secretkey.encode('utf-8')).hexdigest() if oversea == '1': try: request = urllib2.Request('http://interface.bilibili.com/v_cdn_play?appkey=' + appkey + '&cid=' + cid + '&sign=' + sign_this, headers={ 'User-Agent' : 'Biligrab /0.8 (cnbeining@gmail.com)', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' }) except: print('ERROR: Cannot connect to CDN API server!') elif oversea is '2': #Force get oriurl try: request = urllib2.Request('http://interface.bilibili.com/player?appkey=' + appkey + '&cid=' + cid + '&sign=' + sign_this, headers={ 'User-Agent' : 'Biligrab /0.8 (cnbeining@gmail.com)', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' }) except: print('ERROR: Cannot connect to original source API server!') else: try: request = urllib2.Request('http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid + '&sign=' + sign_this, headers={ 'User-Agent' : 'Biligrab /0.8 (cnbeining@gmail.com)', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' }) except: print('ERROR: Cannot connect to normal API server!') response = urllib2.urlopen(request) data = response.read() rawurl = [] originalurl = '' if oversea is '2': data = data.split('\n') for l in data: if 'oriurl' in l: originalurl = str(l[8:-9]) print('Original URL is ' + originalurl) break if originalurl is not '': rawurl = find_link_flvcd(originalurl) else: print('Cannot get original URL! Using falloff plan...') pass else: dom = parseString(data) for node in dom.getElementsByTagName('url'): if node.parentNode.tagName == "durl": rawurl.append(node.toxml()[14:-9]) #print(str(node.toxml()[14:-9])) pass if rawurl is []: #hope this never happen rawurl = find_link_flvcd(videourl) #flvcd #print(rawurl) vid_num = len(rawurl) if vid_num is 0: print('Cannot get download URL!') exit() #print(rawurl) print(str(vid_num) + ' videos in part ' + str(part_now) + ' to download, fetch yourself a cup of coffee...') for i in range(vid_num): print('Downloading ' + str(i+1) + ' of ' + str(vid_num) + ' videos in part ' + str(part_now) + '...') #print('aria2c -llog.txt -c -s16 -x16 -k1M --out '+str(i)+'.flv "'+rawurl[i]+'"') os.system('aria2c -c -s16 -x16 -k1M --out '+str(i)+'.flv "'+rawurl[i]+'"') #os.system('aria2c -larialog.txt -c -s16 -x16 -k1M --out '+str(i)+'.flv "'+rawurl[i]+'"') #not debugging, not fun. f = open('ff.txt', 'w') ff = '' os.getcwd() for i in range(vid_num): ff = ff + 'file \'' + str(os.getcwd()) + '/'+ str(i) + '.flv\'\n' ff = ff.encode("utf8") f.write(ff) f.close() print('Concating videos...') os.system('ffmpeg -f concat -i ff.txt -c copy "'+filename+'".mp4') if os.path.isfile(str(filename+'.mp4')): os.system('rm -r ff.txt') for i in range(vid_num): os.system('rm -r '+str(i)+'.flv') print('Done, enjoy yourself!') else: print('ERROR: Cannot concatenative files, trying to make flv...') os.system('ffmpeg -f concat -i ff.txt -c copy "'+filename+'".flv') if os.path.isfile(str(filename+'.flv')): print('FLV file made. Not possible to mux to MP4, highly likely due to audio format.') os.system('rm -r ff.txt') for i in range(vid_num): os.system('rm -r '+str(i)+'.flv') else: print('ERROR: Cannot concatenative files, trying to make flv...') vid = str(raw_input('av')) p_raw = str(raw_input('P')) oversea = str(input('Source?')) p_list = [] p_raw = p_raw.split(',') for item in p_raw: if '~' in item: #print(item) lower = 0 higher = 0 item = item.split('~') try: lower = int(item[0]) except: print('Cannot read lower!') try: higher = int(item[1]) except: print('Cannot read higher!') if lower == 0 or higher == 0: if lower == 0 and higher != 0: lower = higher elif lower != 0 and higher == 0: higher = lower else: print('Cannot find any higher or lower, ignoring...') #break mid = 0 if higher < lower: mid = higher higher = lower lower = mid p_list.append(lower) while lower < higher: lower = lower + 1 p_list.append(lower) #break else: try: p_list.append(int(item)) except: print('Cannot read "'+str(item)+'", abondon it.') #break p_list = list_del_repeat(p_list) global is_first_run is_first_run = 0 part_now = '0' print(p_list) for p in p_list: reload(sys) sys.setdefaultencoding('utf-8') part_now = str(p) main(vid, p, oversea) exit() ''' data_list = data.split('\r') for lines in data_list: lines = str(lines) if '<url>' in lines: if 'youku' in lines: url = lines[17:-9] elif 'sina' in lines: url = lines[16:-9] elif 'qq.com' in lines: url = lines[17:-9] elif 'letv.com' in lines: url = lines[17:-9] break elif 'acgvideo' in lines: url = lines[17:-9] is_local = 1 rawurl.append(url) if 'backup_url' in lines and is_local is 1: break'''