Biligrab 0.5: 加入一个新黑科技,加入批量下载

在P的地方输入:
1,2,3,4,5,6~10
 
简单明了吧。
 
单独也好用。
 
今天发现如果B站API挂了,程序会直接报错,所以加了一下error-handling。
 
Gist老地方:https://gist.github.com/superwbd/9605757
 
代码藏起来,否则页面都慢了。

'''
Biligrab 0.5
Beining@ACICFG
cnbeining[at]gmail.com
MIT licence
'''
import sys
import os
from StringIO import StringIO
import gzip
import urllib2
import sys
import commands
# Python 2 idiom: site.py removes sys.setdefaultencoding at interpreter
# start-up, so the module is reloaded to get the function back, and the codec
# used for implicit str/unicode conversions is then forced to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# Names shared between the functions below. A ``global`` statement at module
# level has no effect; the functions that assign these names declare them
# ``global`` again inside their own bodies.
global vid
global cid
global partname
global title
global videourl
global part_now
def list_del_repeat(list):
    """Return the items of ``list`` without duplicates, keeping first-seen order.

    Membership is tested with ``in`` on the result list, so unhashable items
    (for example nested lists) are supported.
    http://www.cnblogs.com/infim/archive/2011/03/10/1979615.html

    NOTE: the parameter name shadows the ``list`` builtin; it is kept as is so
    that existing keyword callers keep working.
    """
    unique = []
    for item in list:
        if item not in unique:
            unique.append(item)
    return unique
#----------------------------------------------------------------------
def find_cid_api(vid, p):
    """Look up cid, title and part name of a video part through the view API.

    The results are stored in the module globals ``cid``, ``title`` and
    ``partname``, which are first reset to ``0``, ``''`` and ``''``.  The
    global ``videourl`` is set to the web page address of the requested part.
    Every value found is also printed.

    vid -- the av number (anything ``str()`` can render)
    p   -- the part (page) number

    Nothing is returned and nothing is raised on a failed request: an error
    message is printed and the globals keep their reset values, so callers
    must check ``cid`` afterwards.
    """
    global cid
    global partname
    global title
    global videourl
    cid = 0
    title = ''
    partname = ''
    biliurl = 'http://api.bilibili.tv/view?type=xml&appkey=876fe0ebd0e67a0f&id=' + str(vid) + '&page=' + str(p)
    videourl = 'http://www.bilibili.tv/video/av'+ str(vid)+'/index_'+ str(p)+'.html'
    print('Fetching webpage...')
    try:
        request = urllib2.Request(biliurl, headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' })
        response = urllib2.urlopen(request)
        try:
            data = response.read()
        finally:
            response.close()
    except Exception:
        # Deliberately best-effort: any failure to fetch leaves cid at 0 for
        # the caller to handle.  Unlike a bare ``except:`` this still lets
        # Ctrl-C (KeyboardInterrupt) and SystemExit through.
        print('ERROR: Cannot connect to API server!')
        return
    for lines in data.split('\n'):
        # Match the opening tag rather than the bare word, so that e.g. a
        # description mentioning "title" cannot overwrite the real value.
        # The fixed slice offsets assume each element sits alone on its line
        # behind two characters of indentation.
        if '<cid>' in lines:
            cid = lines[7:-6]
            print('cid is ' + str(cid))
        if '<partname>' in lines:
            partname = lines[12:-11]
            print('partname is ' + str(partname))
        if '<title>' in lines:
            title = lines[9:-8]
            print('title is ' + str(title))
#----------------------------------------------------------------------
def find_cid_flvcd(videourl):
    """Scrape the cid out of the page at ``videourl``.

    The page is requested with gzip allowed and decompressed when the server
    actually used it.  The first line containing ``cid=`` is taken, and the
    text between the last ``=`` of its first ``&``-separated field and that
    ``&`` is stored in the module global ``cid`` and printed.

    Nothing is returned.  When the page cannot be fetched an error is printed
    and ``cid`` is left untouched, so callers must check it afterwards.
    """
    global cid
    print('Fetching webpage via Flvcd...')
    # urlopen() takes no ``headers`` argument, so they have to be attached to
    # the Request object instead.
    request = urllib2.Request(videourl, headers={ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' })
    request.add_header('Accept-encoding', 'gzip')
    try:
        response = urllib2.urlopen(request)
        try:
            data = response.read()
            encoding = response.info().get('Content-Encoding')
        finally:
            response.close()
        if encoding == 'gzip':
            # Servers may ignore Accept-encoding; only gunzip when told to.
            data = gzip.GzipFile(fileobj=StringIO(data)).read()
    except Exception:
        # Best-effort, like find_cid_api: report and let the caller move on.
        print('ERROR: Cannot fetch the video page!')
        return
    #Todo: read title
    for lines in data.split('\n'):
        if 'cid=' in lines:
            first_field = lines.split('&')[0]
            cid = first_field.split('=')[-1]
            print('cid is ' + str(cid))
            break
#----------------------------------------------------------------------
# Browser-like request headers shared by the HTTP helpers below.
_HTTP_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
    'Cache-Control': 'no-cache',
    'Pragma': 'no-cache',
}
#----------------------------------------------------------------------
def _http_get(url, accept_gzip=False):
    """Fetch ``url`` and return the body, gunzipped when the server gzipped it."""
    request = urllib2.Request(url, headers=_HTTP_HEADERS)
    if accept_gzip:
        request.add_header('Accept-encoding', 'gzip')
    response = urllib2.urlopen(request)
    try:
        body = response.read()
        encoding = response.info().get('Content-Encoding')
    finally:
        response.close()
    if encoding == 'gzip':
        body = gzip.GzipFile(fileobj=StringIO(body)).read()
    return body
#----------------------------------------------------------------------
def _run(argv):
    """Run an external program from an argument list (no shell); return its exit status."""
    import subprocess
    try:
        return subprocess.call(argv)
    except OSError as err:
        # e.g. the binary is not installed; report instead of crashing.
        print('ERROR: Cannot run ' + argv[0] + ': ' + str(err))
        return 127
#----------------------------------------------------------------------
def _remove_quietly(path):
    """Delete ``path``; a file that is missing or undeletable is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass
#----------------------------------------------------------------------
def _extract_video_urls(data):
    """Return the primary segment URLs found in a playurl API response."""
    import re
    # Everything from the first backup_url element on is a mirror list.
    primary = data.split('backup_url', 1)[0]
    urls = []
    # Take the text of each <url> element, with or without a CDATA wrapper,
    # instead of slicing at fixed, host-dependent offsets.
    for url in re.findall(r'<url>\s*(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?\s*</url>', primary, re.S):
        if 'letv.com' in url:
            # NOTE(review): the original stopped at the first letv.com entry
            # without keeping it, presumably so that the Flvcd fallback takes
            # over -- behaviour kept, please confirm.
            break
        urls.append(url)
    return urls
#----------------------------------------------------------------------
def _find_urls_via_flvcd(page_url):
    """Ask flvcd.com to resolve ``page_url``; return its URL list, or [] if none."""
    data = _http_get('http://www.flvcd.com/parse.php?kw=' + page_url, accept_gzip=True)
    for line in data.split('\n'):
        if 'name' in line and 'inf' in line and 'input' in line:
            # Fixed offsets strip the surrounding <input ...> markup.
            return line[39:-5].split('|')
    return []
#----------------------------------------------------------------------
def _download_part(cid, filename, p, oversea, page_url):
    """Download comments and video of one part into the current directory."""
    print('Fetching XML...')
    _run(['curl', '-o', filename + '.xml', '--compressed', 'http://comment.bilibili.cn/' + cid + '.xml'])
    print('The XML file, ' + filename + '.xml should be ready...enjoy!')
    print('Finding video location...')
    if oversea == '1':
        api_url = 'http://interface.bilibili.cn/v_cdn_play?cid=' + cid
    else:
        api_url = 'http://interface.bilibili.tv/playurl?cid=' + cid
    try:
        data = _http_get(api_url)
    except Exception:
        print('ERROR: Cannot connect to API server!')
        data = ''
    rawurl = _extract_video_urls(data)
    if not rawurl:  #hope this never happen
        try:
            rawurl = _find_urls_via_flvcd(page_url)
        except Exception:
            print('ERROR: Cannot connect to Flvcd!')
            rawurl = []
    if not rawurl:
        print('ERROR: Cannot find any video to download!')
        return
    vid_num = len(rawurl)
    print(str(vid_num) + ' videos in part ' + str(p) + ' to download, fetch yourself a cup of coffee...')
    segments = []
    for i, url in enumerate(rawurl):
        print('Downloading ' + str(i+1) + ' of ' + str(vid_num) + ' videos in part ' + str(p) + '...')
        segment = str(i) + '.flv'
        _run(['aria2c', '-larialog.txt', '-c', '-s16', '-x16', '-k1M', '--out', segment, url])
        segments.append(segment)
    # Relative names are resolved against the list file's directory and are
    # accepted by the concat demuxer's safe mode; absolute paths are not.
    with open('ff.txt', 'w') as f:
        f.write(''.join("file '" + segment + "'\n" for segment in segments))
    print('Concating videos...')
    # The leading './' keeps a title starting with '-' from being read as an option.
    status = _run(['ffmpeg', '-f', 'concat', '-i', 'ff.txt', '-c', 'copy', './' + filename + '.mp4'])
    _remove_quietly('ff.txt')
    if status != 0:
        # Keep the segments: aria2c -c can resume and nothing is lost.
        print('ERROR: FFmpeg failed, keeping the downloaded segments.')
        return
    for segment in segments:
        _remove_quietly(segment)
    print('Done, enjoy yourself!')
#----------------------------------------------------------------------
def main(vid, p, oversea):
    """Download the comment XML and the video of part ``p`` of av ``vid``.

    vid     -- av number, as typed by the user
    p       -- part number; also used in the progress messages
    oversea -- the string '1' selects the overseas CDN API, anything else the
               default one

    Files are written to a sub-folder of the current directory named after
    the video title (or the cid when no title is known).  The current
    directory is restored before returning, so the function can be called
    once per part in a loop.  The process exits only when no cid can be
    determined and the user does not supply one.
    """
    global cid
    global partname
    global title
    global videourl
    # 32512 == 127 << 8: the shell's "command not found" exit status as
    # reported by commands.getstatusoutput().
    # NOTE(review): the two blocks below install unverified binaries as root.
    output = commands.getstatusoutput('ffmpeg --help')
    if str(output[0]) == '32512':
        print('FFmpeg does not exist! Trying to get you a binary, need root...')
        os.system('sudo curl -o /usr/bin/ffmpeg https://raw.githubusercontent.com/superwbd/ABPlayerHTML5-Py--nix/master/ffmpeg')
    output = commands.getstatusoutput('aria2c --help')
    if str(output[0]) == '32512':
        print('aria2c does not exist! Trying to get you a binary, need root... Thanks for @MartianZ \'s work.')
        os.system('sudo curl -o /usr/bin/aria2c https://raw.githubusercontent.com/MartianZ/fakeThunder/master/fakeThunder/aria2c')
    find_cid_api(vid, p)
    if not cid:
        print('Cannot find cid, trying to do it brutely...')
        find_cid_flvcd(videourl)
    if not cid:
        print('Strange, still cannot find cid... One last try, unpredictable')
        # Guess: take the cid of the previous av number and add one.
        # NOTE(review): the original subtracted 1 from vid twice; a single
        # step back is assumed to be the intent.
        # find_cid_api() overwrites the shared globals with the neighbour's
        # data, so remember ours and put them back afterwards.
        saved = (title, partname, videourl)
        try:
            find_cid_api(int(vid) - 1, 1)
            if cid:
                cid = str(int(cid) + 1)
        except ValueError:
            # vid or the neighbour's cid is not a number.
            cid = 0
        title, partname, videourl = saved
    if not cid:
        # raw_input() instead of input(): nothing the user types is evaluated.
        cid = raw_input('Cannot get cid anyway! If you know the cid, please type it in here, or I will just quit.').strip()
        if not cid:
            sys.exit()
    cid = str(cid)
    #start to make folders...
    # '/' in a title would silently create nested directories.
    folder = (title or cid).replace('/', '_')
    filename = (partname or title or cid).replace('/', '_')
    start_dir = os.getcwd()
    folder_to_make = os.path.join(start_dir, folder)
    if not os.path.exists(folder_to_make):
        os.makedirs(folder_to_make)
    os.chdir(folder_to_make)
    try:
        _download_part(cid, filename, p, oversea, videourl)
    finally:
        # Go back, otherwise the next part's folder would be nested in this one.
        os.chdir(start_dir)
def parse_part_list(p_raw):
    """Expand a part selection such as ``'1,2,3,4,5,6~10'`` into a list of ints.

    Items are separated by commas.  ``a~b`` stands for every part from a to b
    inclusive (the bounds may be given in either order); when only one bound
    is readable the range collapses to that single part.  Items that cannot
    be read are reported and skipped, and parsing continues with the next
    item.  Duplicates are not removed and the input order is kept.
    """
    parts = []
    for item in p_raw.split(','):
        if '~' in item:
            bounds = item.split('~')
            lower = 0
            higher = 0
            try:
                lower = int(bounds[0])
            except ValueError:
                print('Cannot read lower!')
            try:
                higher = int(bounds[1])
            except ValueError:
                print('Cannot read higher!')
            if lower == 0 and higher == 0:
                print('Cannot find any higher or lower, ignoring...')
                continue
            # 0 marks an unreadable bound: fall back to the other one.
            if lower == 0:
                lower = higher
            elif higher == 0:
                higher = lower
            if higher < lower:
                lower, higher = higher, lower
            parts.extend(range(lower, higher + 1))
            continue
        try:
            parts.append(int(item))
        except ValueError:
            print('Cannot read "'+str(item)+'", abondon it.')
    return parts
# Part currently being processed, as a string; read by main() for its
# progress messages.
part_now = '0'
if __name__ == '__main__':
    vid = str(raw_input('av'))
    p_raw = str(raw_input('P'))
    # raw_input() instead of input(): nothing the user types is evaluated.
    oversea = str(raw_input('Oversea?')).strip()
    p_list = list_del_repeat(parse_part_list(p_raw))
    for p in p_list:
        part_now = str(p)
        main(vid, p, oversea)

 

2 thoughts on “Biligrab 0.5: 加入一个新黑科技,加入批量下载”

    1. Beining Post author

      其实我是Python3党。。。您看里面的语法就知道了,完全是3的,只不过OSX默认是2.7,我不认为很多人会有3(我自己默认也不是3啊)。于是刻意写了2.7的,希望大家可以拆开就用。
      您的项目我一直在暗地关注,star了。
      我在想具体应该怎么整合比较好。您有建议吗?在逻辑的什么地方加入比较好呢?
      Biligrab最最最最开始是我给自己看新番弄弹幕的小东西,结果越来越大。整合的功能越来越多。
      最开始的想法是在OSX上弄个代替AcDown的东西,因为没有弹幕看太不爽了,OSX上当时根本就没有能好好看弹幕的工具。于是着手移植了Mukioplayer-Py-Mac和ABPlayerHTML5-Py--nix两个东西。但是下载弹幕还是不爽,所以为这两个软件写了这个Biligrab,从只能下载弹幕到包括各种功能乃至3种黑科技(一开始没用you-get,应该是认为这东西太大吧。。。总之就造轮子了)。
      现在Biligrab的逻辑是为上面两个软件服务的。当然我们可以想办法集成一个不复杂的版本进去。。。容我想想放在哪方便。。。

      Reply

Leave a Reply

Your email address will not be published. Required fields are marked *