TwistedでRedTubeのビデオを全てダウンロードする
む。誰も興味ないのか。
じゃあ全部ダウンロードする奴書いてみるか。
一応20〜30同時に落としても怒られません。
100とかになるとわからんけど。
403が返ってくることがあるので、そのIDはリストの末尾に追加し直します。
あと、retryというリストはログ代わりに出力しているだけです。
# Bulk FLV downloader built on Twisted.
#
# A fixed number of "worker chains" pull video ids from a shared queue; each
# finished (or failed) download immediately starts the next one.  All print()
# calls take a single pre-formatted argument so they produce the same output
# under the Python 2 print statement and the Python 3 print function.
from twisted.web import client
from twisted.internet import reactor

# URL template: first slot is the zero-padded directory id, second slot is
# the obfuscated file name computed by get_video_id().
URL = 'http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/%s/%s.flv'

# Where each download is written; the slot receives the id zero-padded to 8.
DEST_TEMPLATE = '/Users/xxxxxx/video/%s.flv'

# How many times a single id may be re-queued after an HTTP 403.
MAX_RETRIES = 3

# Substitution table used by get_video_id().  NOTE: the name shadows the
# builtin ``map``; it is kept because it is part of the module's namespace.
map = ["R", "1", "5", "3", "4", "2", "O", "7", "K", "9", "H", "B",
       "C", "D", "X", "F", "G", "A", "I", "J", "8", "L", "M", "Z",
       "6", "P", "Q", "0", "S", "T", "U", "V", "W", "E", "Y", "N"]

# Status values treated as "forbidden".  The status is compared against the
# int, text and bytes spellings because its type is not fixed by this file
# (presumably it depends on the Twisted version -- TODO confirm).
_FORBIDDEN = (403, '403', b'403')

ids = []        # the shared download queue (filled by start())
retry = []      # log of every id that came back with a 403
_attempts = {}  # id -> number of 403 responses seen so far
_active = 0     # downloads currently in flight
_stopped = False  # True once reactor.stop() has been requested


def get_video_id(id):
    """Return ``(dir_id, mapping)`` for a numeric video id.

    ``dir_id`` is ``id // 1000`` zero-padded to 7 characters.  ``mapping`` is
    a 9-character name derived from the first 7 digits of the zero-padded id:
    the digits are weighted by their 1-based position and summed, the decimal
    digits of that sum are added up to a checksum, and each output character
    is either a checksum digit or ``map[digit + checksum + offset]``.

    Example: ``get_video_id(1) == ('0000000', 'H79KDC0XC')``.
    """
    dir_id = str(id // 1000).zfill(7)
    digits = [int(ch) for ch in str(id).zfill(7)[:7]]

    weighted = sum(d * (pos + 1) for pos, d in enumerate(digits))
    checksum = sum(int(ch) for ch in str(weighted))
    check_str = str(checksum).zfill(2)

    def pick(pos, offset):
        # Largest possible index is 9 + 19 + 7 = 35, the last slot of map.
        return map[digits[pos] + checksum + offset]

    mapping = ''.join([
        pick(3, 3),
        check_str[1],
        pick(0, 2),
        pick(2, 1),
        pick(5, 6),
        pick(1, 5),
        check_str[0],
        pick(4, 7),
        pick(6, 4),
    ])
    return dir_id, mapping


def _stop_reactor():
    """Request reactor shutdown exactly once."""
    global _stopped
    if not _stopped:
        _stopped = True
        reactor.stop()


def download(list):
    """Start the next queued download, or stop the reactor when all is done.

    ``list`` is the shared queue of ids; the id at the front is removed and
    handed to downloadFlv().  When the queue is empty the reactor is stopped
    only if no other download is still in flight, so one worker running dry
    does not cut off the others.
    """
    if list:
        id = list.pop(0)
        downloadFlv(list, id, DEST_TEMPLATE % str(id).zfill(8))
    elif _active == 0:
        _stop_reactor()


def downloadFlv(list, id, file):
    """Download video ``id`` to ``file`` and then continue with ``list``.

    On success the id and the current ``retry`` log are printed.  On failure
    the error is printed; if it carries a 403 status the id is recorded in
    ``retry`` and appended to the end of ``list`` again, at most MAX_RETRIES
    times per id.  Either way the next queued download is started afterwards.
    """
    global _active

    def _next():
        # This download is over: release its slot, then keep the chain going.
        global _active
        _active -= 1
        download(list)

    def finish(data):
        print("%s finished." % id)
        print("retry %s " % retry)
        _next()

    def err(e):
        print("%s %s" % (id, e))
        # Connection-level failures have no ``status`` attribute; getattr
        # keeps them from raising inside the errback and killing the chain.
        status = getattr(e.value, 'status', None)
        if status in _FORBIDDEN:
            retry.append(id)
            tries = _attempts[id] = _attempts.get(id, 0) + 1
            if tries <= MAX_RETRIES:
                # Re-queue on the queue actually being drained.
                list.append(id)
        _next()

    url = URL % get_video_id(id)
    print("%s %s" % (id, url))
    _active += 1
    d = client.downloadPage(url, file, supportPartial=1)
    # addCallbacks (not addCallback + addErrback) so that an exception raised
    # inside finish() is not routed into err(), which would schedule the next
    # download twice.
    d.addCallbacks(finish, err)


def start(first_id=1, stop_id=9000, concurrency=20):
    """Queue ids ``first_id`` .. ``stop_id - 1`` and launch the workers.

    The module-level ``ids`` list is filled in place so that it is the queue
    the workers drain.  Workers are scheduled with callWhenRunning so that
    reactor.stop() is only ever reached while the reactor is running, even
    when the id range is empty.
    """
    ids[:] = range(first_id, stop_id)
    for _ in range(concurrency):
        reactor.callWhenRunning(download, ids)


if __name__ == '__main__':
    start()
    reactor.run()
まあ9000とか適当ですけど。
2000ファイルぐらいで20Gいかないぐらいなので一日で全部落とせるはずです。
うくく。