Doge log

The official site of 雑賀 力王, CTO of Abby.

A client code sample with eventlet

I've hardly ever seen anyone write eventlet client code, so I'll leave an example here. The script below scrapes listing pages with pyquery, downloads the flv files through a small GreenPool, and resumes partially downloaded files with an HTTP Range request.

from os import path
import eventlet
from eventlet.green import urllib2  # cooperative (green) version of urllib2
from pyquery import PyQuery as pq
from urlparse import urlparse

empflix_url = 'http://www.empflix.com/browsecat.php?page=%s&chid=17&category=mr'

save_path = "/tmp"
pool = eventlet.GreenPool(2)  # at most two concurrent green threads

def get_asian(page=1):
    # Collect the detail-page URLs from one listing page.
    q = []
    conn = urllib2.urlopen(empflix_url % page)
    html = conn.read()
    d = pq(html)
    for span in d(".thumb"):
        detail_url = pq(span.find("a")).attr.href
        q.append(detail_url)
    return q

def get_download_url(url):
    # Scrape the actual flv URL and derive a local file name from its path.
    conn = urllib2.urlopen(url)
    page = conn.read()
    d = pq(page)
    download_url = d(".linkRight a:first").attr.href
    parsed = urlparse(download_url)
    file_name = parsed.path.split("/")[-1]
    return url, download_url, file_name

def download_flv(url, down_url, file_name):
    if not file_name:
        return
    out_path = path.join(save_path, file_name)

    partial = False
    conn = urllib2.urlopen(down_url)
    if path.exists(out_path):
        # Content-Length arrives as a string; compare as an int.
        length = int(conn.info()['Content-Length'])
        size = path.getsize(out_path)
        if size < length:
            # Ask the server for just the missing tail with a Range header.
            r = "bytes=%s-" % size
            req = urllib2.Request(down_url,
                    headers={"Range": r})
            conn = urllib2.urlopen(req)
            print "Resume!! %s. %s" % (down_url, file_name)
            partial = True
        else:
            print "finish %s. %s" % (down_url, file_name)
            return

    if partial:
        # Append to the existing partial file.
        f = open(out_path, "rb+")
        f.seek(0, 2)
    else:
        f = open(out_path, "wb")

    print "start %s. %s" % (down_url, file_name)
    while True:
        data = conn.read(1024 * 8)
        if not data:
            break
        f.write(data)
    f.close()
    print "finish %s. %s" % (down_url, file_name)

def download(url):
    # One unit of work for a green thread: resolve the flv URL, then fetch it.
    url, download_url, file_name = get_download_url(url)
    download_flv(url, download_url, file_name)

def start(min_page=1, max_page=5):
    # Collect every detail URL first, then feed them to the pool.
    q = []
    for i in xrange(min_page, max_page + 1):
        q.extend(get_asian(page=i))
    for url in q:
        # spawn_n fires and forgets; waitall() blocks until the pool drains.
        pool.spawn_n(download, url)
    pool.waitall()

if __name__ == '__main__':
    start()
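
For reference, the whole pattern boils down to the minimal sketch below (fetch and the example.com URLs are just placeholders I made up): import the network module from eventlet.green instead of the stdlib so its sockets yield on I/O, and cap concurrency with a GreenPool.

import eventlet
from eventlet.green import urllib2  # green sockets: a blocking read yields to other green threads

urls = ["http://www.example.com/"] * 3  # placeholder URLs

pool = eventlet.GreenPool(2)  # at most two fetches in flight

def fetch(url):
    # Runs inside a green thread; urlopen blocks only this green thread.
    return url, len(urllib2.urlopen(url).read())

# imap runs fetch concurrently (bounded by the pool) and yields results in order.
for url, length in pool.imap(fetch, urls):
    print "%s: %s bytes" % (url, length)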