#!/usr/bin/env python
"""Download FLV from youtube page

Fetches a YouTube watch page, reads the ``swfArgs`` player arguments
embedded in its JavaScript, and downloads the stream they point to into a
``.flv`` file named after the page title.

Runs on Python 2.6+ and Python 3.
"""

import sys
from optparse import OptionParser
import logging as log
import re
import os

try:  # Python 2
    import urllib2
    import cookielib
    from urllib import urlencode
except ImportError:  # Python 3: same APIs under new module names
    import urllib.request as urllib2
    import http.cookiejar as cookielib
    from urllib.parse import urlencode

__version__ = '0.1'
__author__ = 'cj_ '
__license__ = 'GPL'
# main() requires at least one URL and accepts several; say so in --help.
__usage__ = '%prog [options] <url> [<url> ...]'
__all__ = ['YouTube', 'LogRedirects', 'UserAgent', 'ProgressBar',
           'human_readable']
__agent__ = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'
__size_abbr__ = ['B', 'K', 'M', 'G', 'T']


class LogRedirects(urllib2.HTTPRedirectHandler):
    """Redirect handler that logs every redirect target at debug level."""

    def redirect_request(self, *args):
        """Log the new URL, then defer to the stock redirect handling."""
        # The opener calls this with (req, fp, code, msg, headers, newurl);
        # the last positional argument is the redirect target.
        log.debug('* %s' % args[-1])
        return urllib2.HTTPRedirectHandler.redirect_request(self, *args)


class YouTube(object):
    """Scrape a YouTube watch page and download the video it embeds."""

    __base_url__ = 'http://youtube.com'
    __re_swfArgs__ = re.compile(r'var\s+swfArgs\s*=\s*\{(.*)\}\s*;')
    __re_quoted__ = re.compile(r'([\'"])(.*[^\\])\1')
    # The lazy group needs a terminator after it; without one it always
    # captures the empty string and every download is named ".flv".
    __re_title__ = re.compile(r'<title>\s*YouTube\s*-\s*(.*?)\s*</title>',
                              re.I | re.S)
    __re_unsafe__ = re.compile(r'[^a-z0-9._ -]', re.I)

    def __init__(self, log_level=log.INFO):
        """Create a downloader; the progress bar is drawn unless quiet."""
        self.ua = UserAgent(progress=(log_level <= log.INFO))

    @staticmethod
    def _cleanse(text):
        """Return *text* reduced to filename-safe characters."""
        return YouTube.__re_unsafe__.sub('', text).replace(' ', '_')

    @staticmethod
    def _safe_title(doc):
        """Return the filename-safe video title from *doc*, or ''."""
        match = YouTube.__re_title__.search(doc)
        if match is None:
            return ''
        title = match.group(1)
        log.debug('video: %s' % title)
        return YouTube._cleanse(title)

    @staticmethod
    def _parse_swf_args(doc):
        """Return the ``swfArgs`` object literal in *doc* as a dict.

        Returns an empty dict when the page has no ``swfArgs`` assignment.
        The literal is split on commas, so a value that itself contains a
        comma is not supported.
        """
        match = YouTube.__re_swfArgs__.search(doc)
        if match is None:
            return {}
        opts = {}
        for arg in match.group(1).split(','):
            if ':' not in arg:
                continue
            key, val = arg.split(':', 1)
            # Keys may be bare or quoted and padded with whitespace.
            key = key.strip().strip('\'"')
            if not key:
                continue
            quoted = YouTube.__re_quoted__.search(val)
            if quoted is not None:
                opts[key] = quoted.group(2)
            else:
                # Unquoted (e.g. numeric) or empty-quoted value.
                opts[key] = val.strip().strip('\'"')
        return opts

    @staticmethod
    def _unique_filename(base, ext='.flv'):
        """Return ``base + ext``, numbered if that file already exists."""
        filename = base + ext
        i = 1
        while os.path.exists(filename):
            filename = '%s.%d%s' % (base, i, ext)
            i += 1
        return filename

    def fetch_video(self, url):
        """Download the video embedded in the watch page at *url*.

        The file is written to the current directory as ``<title>.flv``
        (``<title>.<n>.flv`` if that name is taken).

        Returns True when the video was saved, False when the page could
        not be loaded, carried no player arguments, or the download failed.
        """
        doc = self.ua.fetch(url)
        if not doc:
            # UserAgent.fetch() has already logged the underlying error.
            return False

        # detect title and cleanse for filename
        title = self._safe_title(doc)

        # extract url
        opts = self._parse_swf_args(doc)
        if not opts:
            log.warning("couldn't find video arguments on page %s" % url)
            return False

        if not title:
            # No usable title: fall back to the video id, then a fixed name.
            title = self._cleanse(opts.get('video_id', '')) or 'video'
        filename = self._unique_filename(title)
        if filename != title + '.flv':
            log.debug('file exists, saving to %s' % filename)

        new_url = YouTube.__base_url__ + '/get_video?' + urlencode(opts)

        # download and save video; fetch() returns None on a clean save
        return self.ua.fetch(new_url, referer=url, save=filename) is None


class UserAgent(object):
    """Cookie-aware HTTP client with a browser-like User-Agent header."""

    __blocksize__ = 16 * 1024

    def __init__(self, progress=False):
        """Build the opener; *progress* enables the download progress bar."""
        self.progress = progress
        self.cj = cookielib.CookieJar()
        self.ch = urllib2.HTTPCookieProcessor(self.cj)
        self.opener = urllib2.build_opener(self.ch, LogRedirects)
        self.opener.addheaders = [('User-Agent', __agent__)]

    @staticmethod
    def _content_length(res):
        """Return the response's Content-Length as an int, or None."""
        try:
            size = int(res.headers.get('content-length'))
        except (TypeError, ValueError):
            # Header missing or not a number.
            return None
        if size < 0:
            return None
        return size

    @staticmethod
    def _read_text(res):
        """Read the whole response body and return it as a native str."""
        body = res.read()
        if isinstance(body, str):
            return body  # Python 2: the body is already a str
        charset = None
        get_charset = getattr(res.headers, 'get_content_charset', None)
        if get_charset is not None:
            charset = get_charset()
        try:
            return body.decode(charset or 'utf-8', 'replace')
        except LookupError:
            # Server declared a charset Python does not know.
            return body.decode('utf-8', 'replace')

    def _save(self, res, save):
        """Stream the response body into the file *save* block by block."""
        size = self._content_length(res)
        if size is None:
            log.info('Downloading %s to %s' % ('unknown size', save))
        else:
            log.info('Downloading %s to %s' % (human_readable(size), save))
        pb = None
        if self.progress and size:
            # A bar needs a known, non-zero total to scale against.
            pb = ProgressBar(size=size)
        read = 0
        with open(save, 'wb') as fi:
            while True:
                block = res.read(UserAgent.__blocksize__)
                if not block:
                    break
                fi.write(block)
                read += len(block)
                if pb is not None:
                    pb.update(read)

    def fetch(self, url, referer=None, save=None):
        """Fetch *url*, optionally sending *referer* and saving to *save*.

        Returns the page text when *save* is None, and None after the body
        has been written to the file *save*. Any failure is logged as a
        warning and reported by returning '' instead of raising.
        """
        log.debug('* %s' % url)
        try:
            req = urllib2.Request(url)
            if referer is not None:
                req.add_header('Referer', referer)
            res = self.opener.open(req)
            try:
                if save is None:
                    return self._read_text(res)
                self._save(res, save)
                return None
            finally:
                res.close()
        except Exception as e:
            # Deliberately broad: this is a best-effort fetch and callers
            # rely on the '' sentinel rather than on exceptions.
            log.warning("couldn't load page %s: %s" % (url, e))
            return ''


class ProgressBar(object):
    """Class to draw a status bar"""

    def __init__(self, size=100, width=72, pipe=sys.stderr, pos=1):
        """Draw the empty frame on *pipe* and plot the initial *pos*.

        *size* is the value that corresponds to a full bar and *width* the
        bar width in columns.
        """
        self.size = float(size)
        self.width = float(width)
        self.pipe = pipe
        self.lastCol = 0
        self._finished = False
        pipe.write('|' + ('-' * (int(width) - 2)) + '|\n')
        self.update(pos)

    def update(self, pos):
        """Update drawing with current position"""
        if self.size <= 0:
            # Nothing to measure against: treat the bar as complete.
            fraction = 1.0
        else:
            # Clamp so an overshoot never draws past the frame.
            fraction = min(max(float(pos) / self.size, 0.0), 1.0)
        # Round half up explicitly; round() differs between Python 2 and 3.
        curCol = int(fraction * self.width + 0.5)
        if curCol > self.lastCol:
            self.pipe.write('*' * (curCol - self.lastCol))
            self.lastCol = curCol
        if pos >= self.size and not self._finished:
            # Terminate the bar exactly once.
            self.pipe.write('\n')
            self._finished = True
        # The bar is drawn without newlines, so a line-buffered stream
        # would otherwise hold it back until the very end.
        flush = getattr(self.pipe, 'flush', None)
        if flush is not None:
            flush()


def human_readable(bytes):
    """Format a byte count with one decimal and a unit, e.g. '1.5K'.

    *bytes* may be a number or a numeric string. Values beyond the largest
    known unit are expressed in that unit instead of raising IndexError.
    """
    value = float(bytes)
    last = len(__size_abbr__) - 1
    i = 0
    while value >= 1024 and i < last:
        value /= 1024
        i += 1
    return '%.1f%s' % (value, __size_abbr__[i])


def main():
    """Parse the command line and download every URL given.

    Returns the process exit status: 0 when all downloads succeeded, 1 when
    no URL was given or at least one download failed.
    """
    op = OptionParser(version=__version__, usage=__usage__)
    op.add_option('-q', '--quiet', dest='log_level', action='store_const',
                  default=log.INFO, const=log.WARN, help='suppress output')
    op.add_option('-d', '--debug', dest='log_level', action='store_const',
                  const=log.DEBUG, help='show debugging information')
    opts, args = op.parse_args()
    log.basicConfig(level=opts.log_level, format='%(message)s')
    if len(args) < 1:
        op.print_help()
        return 1
    yt = YouTube(log_level=opts.log_level)
    failed = 0
    for url in args:
        if not yt.fetch_video(url):
            failed += 1
    if failed:
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())