view fetch.py @ 18:64f89df1b966

comment
author Jeff Hammel <jhammel@mozilla.com>
date Wed, 09 Nov 2011 16:58:03 -0800
parents e2af4bc5159c
children d69041957c0e
line wrap: on
line source

#!/usr/bin/env python

"""
fetch stuff from the interwebs
"""

import os
import sys
import optparse

__all__ = ['Fetcher', 'Fetch', 'main']

def which(executable, path=None):
    """
    python equivalent of which; should really be in the stdlib

    - executable : file name of the program to look for
    - path : os.pathsep-separated string of directories to search;
             defaults to the PATH environment variable as it is at
             call time

    Returns the full path of the first match, or None if there is none.
    """
    if path is None:
        # look PATH up on each call: a default of os.environ['PATH'] in
        # the signature is frozen at import time and raises KeyError
        # when the variable is not set at all
        path = os.environ.get('PATH')
        if path is None:
            return None
    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, executable)
        if os.path.isfile(candidate):
            return candidate
    return None

class Fetcher(object):
    """abstract base class for resource fetchers"""

    # Subclasses set a class attribute ``type`` (the string that match()
    # compares against) and override __call__(dest) to do the fetching.

    @classmethod
    def match(cls, _type):
        """return whether _type equals this fetcher's ``type`` attribute"""
        return _type == cls.type

    def __init__(self, url, clobber=False):
        """
        - url : location of the resource; anything after the last '#'
                is split off and stored as self.subpath
        - clobber : accepted but currently unused
        """
        self.subpath = None
        if '#' in url:
            # split on the last '#' only; without the maxsplit a URL
            # holding several '#' yields more than two parts and the
            # unpacking raises ValueError
            url, self.subpath = url.rsplit('#', 1)
        self.url = url
        # self.clobber = clobber # unused

    def __call__(self, dest):
        """fetch the resource to dest; must be overridden by subclasses"""
        raise NotImplementedError("Should be called by implementing class")

    @classmethod
    def doc(cls):
        """return the class docstring collapsed onto a single line"""
        # a class without a docstring has __doc__ set to None (the
        # attribute is present), so a getattr default alone is not enough
        retval = getattr(cls, '__doc__', None) or ''
        return ' '.join(retval.split())

### standard dispatchers - always available

import tarfile
import urllib2
from StringIO import StringIO

class FileFetcher(Fetcher):
    """fetch a single file"""

    type = 'file'

    @classmethod
    def download(cls, url):
        """return the body of the resource at url"""
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            # release the connection even when the read fails
            response.close()

    def __call__(self, dest):
        """
        save the resource to dest

        If dest is an existing directory the file is written inside it,
        named after the last '/'-separated component of the URL.
        """
        if os.path.isdir(dest):
            filename = self.url.rsplit('/', 1)[-1]
            dest = os.path.join(dest, filename)
        # download before opening the target, so that a failed request
        # does not leave an empty file behind
        contents = self.download(self.url)
        # binary mode: the payload is arbitrary bytes and must not go
        # through newline translation; the with block closes the file
        # even if the write fails
        with open(dest, 'wb') as f:
            f.write(contents)


class TarballFetcher(FileFetcher):
    """fetch and extract a tarball"""

    type = 'tar'

    def __call__(self, dest):
        """
        download the tarball and unpack all of its members into dest

        - dest : existing directory to extract into

        Raises NotImplementedError if the URL carried a '#subpath', and
        Exception if a member name would land outside of dest.
        """
        assert os.path.isdir(dest), "Destination '%s' is not a directory" % dest
        if self.subpath:
            raise NotImplementedError("should extract only a subpath of a tarball but I haven't finished it yet")
        stream = StringIO(self.download(self.url))
        tf = tarfile.open(mode='r', fileobj=stream)
        try:
            # the archive comes from the network: refuse member names
            # that resolve outside of dest ('../x', absolute paths)
            # NOTE(review): link members are not inspected here
            root = os.path.abspath(dest)
            prefix = os.path.join(root, '')  # root with a trailing separator
            for member in tf.getmembers():
                target = os.path.abspath(os.path.join(root, member.name))
                if target != root and not target.startswith(prefix):
                    raise Exception("Refusing to extract '%s' outside of '%s'" % (member.name, dest))
            # extractall() unpacks every member under dest, whereas
            # extract(dest) looks for a single member *named* dest
            tf.extractall(path=dest)
        finally:
            tf.close()

# registry of the fetcher classes that are available; the VCS section
# below appends to it when the matching executable is found
fetchers = [FileFetcher, TarballFetcher]

### VCS fetchers using executable

import subprocess

class VCSFetcher(Fetcher):
    # shared constructor for the fetchers that check out a repository

    def __init__(self, url, export=True):
        """
        - url : passed on unchanged to Fetcher.__init__
        - export : whether to strip the versioning information
        """
        super(VCSFetcher, self).__init__(url)
        self.export = export

if which('hg'):

    class HgFetcher(VCSFetcher):
        """checkout a mercurial repository"""
        type = 'hg'

        def __call__(self, dest):
            """
            check out the repository to dest (not implemented yet)

            A dest that already exists must be a directory containing
            a '.hg' entry.
            """
            # os.path.exists: the former 'os.path.exits' spelling raised
            # AttributeError before the check could run
            if os.path.exists(dest):
                assert os.path.isdir(dest) and os.path.exists(os.path.join(dest, '.hg'))
            raise NotImplementedError("TODO! Sorry!")

    fetchers.append(HgFetcher)

if which('git'):

    # derives from VCSFetcher like the mercurial fetcher, so that it
    # takes the same 'export' option
    class GitFetcher(VCSFetcher):
        """checkout a git repository"""
        type = 'git'

        def __call__(self, dest):
            """check out the repository to dest (not implemented yet)"""
            raise NotImplementedError("TODO! Sorry!")

    # register the class; a bare 'fetchers' expression does nothing
    fetchers.append(GitFetcher)

# also export every registered fetcher class by name
__all__ += [i.__name__ for i in fetchers]

class Fetch(object):
    """
    dispatch fetch requests to the fetcher class matching their type

    - fetchers : fetcher classes to choose from
    - relative_to : directory that relative destinations are joined to;
                    when not set, the directory of the item's manifest
                    is used instead
    - strict : check that every requested type has a fetcher before
               anything is fetched
    """

    def __init__(self, fetchers, relative_to=None, strict=True):
        self.fetchers = fetchers
        self.relative_to = relative_to
        self.strict = strict

    def fetcher(self, _type):
        """find the fetcher for the appropriate type; None if there is none"""
        # search the classes handed to this instance rather than the
        # module-level list, so a custom set of fetchers is honoured
        for fetcher in self.fetchers:
            if fetcher.match(_type):
                return fetcher
        return None

    def __call__(self, url, destination, type, **options):
        """
        fetch url to destination using the fetcher for type

        - options : keyword arguments for the fetcher's constructor

        Raises AssertionError if no fetcher matches type.
        """
        fetcher = self.fetcher(type)
        if fetcher is None:
            # raised explicitly so the check is not stripped by python -O
            raise AssertionError("No fetcher found for type '%s'" % type)
        fetcher = fetcher(url, **options)
        fetcher(destination)

    def fetch(self, *items):
        """
        fetch each of the items

        Every item is a dict with the keys 'url', 'dest', 'type' and
        'options', plus 'manifest' when dest is relative and no
        relative_to was given.

        Raises AssertionError in strict mode if a type has no fetcher.
        """

        if self.strict:
            # ensure all the required fetchers are available;
            # collect the types WITHOUT a fetcher (the previous test
            # collected the ones that had one and so failed on valid input)
            types = set([i['type'] for i in items])
            missing = sorted([i for i in types if self.fetcher(i) is None], key=str)
            if missing:
                raise AssertionError("No fetcher found for type(s): %s"
                                     % ', '.join(["'%s'" % i for i in missing]))

        for item in items:

            # fix up relative paths
            dest = item['dest']
            if not os.path.isabs(dest):
                relative_to = self.relative_to or os.path.dirname(os.path.abspath(item['manifest']))
                dest = os.path.join(relative_to, dest)

            # fetch the items
            self(item['url'], destination=dest, type=item['type'], **item['options'])

format_string = "[URL] [destination] [type] <options>"
def read_manifests(*manifests):
    """
    read some manifests and return the items

    Format:
    [URL] [destination] [type] <options>

    Blank lines and lines starting with '#' are skipped. <options> may
    be left out; when present it reads key=value,key2=value2,...

    Returns a list of dicts with the keys 'url', 'dest', 'type',
    'options' and 'manifest' (the path the line was read from).
    Raises AssertionError if a manifest does not exist and Exception
    if a line is malformed.
    """
    # (the format is spelled out above because a string followed by
    # '% format_string' is an expression, not a docstring)

    # sanity check; raised explicitly so it is not stripped by python -O
    missing = [i for i in manifests if not os.path.exists(i)]
    if missing:
        raise AssertionError("Manifest not found: %s" % ', '.join(missing))

    retval = []

    for manifest in manifests:
        # open the manifest of this iteration (not a leftover loop
        # variable) and close it as soon as the lines are read
        with open(manifest) as f:
            lines = f.readlines()
        for line in lines:
            line = line.strip()
            if line.startswith('#') or not line:
                continue
            line = line.split()
            if len(line) not in (3,4):
                raise Exception("Format should be: %s; line %s" % (format_string, line))
            options = {}
            if len(line) == 4:
                # a trailing comma after the last option is tolerated
                option_string = line.pop().rstrip(',')
                for pair in option_string.split(','):
                    if '=' not in pair:
                        raise Exception("Options format should be: key=value,key2=value2,...; got %s" % option_string)
                    key, value = pair.split('=', 1)
                    options[key.strip()] = value.strip()

            url, dest, _type = line
            retval.append(dict(url=url, dest=dest, type=_type, options=options, manifest=manifest))
    return retval

def main(args=sys.argv[1:]):
    """
    command line entry point

    - args : command line arguments, without the program name

    Lists the fetchers, prints the help, or fetches every item of the
    given manifests, depending on the arguments.
    """

    # parse command line options
    usage = '%prog [options] manifest [manifest] [...]'

    class PlainDescriptionFormatter(optparse.IndentedHelpFormatter):
        """help formatter that returns the description unmodified plus a newline"""
        def format_description(self, description):
            if description:
                return description + '\n'
            else:
                return ''

    parser = optparse.OptionParser(usage=usage, description=__doc__, formatter=PlainDescriptionFormatter())
    parser.add_option('-o', '--output',
                      help="output relative to this location vs. the manifest location")
    parser.add_option('-d', '--dest', # XXX unused
                      action='append',
                      help="output only these destinations")
    parser.add_option('-s', '--strict',
                      action='store_true', default=False,
                      help="fail on error")
    parser.add_option('--list-fetchers', dest='list_fetchers',
                      action='store_true', default=False,
                      help='list available fetchers and exit')
    options, args = parser.parse_args(args)

    if options.list_fetchers:
        types = set()
        for fetcher in fetchers:
            if fetcher.type in types:
                continue # occluded, should probably display separately
            # one parenthesised argument: prints the same text whether
            # print is a statement or a function
            print('%s : %s' % (fetcher.type, fetcher.doc()))
            types.add(fetcher.type)
        parser.exit()

    if not args:
        # TODO: could read from stdin
        parser.print_help()
        parser.exit()

    items = read_manifests(*args)
    # hand -o/--output to Fetch; it used to be parsed and then ignored.
    # When the option is not given it is None, which keeps destinations
    # relative to the manifest location.
    fetch = Fetch(fetchers, relative_to=options.output, strict=options.strict)

    # download the files
    fetch.fetch(*items)

if __name__ == '__main__':
    main()