view fetch.py @ 13:3fee8ecd1af8

restructure while we still just have one module
author Jeff Hammel <jhammel@mozilla.com>
date Wed, 09 Nov 2011 16:15:53 -0800
parents fetch/main.py@726c3d288733
children bc7d6763357e
line wrap: on
line source

#!/usr/bin/env python

"""
fetch stuff from the interwebs
"""

import os
import sys
import optparse

__all__ = ['Fetcher', 'Fetch', 'main']

def which(executable, path=None):
  """
  python equivalent of which; should really be in the stdlib

  - executable: file name to look for
  - path: os.pathsep-separated list of directories to search; when omitted,
    the PATH environment variable is read at call time (falling back to
    os.defpath if it is unset)

  Returns the joined path of the first regular file found, or None if no
  directory contains one.
  """
  # XXX from https://github.com/mozautomation/mozmill/blob/master/mozrunner/mozrunner/utils.py
  if path is None:
    # resolve PATH lazily: a default of os.environ['PATH'] is evaluated once
    # at import time and raises KeyError when PATH is not set
    path = os.environ.get('PATH', os.defpath)
  for directory in path.split(os.pathsep):
    candidate = os.path.join(directory, executable)
    if os.path.isfile(candidate):
      return candidate
  return None


class Fetcher(object):
  """
  abstract base class for resource fetchers

  match() compares against a class-level `type` attribute, which this base
  class does not define; concrete fetchers provide it and override __call__.
  """

  def __init__(self, url):
    # where the resource lives
    self.url = url

  @classmethod
  def match(cls, _type):
    """return whether this fetcher class handles the given type name"""
    handled = cls.type
    return _type == handled

  def __call__(self, dest):
    """fetch self.url to dest; not implemented on the base class"""
    raise NotImplementedError()

### standard fetchers - always available

import tarfile
import urllib2
from StringIO import StringIO

class FileFetcher(Fetcher):
  """fetch a single file"""

  type = 'file'

  @classmethod
  def download(cls, url):
    """return the body of the response obtained by opening url"""
    response = urllib2.urlopen(url)
    try:
      return response.read()
    finally:
      # release the connection even if the read fails
      response.close()

  def __call__(self, dest):
    """
    write the contents of self.url to dest

    If dest is an existing directory, the file is written inside it and
    named after the last '/'-separated segment of the URL.
    """
    if os.path.isdir(dest):
      filename = self.url.rsplit('/', 1)[-1]
      dest = os.path.join(dest, filename)
    # download before opening dest so that a failed download does not leave
    # a truncated file behind
    contents = self.download(self.url)
    # binary mode: the payload is arbitrary bytes and must not go through
    # newline translation
    with open(dest, 'wb') as f:
      f.write(contents)


class TarballFetcher(FileFetcher):
  """fetch and extract a tarball"""

  type = 'tar'

  def __call__(self, dest):
    """
    download the tarball at self.url and unpack its members into dest

    - dest: an existing directory to extract into

    Raises AssertionError if dest is not a directory.
    """
    assert os.path.isdir(dest), "Destination is not a directory: %s" % dest
    archive = StringIO(self.download(self.url))
    tf = tarfile.open(mode='r', fileobj=archive)
    try:
      # extractall unpacks every member under dest; extract(dest) would
      # instead look for a single member *named* dest
      # NOTE(review): member names come from the downloaded archive and are
      # not validated here, so an archive containing absolute or '..' paths
      # can write outside dest -- only fetch tarballs from trusted sources
      tf.extractall(path=dest)
    finally:
      tf.close()

# fetcher classes that need no external executable
fetchers = [FileFetcher, TarballFetcher]

### VCS fetchers using executable

import subprocess

if which('hg'):
  # only defined and registered when an `hg` executable is found on the PATH

  class HgFetcher(Fetcher):
    """checkout a mercurial repository"""
    type = 'hg'

    def __call__(self, dest):
      """
      fetch the repository at self.url into dest (not implemented yet)

      An existing dest must be a directory containing a `.hg` entry;
      otherwise AssertionError is raised.
      """
      if os.path.exists(dest):
        assert os.path.isdir(dest) and os.path.exists(os.path.join(dest, '.hg'))
      # TODO: clone into a new destination / update an existing checkout;
      # fail loudly until then rather than silently fetching nothing
      raise NotImplementedError("fetching mercurial repositories is not implemented")

  fetchers.append(HgFetcher)

class GitFetcher(Fetcher):
  """checkout a git repository"""
  # NOTE: no __call__ is defined here, so calling an instance falls through
  # to Fetcher.__call__, which raises NotImplementedError; nothing in this
  # file adds the class to `fetchers` either
  type = 'git'


# from here on `fetchers` maps class name -> fetcher class (it was a list of
# classes above); the class names are also added to the module's public API
fetchers = dict([(i.__name__, i) for i in fetchers])
__all__ += fetchers.keys()


class Fetch(object):
  """dispatch items to the fetcher class that handles their type"""

  def __init__(self, fetchers, relative_to=None, strict=True):
    """
    - fetchers: the fetcher classes to dispatch to
    - relative_to: directory that relative destinations are joined to; when
      unset they are joined to the directory of the item's manifest
    - strict: check that every item has a fetcher before fetching anything
    """
    self.fetchers = fetchers
    self.relative_to = relative_to
    self.strict = strict

  def fetcher(self, _type):
    """find the fetcher for the appropriate type; None if there is none"""
    # search the fetchers this instance was given, not the module-level
    # `fetchers`, which is a dict keyed by class name
    for fetcher in self.fetchers:
      if fetcher.match(_type):
        return fetcher
    return None

  def __call__(self, url, destination, type, **options):
    """
    fetch a single url to destination

    - type: type name used to select the fetcher class
    - options: extra keyword arguments passed to the fetcher's constructor

    Raises AssertionError if no fetcher matches type.
    """
    fetcher = self.fetcher(type)
    assert fetcher is not None, "No fetcher available for type: %s" % type
    fetcher = fetcher(url, **options)
    fetcher(destination)

  def fetch(self, *items):
    """
    fetch each item in turn

    Each item is a dict with the keys url, dest, type and options, plus
    manifest when dest is relative and relative_to is unset.

    In strict mode, raises AssertionError before fetching anything if any
    item's type has no matching fetcher.
    """

    if self.strict:
      # ensure all the required fetchers are available
      types = set([i['type'] for i in items])
      missing = sorted([i for i in types if self.fetcher(i) is None])
      assert not missing, "No fetcher available for type(s): %s" % ', '.join(missing)

    for item in items:

      # fix up relative paths
      dest = item['dest']
      if not os.path.isabs(dest):
        if self.relative_to:
          dest = os.path.join(self.relative_to, dest)
        else:
          dest = os.path.join(os.path.dirname(os.path.abspath(item['manifest'])), dest)

      # fetch the items
      self(item['url'], destination=dest, type=item['type'], **item['options'])


format_string = "[URL] [destination] [type] <options>"
def read_manifests(*manifests):
  """
  read some manifests and return the items

  Format:
  [URL] [destination] [type] <options>

  Blank lines and lines starting with '#' are skipped. <options> is an
  optional fourth field of comma-separated key=value pairs.

  Returns a list of dicts with the keys url, dest, type, options and
  manifest (the path of the manifest the item was read from).

  Raises AssertionError if a manifest does not exist, and Exception for a
  line with the wrong number of fields or with malformed options.
  """

  # sanity check
  missing = [i for i in manifests if not os.path.exists(i)]
  assert not missing, "Manifest(s) not found: %s" % ', '.join(missing)

  retval = []

  for manifest in manifests:
    # read the manifest currently being iterated, and close it promptly
    with open(manifest) as f:
      lines = f.readlines()
    for line in lines:
      line = line.strip()
      if not line or line.startswith('#'):
        continue
      fields = line.split()
      if len(fields) not in (3, 4):
        raise Exception("Format should be: %s; line %s" % (format_string, fields))
      options = {}
      if len(fields) == 4:
        option_string = fields.pop().rstrip(',')
        try:
          options = dict([[part.strip() for part in pair.split('=', 1)]
                          for pair in option_string.split(',')])
        except ValueError:
          # a pair without '=' yields a one-element list, which dict() rejects
          raise Exception("Options format should be: key=value,key2=value2,...; got %s" % option_string)

      url, dest, _type = fields
      retval.append(dict(url=url, dest=dest, type=_type, options=options, manifest=manifest))
  return retval

def main(args=sys.argv[1:]):
  """
  command line entry point

  - args: command line arguments, excluding the program name

  Reads the manifests named on the command line and fetches their items.
  """

  # parse command line options
  usage = '%prog [options] manifest [manifest] [...]'

  # description formatter
  class PlainDescriptionFormatter(optparse.IndentedHelpFormatter):
    """print the description as written instead of re-wrapping it"""
    def format_description(self, description):
      if description:
        return description + '\n'
      else:
        return ''

  parser = optparse.OptionParser(usage=usage, description=__doc__, formatter=PlainDescriptionFormatter())
  parser.add_option('-o', '--output',
                    help="output relative to this location vs. the manifest location")
  parser.add_option('-d', '--dest',
                    action='append',
                    help="output only these destinations")
  parser.add_option('-s', '--strict',
                    action='store_true', default=False,
                    help="fail on error")
  parser.add_option('--list-fetchers', dest='list_fetchers',
                    action='store_true', default=False,
                    help='list available fetchers and exit')
  options, args = parser.parse_args(args)

  if options.list_fetchers:
    for name in sorted(fetchers.keys()):
      # single-argument call form prints the same under the print statement
      print(name)
    parser.exit()

  if not args:
    parser.print_help()
    parser.exit()

  items = read_manifests(*args)
  # NOTE(review): --dest is parsed above but nothing reads options.dest yet
  # pass --output through so that relative destinations honour it
  fetch = Fetch(fetchers.values(), relative_to=options.output, strict=options.strict)

  # download the files
  fetch.fetch(*items)

# run the command line interface when executed as a script
if __name__ == '__main__':
  main()