diff fetch.py @ 13:3fee8ecd1af8

restructure while we still just have one module
author Jeff Hammel <jhammel@mozilla.com>
date Wed, 09 Nov 2011 16:15:53 -0800
parents fetch/main.py@726c3d288733
children bc7d6763357e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fetch.py	Wed Nov 09 16:15:53 2011 -0800
@@ -0,0 +1,220 @@
+#!/usr/bin/env python
+
+"""
+fetch stuff from the interwebs
+"""
+
+import os
+import sys
+import optparse
+
+__all__ = ['Fetcher', 'Fetch', 'main']
+
+def which(executable, path=None):
+  """
+  python equivalent of which; should really be in the stdlib
+
+  - executable: name of the program to look for
+  - path: directories to search, joined by os.pathsep; defaults to the
+    PATH environment variable as it is when the function is called
+
+  Returns the full path of the first executable match, or None.
+  """
+  # XXX from https://github.com/mozautomation/mozmill/blob/master/mozrunner/mozrunner/utils.py
+  if path is None:
+    # resolved here rather than in the signature: a default of
+    # os.environ['PATH'] is computed once at import time and raises
+    # KeyError when PATH is not set
+    path = os.environ.get('PATH', os.defpath)
+  for directory in path.split(os.pathsep):
+    candidate = os.path.join(directory, executable)
+    # like which(1), only report files that can actually be executed
+    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
+      return candidate
+  return None
+
+
+class Fetcher(object):
+  """
+  abstract base class for resource fetchers
+
+  Subclasses set the `type` class attribute to the name they are selected
+  by and override __call__ to do the actual fetching.
+  """
+
+  # name this fetcher is selected by; None on the abstract base so that it
+  # never matches and match() does not raise AttributeError on it
+  type = None
+
+  @classmethod
+  def match(cls, _type):
+    """return whether this class handles resources of the given type"""
+    return cls.type is not None and _type == cls.type
+
+  def __init__(self, url, **options):
+    """
+    - url: location of the resource to fetch
+    - options: extra key=value settings; Fetch hands the options of an
+      item to the fetcher as keyword arguments, so they are accepted here
+      and kept for subclasses that want them
+    """
+    self.url = url
+    self.options = options
+
+  def __call__(self, dest):
+    """fetch the resource to dest; must be overridden by subclasses"""
+    raise NotImplementedError
+
+### standard fetchers - always available
+
+import tarfile
+import urllib2
+from StringIO import StringIO
+
+class FileFetcher(Fetcher):
+  """fetch a single file"""
+
+  type = 'file'
+
+  @classmethod
+  def download(cls, url):
+    """return the contents found at url"""
+    response = urllib2.urlopen(url)
+    try:
+      return response.read()
+    finally:
+      # release the connection even if the read fails
+      response.close()
+
+  def __call__(self, dest):
+    """
+    download self.url to dest; if dest is an existing directory the file
+    is saved inside it under the last path segment of the URL
+    """
+    if os.path.isdir(dest):
+      filename = self.url.rsplit('/', 1)[-1]
+      dest = os.path.join(dest, filename)
+    # download before opening, so that a failed download does not leave an
+    # empty file behind
+    contents = self.download(self.url)
+    # binary mode: the payload is written exactly as it was received
+    f = open(dest, 'wb')
+    try:
+      f.write(contents)
+    finally:
+      f.close()
+
+
+class TarballFetcher(FileFetcher):
+  """fetch and extract a tarball"""
+
+  type = 'tar'
+
+  def __call__(self, dest):
+    """
+    download the tarball at self.url and unpack all of it into dest,
+    which must be an existing directory
+    """
+    if not os.path.isdir(dest):
+      # raised explicitly rather than asserted so that the check is not
+      # dropped when python runs with -O
+      raise AssertionError("destination is not a directory: %s" % dest)
+    # a StringIO built from the data starts out positioned at offset 0
+    data = StringIO(self.download(self.url))
+    # mode 'r' lets tarfile work out the compression by itself
+    tf = tarfile.open(mode='r', fileobj=data)
+    try:
+      # the archive comes from the network: refuse member names that
+      # resolve to somewhere outside of dest (absolute paths, '..')
+      root = os.path.join(os.path.realpath(dest), '')
+      for member in tf.getmembers():
+        target = os.path.realpath(os.path.join(root, member.name))
+        if not os.path.join(target, '').startswith(root):
+          raise ValueError("tarball member escapes destination: %s" % member.name)
+      # extractall unpacks every member; extract() takes a member as its
+      # first argument, so extract(dest) looked for a member named dest
+      tf.extractall(path=dest)
+    finally:
+      tf.close()
+
+# fetcher classes that only need the standard library; the hg fetcher is
+# appended below when its executable is found, and the list is later
+# replaced by a dict keyed on class name
+fetchers = [FileFetcher, TarballFetcher]
+
+### VCS fetchers using executable
+
+import subprocess
+
+# only defined and registered when an `hg` executable is found on the path
+if which('hg'):
+
+  class HgFetcher(Fetcher):
+    """checkout a mercurial repository"""
+    type = 'hg'
+
+    def __call__(self, dest):
+      """
+      clone self.url to dest; if dest already exists it must be a
+      mercurial checkout, which is then pulled into and updated
+      """
+      if os.path.exists(dest):
+        if not (os.path.isdir(dest) and os.path.exists(os.path.join(dest, '.hg'))):
+          raise AssertionError("not a mercurial checkout: %s" % dest)
+        # '--' keeps a url that starts with '-' from being read as an option
+        subprocess.check_call(['hg', 'pull', '--update', '--', self.url], cwd=dest)
+      else:
+        subprocess.check_call(['hg', 'clone', '--', self.url, dest])
+
+  fetchers.append(HgFetcher)
+
+class GitFetcher(Fetcher):
+  """checkout a git repository"""
+  # NOTE: placeholder -- no __call__ is defined here, so calling an instance
+  # ends up in Fetcher.__call__, which raises NotImplementedError; the class
+  # is also not added to `fetchers` anywhere in this module
+  type = 'git'
+
+
+# re-key the registry as fetcher class name -> fetcher class, and export
+# every fetcher class by name
+fetchers = dict(zip([i.__name__ for i in fetchers], fetchers))
+__all__.extend(fetchers.keys())
+
+
+class Fetch(object):
+  """fetch a set of resources using a collection of fetcher classes"""
+
+  def __init__(self, fetchers, relative_to=None, strict=True):
+    """
+    - fetchers: iterable of fetcher classes to choose from
+    - relative_to: directory that relative destinations are joined to;
+      when not given they are taken relative to the item's manifest
+    - strict: check that every item has a fetcher before fetching anything
+    """
+    # a list, so that the collection can be walked more than once
+    self.fetchers = list(fetchers)
+    self.relative_to = relative_to
+    self.strict = strict
+
+  def fetcher(self, _type):
+    """find the fetcher for the appropriate type; None if there is none"""
+    # search the fetchers given to this instance: the module-level
+    # `fetchers` is a name -> class dict by the time this runs, and
+    # iterating it would yield the names
+    for fetcher in self.fetchers:
+      if fetcher.match(_type):
+        return fetcher
+    return None
+
+  def __call__(self, url, destination, type, **options):
+    """fetch a single url to destination using the fetcher for type"""
+    fetcher = self.fetcher(type)
+    if fetcher is None:
+      # raised explicitly so that the check survives python -O
+      raise AssertionError("no fetcher available for type: %s" % type)
+    fetcher(url, **options)(destination)
+
+  def fetch(self, *items):
+    """
+    fetch every item; each one is a dict with the keys 'url', 'dest',
+    'type', 'options' and 'manifest'
+    """
+
+    if self.strict:
+      # ensure all the required fetchers are available
+      types = set([i['type'] for i in items])
+      missing = sorted([i for i in types if self.fetcher(i) is None])
+      if missing:
+        raise AssertionError("no fetcher available for: %s" % ', '.join(missing))
+
+    for item in items:
+
+      # fix up relative paths
+      dest = item['dest']
+      if not os.path.isabs(dest):
+        if self.relative_to:
+          dest = os.path.join(self.relative_to, dest)
+        else:
+          dest = os.path.join(os.path.dirname(os.path.abspath(item['manifest'])), dest)
+
+      # fetch the items
+      self(item['url'], destination=dest, type=item['type'], **item['options'])
+
+
+format_string = "[URL] [destination] [type] <options>"
+def read_manifests(*manifests):
+  """
+  read some manifests and return the items
+
+  Format, one item per line:
+  [URL] [destination] [type] <options>
+
+  <options> may be left out; when given it has the form
+  key=value,key2=value2,...
+  Blank lines and lines starting with # are skipped.
+
+  Returns a list of dicts with the keys url, dest, type, options and
+  manifest (the path of the file the item was read from).
+  """
+  # NOTE: the docstring is a plain literal on purpose; a string followed
+  # by `% format_string` is an expression and is not kept as __doc__
+
+  # sanity check
+  missing = [path for path in manifests if not os.path.exists(path)]
+  if missing:
+    # raised explicitly so that the check survives python -O
+    raise AssertionError("manifest not found: %s" % ', '.join(missing))
+
+  retval = []
+
+  for manifest in manifests:
+    # read the manifest currently being iterated over, and close it again
+    f = open(manifest)
+    try:
+      lines = f.readlines()
+    finally:
+      f.close()
+
+    for line in lines:
+      line = line.strip()
+      if line.startswith('#') or not line:
+        continue
+      line = line.split()
+      if len(line) not in (3,4):
+        raise Exception("Format should be: %s; line %s" % (format_string, line))
+      options = {}
+      if len(line) == 4:
+        option_string = line.pop().rstrip(',')
+        try:
+          options = dict([[j.strip() for j in i.split('=', 1)]
+                          for i in option_string.split(',')])
+        except ValueError:
+          # dict() raises ValueError for an entry that has no '='
+          raise Exception("Options format should be: key=value,key2=value2,...; got %s" % option_string)
+
+      url, dest, _type = line
+      retval.append(dict(url=url, dest=dest, type=_type, options=options, manifest=manifest))
+  return retval
+
+def main(args=sys.argv[1:]):
+  """
+  command line entry point: read the given manifests and fetch their items
+
+  - args: command line arguments, without the program name
+  """
+
+  # parse command line options
+  usage = '%prog [options] manifest [manifest] [...]'
+
+  # description formatter
+  class PlainDescriptionFormatter(optparse.IndentedHelpFormatter):
+    """return the description as written, followed by a newline"""
+    def format_description(self, description):
+      if description:
+        return description + '\n'
+      else:
+        return ''
+
+  parser = optparse.OptionParser(usage=usage, description=__doc__, formatter=PlainDescriptionFormatter())
+  parser.add_option('-o', '--output',
+                    help="output relative to this location vs. the manifest location")
+  parser.add_option('-d', '--dest',
+                    action='append',
+                    help="output only these destinations")
+  parser.add_option('-s', '--strict',
+                    action='store_true', default=False,
+                    help="fail on error")
+  parser.add_option('--list-fetchers', dest='list_fetchers',
+                    action='store_true', default=False,
+                    help='list available fetchers and exit')
+  options, args = parser.parse_args(args)
+
+  if options.list_fetchers:
+    for name in sorted(fetchers.keys()):
+      # call form with a single argument: prints the same on python 2 and 3
+      print(name)
+    parser.exit()
+
+  if not args:
+    parser.print_help()
+    parser.exit()
+
+  items = read_manifests(*args)
+
+  # honor --dest: keep only the items with a requested destination, as
+  # the destination is written in the manifest
+  if options.dest:
+    wanted = set(options.dest)
+    items = [item for item in items if item['dest'] in wanted]
+
+  # honor --output: relative destinations are resolved against it
+  fetch = Fetch(fetchers.values(), relative_to=options.output, strict=options.strict)
+
+  # download the files
+  fetch.fetch(*items)
+
+# run main() only when executed as a script, not when imported
+if __name__ == '__main__':
+  main()  
+