Mercurial > hg > fetch
view fetch.py @ 18:64f89df1b966
comment
author | Jeff Hammel <jhammel@mozilla.com> |
---|---|
date | Wed, 09 Nov 2011 16:58:03 -0800 |
parents | e2af4bc5159c |
children | d69041957c0e |
line wrap: on
line source
#!/usr/bin/env python

"""
fetch stuff from the interwebs
"""

import optparse
import os
import subprocess  # not used yet; kept for the VCS fetchers, which are still stubs
import sys
import tarfile
from contextlib import closing
from io import BytesIO

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: urllib.request exposes the same urlopen() entry point
    import urllib.request as urllib2

__all__ = ['Fetcher', 'Fetch', 'main']


def which(executable, path=None):
    """
    python equivalent of which; should really be in the stdlib

    - executable : file name to look for
    - path : os.pathsep-separated list of directories to search;
             defaults to the PATH environment variable at call time

    Returns the full path of the first match, or None if there is none.
    """
    if path is None:
        # looked up lazily so an unset PATH cannot break importing the module
        path = os.environ.get('PATH', '')
    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, executable)
        if os.path.isfile(candidate):
            return candidate
    return None


class Fetcher(object):
    """abstract base class for resource fetchers"""

    # concrete subclasses must define a `type` string; match() compares to it

    @classmethod
    def match(cls, _type):
        """return whether this fetcher handles the given type name"""
        return _type == cls.type

    def __init__(self, url, clobber=False):
        """
        - url : resource location; text after the last '#' is kept
                separately as `subpath`
        - clobber : accepted for interface compatibility; currently unused
        """
        self.subpath = None
        if '#' in url:
            # split on the last '#' only, so a URL containing several of
            # them no longer fails to unpack
            url, self.subpath = url.rsplit('#', 1)
        self.url = url

    def __call__(self, dest):
        """fetch the resource to `dest`; implemented by subclasses"""
        raise NotImplementedError("Should be called by implementing class")

    @classmethod
    def doc(cls):
        """return the class docstring collapsed onto a single line"""
        # __doc__ is None for a class without a docstring
        return ' '.join((cls.__doc__ or '').split())


### standard dispatchers - always available

def _safe_extractall(tf, dest):
    """extract all members of the open TarFile `tf` under `dest`, refusing
    members whose path would land outside of it"""
    if hasattr(tarfile, 'data_filter'):
        # extraction filters are available: let the stdlib vet the members
        tf.extractall(path=dest, filter='data')
        return
    # NOTE: this fallback only checks member names; link members pointing
    # outside of `dest` are not vetted
    root = os.path.realpath(dest)
    members = tf.getmembers()
    for member in members:
        target = os.path.realpath(os.path.join(root, member.name))
        if target != root and not target.startswith(root + os.sep):
            raise ValueError("Refusing to extract '%s' outside of '%s'"
                             % (member.name, dest))
    tf.extractall(path=dest, members=members)


class FileFetcher(Fetcher):
    """fetch a single file"""

    type = 'file'

    @classmethod
    def download(cls, url):
        """return the raw contents found at `url`"""
        with closing(urllib2.urlopen(url)) as response:
            return response.read()

    def __call__(self, dest):
        """
        write the downloaded contents to `dest`; if `dest` is an existing
        directory the file is named after the last URL path segment
        """
        if os.path.isdir(dest):
            filename = self.url.rsplit('/', 1)[-1]
            dest = os.path.join(dest, filename)
        # download first so a failed request does not leave an empty file
        contents = self.download(self.url)
        # binary mode: the payload is raw bytes, not text
        with open(dest, 'wb') as f:
            f.write(contents)


class TarballFetcher(FileFetcher):
    """fetch and extract a tarball"""

    type = 'tar'

    def __call__(self, dest):
        """
        download the tarball and unpack it into the existing directory `dest`

        Raises AssertionError if `dest` is not a directory and
        NotImplementedError if the URL carried a '#subpath'.
        """
        if not os.path.isdir(dest):
            # explicit raise: a bare assert would disappear under `python -O`
            raise AssertionError("Destination '%s' is not a directory" % dest)
        if self.subpath:
            raise NotImplementedError("should extract only a subpath of a tarball but I haven't finished it yet")
        buffer = BytesIO(self.download(self.url))
        with closing(tarfile.open(mode='r', fileobj=buffer)) as tf:
            _safe_extractall(tf, dest)


fetchers = [FileFetcher, TarballFetcher]


### VCS fetchers using executable

class VCSFetcher(Fetcher):

    def __init__(self, url, export=True):
        """
        - export : whether to strip the versioning information
        """
        Fetcher.__init__(self, url)
        self.export = export


if which('hg'):

    class HgFetcher(VCSFetcher):
        """checkout a mercurial repository"""

        type = 'hg'

        def __call__(self, dest):
            """not implemented yet; only validates an existing `dest`"""
            if os.path.exists(dest):
                is_repository = (os.path.isdir(dest) and
                                 os.path.exists(os.path.join(dest, '.hg')))
                if not is_repository:
                    raise AssertionError("'%s' exists but is not a mercurial repository" % dest)
            raise NotImplementedError("TODO! Sorry!")

    fetchers.append(HgFetcher)

if which('git'):

    class GitFetcher(Fetcher):
        """checkout a git repository"""

        type = 'git'

        def __call__(self, dest):
            """not implemented yet"""
            raise NotImplementedError("TODO! Sorry!")

    fetchers.append(GitFetcher)

__all__ += [i.__name__ for i in fetchers]


class Fetch(object):
    """dispatch manifest items to the fetcher class registered for their type"""

    def __init__(self, fetchers, relative_to=None, strict=True):
        """
        - fetchers : fetcher classes to choose from
        - relative_to : directory that relative destinations are resolved
                        against; defaults to each item's manifest directory
        - strict : check that every required fetcher exists before
                   fetching anything
        """
        self.fetchers = fetchers
        self.relative_to = relative_to
        self.strict = strict

    def fetcher(self, _type):
        """find the fetcher for the appropriate type; None if there is none"""
        # search the fetchers given to this instance, not the module default
        for fetcher in self.fetchers:
            if fetcher.match(_type):
                return fetcher
        return None

    def __call__(self, url, destination, type, **options):
        """
        fetch a single `url` to `destination` using the fetcher for `type`;
        `options` are passed to the fetcher's constructor

        Raises AssertionError if no fetcher handles `type`.
        """
        fetcher = self.fetcher(type)
        if fetcher is None:
            raise AssertionError("No fetcher found for type '%s'" % type)
        fetcher(url, **options)(destination)

    def fetch(self, *items):
        """
        fetch every item; each is a dict with the keys 'url', 'dest', 'type'
        and 'options', plus 'manifest' when 'dest' is relative and no
        `relative_to` was given

        Raises AssertionError when a required fetcher is unavailable.
        """
        if self.strict:
            # ensure all the required fetchers are available up front
            types = set(item['type'] for item in items)
            missing = sorted(t for t in types if self.fetcher(t) is None)
            if missing:
                raise AssertionError("No fetcher found for type(s): %s" % ', '.join(missing))

        for item in items:

            # fix up relative paths
            dest = item['dest']
            if not os.path.isabs(dest):
                relative_to = self.relative_to or os.path.dirname(os.path.abspath(item['manifest']))
                dest = os.path.join(relative_to, dest)

            # fetch the item
            options = item.get('options') or {}
            self(item['url'], destination=dest, type=item['type'], **options)


format_string = "[URL] [destination] [type] <options>"


def read_manifests(*manifests):
    """
    read some manifests and return the items

    Format:
    [URL] [destination] [type] <options>

    Blank lines and lines starting with '#' are skipped. <options> is an
    optional comma-separated list of key=value pairs. Each returned item is
    a dict with the keys 'url', 'dest', 'type', 'options' and 'manifest'.

    Raises AssertionError if a manifest does not exist and ValueError if a
    line does not follow the format.
    """

    # sanity check
    missing = [manifest for manifest in manifests if not os.path.exists(manifest)]
    if missing:
        raise AssertionError("Manifest(s) not found: %s" % ', '.join(missing))

    retval = []
    for manifest in manifests:
        with open(manifest) as f:
            lines = f.readlines()
        for line in lines:
            line = line.strip()
            if line.startswith('#') or not line:
                continue
            fields = line.split()
            if len(fields) not in (3, 4):
                raise ValueError("Format should be: %s; line %s" % (format_string, fields))
            options = {}
            if len(fields) == 4:
                option_string = fields.pop().rstrip(',')
                try:
                    options = dict([[j.strip() for j in i.split('=', 1)]
                                    for i in option_string.split(',')])
                except ValueError:
                    # dict() rejects any entry that lacks an '='
                    raise ValueError("Options format should be: key=value,key2=value2,...; got %s" % option_string)

            url, dest, _type = fields
            retval.append(dict(url=url, dest=dest, type=_type, options=options, manifest=manifest))
    return retval


def main(args=None):
    """
    command line entry point

    - args : command line arguments; defaults to sys.argv[1:] at call time
    """
    if args is None:
        args = sys.argv[1:]

    # parse command line options
    usage = '%prog [options] manifest [manifest] [...]'

    class PlainDescriptionFormatter(optparse.IndentedHelpFormatter):
        """print the description as written instead of re-wrapping it"""

        def format_description(self, description):
            if description:
                return description + '\n'
            else:
                return ''

    parser = optparse.OptionParser(usage=usage, description=__doc__, formatter=PlainDescriptionFormatter())
    parser.add_option('-o', '--output',
                      help="output relative to this location vs. the manifest location")
    parser.add_option('-d', '--dest',  # XXX unused
                      action='append',
                      help="output only these destinations")
    parser.add_option('-s', '--strict',
                      action='store_true', default=False,
                      help="fail on error")
    parser.add_option('--list-fetchers', dest='list_fetchers',
                      action='store_true', default=False,
                      help='list available fetchers and exit')
    options, args = parser.parse_args(args)

    if options.list_fetchers:
        types = set()
        for fetcher in fetchers:
            if fetcher.type in types:
                continue  # occluded, should probably display separately
            # a single parenthesized argument prints the same on Python 2 and 3
            print('%s : %s' % (fetcher.type, fetcher.doc()))
            types.add(fetcher.type)
        parser.exit()

    if not args:
        # TODO: could read from stdin
        parser.print_help()
        parser.exit()

    items = read_manifests(*args)
    # -o/--output overrides the manifest directory for relative destinations
    fetch = Fetch(fetchers, relative_to=options.output, strict=options.strict)

    # download the files
    fetch.fetch(*items)


if __name__ == '__main__':
    main()