Mercurial > hg > config
comparison python/url.py @ 754:f011ec45b8e8
add example load type interface
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Fri, 03 Jul 2015 21:07:03 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
753:05fef8e5b8a9 | 754:f011ec45b8e8 |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 """ | |
5 url manipulation | |
6 """ | |
7 | |
8 import argparse | |
9 import os | |
10 import shutil | |
11 import subprocess | |
12 import sys | |
13 import tempfile | |
14 import urlparse | |
15 import urllib2 | |
16 | |
# names exported by `from <module> import *`
__all__ = ['load', 'main']
# tuple of string types for isinstance checks (`unicode` is Python 2 only)
string = (str, unicode)
19 | |
def ensure_dir(directory):
    """
    ensure `directory` is a directory, creating it (and any missing
    parents) when it does not exist

    returns `directory` unchanged
    raises AssertionError if `directory` exists but is not a directory
    raises OSError if the directory cannot be created
    """
    if os.path.isdir(directory):
        return directory
    if os.path.exists(directory):
        # raised explicitly (rather than via `assert`) so the check still
        # runs under `python -O`; the exception type is kept for callers
        raise AssertionError(
            "%r exists but is not a directory" % (directory,))
    try:
        os.makedirs(directory)
    except OSError:
        # another process may have created the directory between the
        # checks above and `makedirs`; only a real failure is re-raised
        if not os.path.isdir(directory):
            raise
    return directory
27 | |
def isURL(url):
    """whether `url` contains a `://` scheme separator"""
    return url.find('://') != -1
30 | |
def read_s3(url):
    """
    return the contents of the s3 object at `url`

    The object is downloaded with the `s3cmd` command line tool into a
    temporary directory, read back, and the directory is removed again.

    raises subprocess.CalledProcessError if `s3cmd` exits non-zero
    """
    # `tempfile.mktemp` only invents a file name, which leaves a window
    # for another process to claim it; `mkdtemp` creates a private
    # directory, and the download target inside it does not exist yet
    workdir = tempfile.mkdtemp()
    try:
        name = os.path.join(workdir, 'download')
        subprocess.check_output(['s3cmd', 'get', url, name])
        with open(name) as f:
            return f.read()
    finally:
        # best-effort cleanup so a removal failure cannot mask the
        # exception (if any) raised by the download or the read
        shutil.rmtree(workdir, ignore_errors=True)
42 | |
def read_http(url):
    """
    return the body of the response obtained by opening `url`
    with `urllib2.urlopen`
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # release the connection instead of waiting for garbage collection
        response.close()
45 | |
def read_file(url):
    """
    return the contents of a local file

    `url` is either a plain filesystem path or a path prefixed
    with `file://`; the prefix is stripped before opening
    """
    # named `prefix` so the module-level `scheme` function is not shadowed
    prefix = 'file://'
    if url.startswith(prefix):
        url = url[len(prefix):]
    # context manager so the file handle is closed deterministically
    with open(url) as f:
        return f.read()
51 | |
# dispatch table: URL scheme -> function returning the contents at a URL
loaders = dict(
    s3=read_s3,
    http=read_http,
    https=read_http,
    file=read_file,
)
57 | |
def scheme(url):
    """
    return the scheme of `url`;
    anything without a `://` separator is treated as a local 'file'
    """
    if '://' not in url:
        return 'file'
    return urlparse.urlsplit(url).scheme
63 | |
def parent(url):
    """
    return the container of `url`: everything before the last `/`
    for a URL, or the absolute directory name for a local path
    """
    if '://' not in url:
        # local file
        directory = os.path.dirname(url)
        return os.path.abspath(directory)
    head, _sep, _tail = url.rpartition('/')
    return head
70 | |
def basename(url):
    """
    return the final component of `url`: everything after the last `/`
    for a URL, or `os.path.basename` for a local path
    """
    if '://' not in url:
        # local file
        return os.path.basename(url)
    _head, _sep, tail = url.rpartition('/')
    return tail
77 | |
def loader(url):
    """
    return the function from `loaders` registered for the scheme of `url`
    (KeyError if the scheme has no entry)
    """
    key = scheme(url)
    return loaders[key]
80 | |
def load(url):
    """return the contents found at `url`"""
    read = loader(url)
    return read(url)
84 | |
def get_file(src, dest):
    """
    copy the local file `src` to `dest` with `shutil.copy2`

    `src` may be a plain path or a `file://` URL; the prefix is
    stripped first, since `file://` sources are dispatched here too
    and `copy2` needs a filesystem path
    """
    prefix = 'file://'
    if src.startswith(prefix):
        src = src[len(prefix):]
    shutil.copy2(src, dest)
87 | |
def get_s3(src, dest):
    """download the s3 object `src` to `dest` by running `s3cmd get`"""
    command = ['s3cmd', 'get', src, dest]
    subprocess.check_output(command)
90 | |
def default_getter(src, dest):
    """
    fallback getter: load the whole contents of `src` and write
    them to the local path `dest`

    The directory that will hold `dest` is created if missing.

    raises ValueError if `dest` is a URL rather than a local path
    """
    # the module-level `isURL` is the intended check; `os.path` has no
    # such attribute
    if isURL(dest):
        raise ValueError(
            "destination must be a local path, not a URL: %r" % (dest,))
    ensure_dir(parent(dest))
    # binary mode so fetched content is written back byte-for-byte
    with open(dest, 'wb') as f:
        # the source to fetch is `src`; no name `url` exists in this scope
        f.write(load(src))
97 | |
# dispatch table: URL scheme -> function copying a source to a local path;
# schemes without an entry are handled by `default_getter` (see `get`)
getters = dict(
    file=get_file,
    s3=get_s3,
)
101 | |
def get(src, dest):
    """
    fetch `src` to a local file

    when `dest` is an existing directory the file is placed inside it
    under the basename of `src`; the getter is chosen by the scheme of
    `src`, falling back to `default_getter`
    """
    target = dest
    if os.path.isdir(target):
        target = os.path.join(target, basename(src))
    fetch = getters.get(scheme(src), default_getter)
    return fetch(src, target)
107 | |
def rel(base, path):
    """
    strip the leading `base` from `path`

    returns the remainder of `path` when it starts with `base`,
    otherwise None
    """
    if not path.startswith(base):
        return None
    return path[len(base):]
116 | |
def main(args=sys.argv[1:]):
    """command line entry point: print a URL's contents or copy it to a file"""

    # command line interface
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('url', help='URL to read')
    parser.add_argument('-o', '--output', dest='output',
                        help="get to this location")
    opts = parser.parse_args(args)

    if not opts.output:
        # no destination given: read the location and print it
        print (load(opts.url))
        return

    # copy the source to the requested location, then exit
    get(opts.url, opts.output)
    sys.exit()
137 | |
# run the command line interface when executed as a script
if __name__ == '__main__':
    main()