comparison python/url.py @ 754:f011ec45b8e8

add example load type interface
author Jeff Hammel <k0scist@gmail.com>
date Fri, 03 Jul 2015 21:07:03 -0700
parents
children
comparison
equal deleted inserted replaced
753:05fef8e5b8a9 754:f011ec45b8e8
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 url manipulation
6 """
7
8 import argparse
9 import os
10 import shutil
11 import subprocess
12 import sys
13 import tempfile
14 import urlparse
15 import urllib2
16
# names exported by a star-import of this module
__all__ = ['load', 'main']

# the two python 2 string types: byte strings and unicode strings
string = (str, unicode)
19
def ensure_dir(directory):
    """
    ensure `directory` is a directory, creating it (and any missing
    parents) if it does not exist

    returns `directory` unchanged;
    raises AssertionError if the path exists but is not a directory;
    raises OSError if the directory cannot be created
    """
    if os.path.isdir(directory):
        return directory
    if os.path.exists(directory):
        # raised explicitly (rather than via `assert`) so the check
        # still runs under `python -O`
        raise AssertionError("not a directory: %s" % directory)
    try:
        os.makedirs(directory)
    except OSError:
        # something else may have created it between the check and the
        # call; that is fine as long as the directory is there now
        if not os.path.isdir(directory):
            raise
    return directory
27
def isURL(url):
    """returns whether `url` contains a '://' scheme separator"""
    separator = '://'
    return separator in url
30
def read_s3(url):
    """
    returns the contents of the s3 object at `url`, fetched by running
    `s3cmd get` into a temporary location that is removed afterwards

    raises subprocess.CalledProcessError if `s3cmd` exits nonzero
    """
    # tempfile.mktemp() only returns a name, which another process can
    # claim before it is used; a private temporary directory avoids
    # that while the download path itself still does not pre-exist
    tmpdir = tempfile.mkdtemp()
    try:
        name = os.path.join(tmpdir, 'download')
        subprocess.check_output(['s3cmd', 'get', url, name])
        with open(name) as f:
            return f.read()
    finally:
        # cleanup must not mask an error raised by the download or read
        shutil.rmtree(tmpdir, ignore_errors=True)
42
def read_http(url):
    """returns the response body obtained by opening `url` with urllib2"""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # release the connection instead of leaving it to the
        # garbage collector
        response.close()
45
def read_file(url):
    """
    returns the contents of a local file;
    `url` is either a plain path or a file:// URL
    """
    # named `prefix` so the module-level `scheme` function is not shadowed
    prefix = 'file://'
    path = url[len(prefix):] if url.startswith(prefix) else url
    # the context manager closes the handle even if the read fails
    with open(path) as f:
        return f.read()
51
# URL scheme -> function taking a URL and returning its contents
loaders = dict(
    s3=read_s3,
    http=read_http,
    https=read_http,
    file=read_file,
)
57
def scheme(url):
    """
    returns the scheme of `url` as parsed by urlparse.urlsplit;
    anything without a '://' is treated as a local path: 'file'
    """
    if '://' not in url:
        return 'file'
    return urlparse.urlsplit(url).scheme
63
def parent(url):
    """
    returns the parent location of `url`

    for a URL (anything containing '://') this is everything before
    the last '/' following the scheme separator; a URL with no '/'
    after the separator is returned unchanged;
    for a local path this is the absolute path of its directory
    """
    head, separator, rest = url.partition('://')
    if not separator:
        # file
        return os.path.abspath(os.path.dirname(url))
    if '/' not in rest:
        # nothing after the host to strip; splitting here would cut
        # into the '://' itself ('http://host' -> 'http:/')
        return url
    return head + separator + rest.rsplit('/', 1)[0]
70
def basename(url):
    """
    returns the final component of `url`:
    the text after the last '/' for a URL,
    os.path.basename for a local path
    """
    if '://' not in url:
        # file
        return os.path.basename(url)
    return url.rpartition('/')[2]
77
def loader(url):
    """
    returns the function from `loaders` registered for the scheme of `url`;
    raises KeyError if the scheme has no entry
    """
    key = scheme(url)
    return loaders[key]
80
def load(url):
    """returns the contents of a URL"""
    # pick the reader for this URL's scheme, then apply it
    read = loader(url)
    return read(url)
84
def get_file(src, dest):
    """
    copy the local file `src` to `dest` with shutil.copy2;
    `src` is either a plain path or a file:// URL
    """
    # file:// URLs are dispatched here too, so strip the prefix the
    # same way read_file does before handing the path to shutil
    prefix = 'file://'
    if src.startswith(prefix):
        src = src[len(prefix):]
    shutil.copy2(src, dest)
87
def get_s3(src, dest):
    """
    fetch `src` to `dest` by running `s3cmd get`;
    raises subprocess.CalledProcessError if the command exits nonzero
    """
    command = ['s3cmd', 'get', src, dest]
    subprocess.check_output(command)
90
def default_getter(src, dest):
    """
    fallback getter: read `src` with `load` and write the result to
    the local path `dest`, creating the parent directory if needed

    raises AssertionError if `dest` is itself a URL
    """
    # isURL is this module's helper (os.path has no such function);
    # raised explicitly so the check still runs under `python -O`
    if isURL(dest):
        raise AssertionError("destination must be a local path: %s" % dest)
    ensure_dir(parent(dest))
    with open(dest, 'w') as f:
        # the contents come from the source location, `src`
        f.write(load(src))
97
# URL scheme -> function(src, dest) that fetches src to a local dest;
# schemes without an entry are handled by the caller's fallback
getters = dict(file=get_file, s3=get_s3)
101
def get(src, dest):
    """get a thing to a local file"""
    target = dest
    if os.path.isdir(target):
        # an existing directory: keep the source's file name inside it
        target = os.path.join(target, basename(src))
    # schemes without a dedicated getter fall back to default_getter
    fetch = getters.get(scheme(src), default_getter)
    return fetch(src, target)
107
def rel(base, path):
    """
    returns the remainder of `path` after the prefix `base`,
    or None if `path` does not start with `base`
    """
    if not path.startswith(base):
        return None
    return path[len(base):]
116
def main(args=sys.argv[1:]):
    """CLI"""

    # command line interface
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('url', help='URL to read')
    parser.add_argument('-o', '--output', dest='output',
                        help="get to this location")
    options = parser.parse_args(args)

    if not options.output:
        # no destination given: read the location and print it
        print (load(options.url))
        return

    # a destination was given: copy the source there, then exit
    get(options.url, options.output)
    sys.exit()
137
# run the command line interface only when executed as a script
if __name__ == '__main__':
    main()