comparison bin/wgrep.py @ 0:f3ab51c79813

adding configuration from https://svn.openplans.org/svn/config_jhammel/
author k0s <k0scist@gmail.com>
date Thu, 15 Oct 2009 11:41:26 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f3ab51c79813
1 #!/usr/bin/env python
2
3 import sys
4 import urlparse
5 import urllib2
6 import tempfile
7 import shutil
8 import subprocess
9
def usage():
    """Print the command-line synopsis to stdout and exit.

    The program name shown in the synopsis is read from ``sys.argv[0]``.

    Raises:
        SystemExit: always, with status 0.
    """
    # A parenthesised single-argument print behaves the same whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print('Usage: %s <url> <pattern>' % sys.argv[0])
    sys.exit(0)
13
def geturl(origurl):
    """Split *origurl* into its components, defaulting the scheme to http.

    Args:
        origurl: URL string as given by the user. The scheme may be
            omitted, e.g. ``example.com/docs`` or ``example.com:8080/docs``.

    Returns:
        The result of ``urlparse.urlsplit``: a 5-tuple of
        (scheme, netloc, path, query, fragment).
    """
    url = urlparse.urlsplit(origurl)
    scheme = url[0]
    # urlsplit may report the host of a bare "host:port/path" as the scheme
    # (everything before the first ':'), so a scheme is only trusted when the
    # input really starts with "<scheme>://". urlsplit lowercases the scheme,
    # hence the case-insensitive comparison.
    if not scheme or not origurl.lower().startswith('%s://' % scheme):
        url = urlparse.urlsplit('http://%s' % origurl)
    return url
20
def main(args):
    """Mirror a URL with wget and print the URLs of pages matching a pattern.

    Args:
        args: command-line arguments without the program name; expected to
            be exactly ``[url, pattern]``. Any other count prints the usage
            message and exits via ``usage()``.

    Raises:
        SystemExit: with status 1 when the URL cannot be opened, with
            wget's exit status when the download fails, or with grep's exit
            status when grep reports an error (status greater than 1).
    """
    if len(args) != 2:
        usage()
    target, pattern = args
    urlparts = geturl(target)
    url = urlparse.urlunsplit(urlparts)

    # ensure the url is openable; URLError is the base class of HTTPError,
    # so this also covers DNS and connection failures
    try:
        u = urllib2.urlopen(url)
    except urllib2.URLError as e:
        print('%s\n%s' % (url, e))
        sys.exit(1)
    # only the URL that was actually retrieved is needed; release the
    # connection before handing over to wget
    fetched_url = u.url
    u.close()

    thedir = tempfile.mkdtemp()
    # set to False to keep the downloaded tree (its path is printed instead)
    destructive = True
    try:
        # wget the files
        wget = subprocess.Popen(['wget', '-r', '-l0',
                                 '--no-parent',
                                 '--no-check-certificate',
                                 '-P', thedir,
                                 fetched_url],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = wget.communicate()
        if wget.returncode:
            # surface wget's diagnostics instead of exiting silently
            sys.stderr.write(err)
            sys.exit(wget.returncode)

        # grep the mirrored tree; -e keeps a pattern starting with '-' from
        # being parsed as an option
        grep = subprocess.Popen(['grep', '-r', '-l',
                                 '-e', pattern,
                                 thedir],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = grep.communicate()
        # grep exits 1 for "no match"; anything above that is a real error
        if grep.returncode > 1:
            sys.stderr.write(err)
            sys.exit(grep.returncode)

        # wget stores files under <thedir>/<host>/<path>, so swapping the
        # directory prefix for "<scheme>://" turns each path back into a URL;
        # splitlines() avoids printing a blank line for the trailing newline
        for path in out.splitlines():
            print(path.replace('%s/' % thedir, '%s://' % urlparts[0], 1))
    finally:
        # runs on the sys.exit() paths too, so the temp dir never leaks
        if destructive:
            shutil.rmtree(thedir)
        else:
            print(thedir)


if __name__ == '__main__':
    main(sys.argv[1:])