annotate bin/wgrep.py @ 664:ffb75d832afe

make sneaky urls
author Jeff Hammel <k0scist@gmail.com>
date Mon, 07 Apr 2014 19:24:57 -0700
parents f3ab51c79813
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
2
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
3 import sys
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
4 import urlparse
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
5 import urllib2
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
6 import tempfile
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
7 import shutil
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
8 import subprocess
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
9
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
def usage():
    """Print the command-line synopsis and terminate the process.

    This is only invoked when the argument count is wrong, so the message
    goes to stderr and the process exits with status 2 (the conventional
    "bad invocation" status) rather than reporting success.

    Raises:
        SystemExit: always, with code 2.
    """
    # sys.stderr.write works identically on Python 2 and 3.
    sys.stderr.write('Usage: %s <url> <pattern>\n' % sys.argv[0])
    sys.exit(2)
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
13
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
def geturl(origurl):
    """Split *origurl* into its components, defaulting the scheme to http.

    Args:
        origurl: a URL string, with or without a leading ``scheme://``.

    Returns:
        The ``urlsplit`` result (scheme, netloc, path, query, fragment).
        When *origurl* carries no usable scheme it is re-parsed as
        ``http://<origurl>``.
    """
    url = urlparse.urlsplit(origurl)
    # Only look for "://" ahead of any query/fragment, so that something like
    # "host:8080/x?next=http://other" is not mistaken for having a scheme.
    head = origurl.split('?', 1)[0].split('#', 1)[0]
    # urlsplit treats "host:8080/path" as scheme "host" with an empty netloc;
    # such input has no real scheme and needs the http:// prefix as well.
    if not url[0] or (not url[1] and '://' not in head):
        url = urlparse.urlsplit('http://%s' % origurl)
    return url
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
20
f3ab51c79813 adding configuration from https://svn.openplans.org/svn/config_jhammel/
k0s <k0scist@gmail.com>
parents:
diff changeset
def main(args, destructive=True):
    """Mirror a URL with wget and list the mirrored pages matching a pattern.

    Args:
        args: command-line arguments, expected to be ``[url, pattern]``.
        destructive: when true (the default) the temporary mirror directory
            is removed before returning; otherwise its path is printed so the
            download can be inspected.

    Returns:
        Process exit status: 0 on success, 1 if the URL cannot be opened,
        or wget's own non-zero exit status if the mirror fails.
    """
    if len(args) != 2:
        usage()
    urlparts = geturl(args[0])
    url = urlparse.urlunsplit(urlparts)

    # ensure the url is openable; URLError is the base class of HTTPError,
    # so this also covers DNS and connection failures
    try:
        u = urllib2.urlopen(url)
    except urllib2.URLError as e:
        print('%s\n%s' % (url, e))
        return 1
    # urlopen follows redirects: mirror (and report) the final location
    final_url = u.url
    u.close()
    scheme = urlparse.urlsplit(final_url)[0] or urlparts[0]

    thedir = tempfile.mkdtemp()
    try:
        # wget the files
        wget = subprocess.Popen(['wget', '-r', '-l0',
                                 '--no-parent',
                                 '--no-check-certificate',
                                 '-P', thedir,
                                 final_url],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        out, err = wget.communicate()
        if wget.returncode:
            # wget logs to stderr; surface it instead of failing silently
            sys.stderr.write(err)
            return wget.returncode

        # do da grep; -e keeps a pattern that starts with '-' from being
        # parsed as an option
        grep = subprocess.Popen(['grep', '-r', '-l',
                                 '-e', args[1],
                                 thedir],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        out, err = grep.communicate()
        # wget stores pages under <thedir>/<host>/<path>, so swapping the
        # directory prefix for "<scheme>://" turns each path back into a URL
        prefix = '%s/' % thedir
        for line in out.splitlines():
            print(line.replace(prefix, '%s://' % scheme, 1))
    finally:
        # clean up on every exit path, including a failed wget
        if destructive:
            shutil.rmtree(thedir)
        else:
            print(thedir)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))