diff bin/wgrep.py @ 0:f3ab51c79813

adding configuration from https://svn.openplans.org/svn/config_jhammel/
author k0s <k0scist@gmail.com>
date Thu, 15 Oct 2009 11:41:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/wgrep.py	Thu Oct 15 11:41:26 2009 -0400
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+import sys
+import urlparse
+import urllib2
+import tempfile
+import shutil
+import subprocess
+
+def usage():
+    print 'Usage: %s <url> <pattern>' % sys.argv[0]
+    sys.exit(0)
+
+def geturl(origurl):
+    # get the url
+    url = urlparse.urlsplit(origurl)
+    if not url[0]:
+        url = urlparse.urlsplit('http://%s' % origurl)
+    return url
+
+if __name__ == '__main__':
+    if len(sys.argv[1:]) != 2:
+        usage()
+    urlparts = geturl(sys.argv[1])
+    url = urlparse.urlunsplit(urlparts)
+
+    # ensure the url is openable
+    try:
+        u = urllib2.urlopen(url)
+    except urllib2.HTTPError, e:
+        print '%s\n%s' % (url, e)
+        sys.exit(1)
+
+    thedir = tempfile.mkdtemp()
+
+    # wget the files
+    wget = subprocess.Popen(['wget', '-r', '-l0',
+                             '--no-parent',
+                             '--no-check-certificate',
+                             '-P', thedir,
+                             u.url],
+                            stdout = subprocess.PIPE,
+                            stderr = subprocess.PIPE,
+                            )
+    out, err = wget.communicate()
+    code = wget.returncode
+    if code:
+        sys.exit(code)
+
+    # do da grep
+    grep = subprocess.Popen(['grep', '-r', '-l', 
+                             sys.argv[2], 
+                             thedir],
+                            stdout = subprocess.PIPE,
+                            stderr = subprocess.PIPE,
+                            )
+    out, err = grep.communicate()
+    for i in out.split('\n'):
+        print i.replace('%s/' % thedir, '%s://' % urlparts[0], 1)
+
+    destructive = True
+    if destructive:
+        shutil.rmtree(thedir)
+    else:
+        print thedir