Mercurial > hg > config
view bin/wgrep.py @ 925:a92db57f62f8 default tip
add lxml
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Mon, 20 Jan 2025 09:20:00 -0800 |
parents | f3ab51c79813 |
children |
line wrap: on
line source
#!/usr/bin/env python
"""
wgrep: grep the contents of a web site.

Recursively downloads <url> with ``wget`` into a temporary directory,
runs ``grep -r -l`` with <pattern> over the download and prints the URL
of every file that matches.

Usage: wgrep.py <url> <pattern>

The code is written in the common subset of Python 2 and Python 3
(single-argument ``print(...)``, ``except ... as``), so it runs under
either interpreter.
"""

import shutil
import subprocess
import sys
import tempfile

try:  # Python 2
    import urllib2
    import urlparse
    from urllib2 import URLError
except ImportError:  # Python 3: the same functions live in urllib.*
    import urllib.parse as urlparse
    import urllib.request as urllib2
    from urllib.error import URLError


def usage(status=0):
    """Print the usage line and exit the process with `status`.

    The message goes to stdout when `status` is 0 (the historical
    behaviour) and to stderr otherwise.

    Raises:
        SystemExit: always.
    """
    message = 'Usage: %s <url> <pattern>' % sys.argv[0]
    if status:
        sys.stderr.write(message + '\n')
    else:
        print(message)
    sys.exit(status)


def geturl(origurl):
    """Split `origurl` into its components, defaulting the scheme to http.

    Returns the result of ``urlsplit``; when `origurl` carries no scheme
    it is re-parsed as ``http://<origurl>``.
    """
    url = urlparse.urlsplit(origurl)
    if not url.scheme:
        url = urlparse.urlsplit('http://%s' % origurl)
    return url


def _to_text(data):
    """Return subprocess output as the interpreter's native ``str`` type."""
    if isinstance(data, str):
        # Python 2: pipes already yield the native string type
        return data
    # Python 3: pipes yield bytes; never fail on an odd file name
    return data.decode(sys.getfilesystemencoding() or 'utf-8', 'replace')


def _run(command):
    """Run `command` (an argv list); return (returncode, stdout, stderr)."""
    process = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    out, err = process.communicate()
    return process.returncode, _to_text(out), _to_text(err)


def main(args=None, destructive=True):
    """Download a URL recursively and list the pages matching a pattern.

    Args:
        args: ``[url, pattern]``; defaults to ``sys.argv[1:]``.
        destructive: remove the download directory when done (default);
            when false the directory is kept and its path is printed.

    Returns:
        The process exit status: 0 on success, 1 if the URL cannot be
        opened, otherwise the non-zero status of wget or (for errors,
        not for "no match") grep.
    """
    if args is None:
        args = sys.argv[1:]
    if len(args) != 2:
        usage(2)
    origurl, pattern = args
    url = urlparse.urlunsplit(geturl(origurl))

    # ensure the url is openable; URLError also covers HTTPError
    try:
        response = urllib2.urlopen(url)
    except URLError as e:
        sys.stderr.write('%s\n%s\n' % (url, e))
        return 1
    try:
        # the URL actually served, i.e. after any redirects
        finalurl = response.url
    finally:
        response.close()
    # wget is pointed at finalurl, so report matches with its scheme
    scheme = urlparse.urlsplit(finalurl).scheme

    thedir = tempfile.mkdtemp()
    try:
        # wget the files: recursive, unlimited depth, never above the
        # start directory, saved below thedir
        code, out, err = _run(['wget', '-r', '-l0', '--no-parent',
                               '--no-check-certificate',
                               '-P', thedir, finalurl])
        if code:
            sys.stderr.write('wget exited with status %d\n' % code)
            return code

        # grep the download; -e keeps a pattern that starts with '-'
        # from being parsed as an option
        code, out, err = _run(['grep', '-r', '-l', '-e', pattern, thedir])
        # splitlines() avoids the empty trailing entry of split('\n')
        for path in out.splitlines():
            # wget stores files as <thedir>/<host>/<path>
            print(path.replace('%s/' % thedir, '%s://' % scheme, 1))
        if code > 1:
            # grep: 0 = match, 1 = no match, >1 = error
            sys.stderr.write(err)
            return code
        return 0
    finally:
        # clean up on every exit path, including failures
        if destructive:
            shutil.rmtree(thedir)
        else:
            print(thedir)


if __name__ == '__main__':
    sys.exit(main())