annotate python/html2flux.py @ 158:c1390e43b437

point pastebin at pastebin.mozilla
author Jeff Hammel <jhammel@mozilla.com>
date Thu, 28 Jul 2011 09:18:15 -0700
parents 069a739d88ad
children fbc033540a34
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
45
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3 import sys
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 from lxml import etree
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 from lsex import lsex # local import
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# Final path component of every entry returned by lsex(); printmenu() only
# emits menu items whose first command word is in this set.
executables = set(path.rsplit('/', 1)[-1] for path in lsex())
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def _collapsed_text(element):
    """Return the text chunks inside *element*, stripped and space-joined."""
    pieces = (piece.strip() for piece in element.itertext())
    return ' '.join(piece for piece in pieces if piece)


def printmenu(dl, output, top=True, available=None):
    """Write the fluxbox menu entries described by a <dl> element.

    Direct children of ``dl`` are handled by tag:

    * ``dt`` -- its text becomes the label for the following ``dd`` items
    * ``dd`` -- its text is a command line; an ``[exec]`` entry is written
      only if the first word of the command is in ``available``
    * ``dl`` -- a nested menu, handled recursively with ``top=False``
    * ``a``  -- inside a nested menu, its text is the ``[submenu]`` title

    Arguments:
      dl        -- element whose children are iterated (lxml or ElementTree)
      output    -- object with a ``write`` method that receives the menu text
      top       -- True for the outermost menu; nested menus get a
                   ``[submenu]`` title (from ``a``) and a closing ``[end]``
      available -- collection of program names allowed in the menu; defaults
                   to the module-level ``executables``
    """
    if available is None:
        available = executables

    # No label yet: a <dd> may legitimately appear before any <dt>.
    label = None

    # XXX should do more checking
    for child in dl:
        tag = child.tag
        if tag == 'a':
            if not top:
                # Gather nested text too, so <a><b>x</b></a> is not "(None)".
                output.write('[submenu] (%s)\n' % _collapsed_text(child))
        elif tag == 'dt':
            label = _collapsed_text(child)
        elif tag == 'dd':
            command = _collapsed_text(child)
            if not command:
                # An empty <dd> has no executable to look up; skip it rather
                # than fail on command.split()[0].
                continue
            executable = command.split()[0]
            if executable in available:
                # Fall back to the program name when no usable label exists.
                title = label or executable
                output.write('[exec] (%s) {%s}\n' % (title, command))
        elif tag == 'dl':
            printmenu(child, output, top=False, available=available)

    if not top:
        output.write('[end]\n')
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
26
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=sys.argv[1:]):
    """Convert a page's first <dl> into a fluxbox menu on stdout.

    Arguments:
      args -- command-line arguments; if non-empty, ``args[0]`` is the path
              of the file to read, otherwise the document is read from stdin

    The document is parsed with ``etree.fromstring``, which is an XML
    parser, so the input must be well-formed (e.g. XHTML).

    Raises ValueError if the document contains no <dl> element.
    """
    # setup input, output
    if args:
        # open() instead of the Python-2-only file(); the with-block closes
        # the handle as soon as the contents are read.
        with open(args[0]) as htmlfile:
            html = htmlfile.read()
    else:
        html = sys.stdin.read()
    fluxout = sys.stdout

    # get first element
    dom = etree.fromstring(html)
    # find('.//dl') only searches below the root, so check the root itself.
    dl = dom if dom.tag == 'dl' else dom.find('.//dl')
    if dl is None:
        raise ValueError('no <dl> element found in input')

    printmenu(dl, fluxout)
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
42
069a739d88ad get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# Script entry point: convert the input only when run directly, not on import.
if __name__ == '__main__':
    main()