Mercurial > hg > config
annotate python/html2flux.py @ 283:14d33956c546
update to modernity
author | Jeff Hammel <jhammel@mozilla.com> |
---|---|
date | Sat, 04 May 2013 18:45:30 -0700 |
parents | 069a739d88ad |
children | fbc033540a34 |
rev | line source |
---|---|
45
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 import sys |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 from lxml import etree |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 from lsex import lsex # local import |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 executables = set([i.rsplit('/', 1)[-1] for i in lsex() ]) |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 def printmenu(dl, output, top=True): |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 # XXX should do more checking |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 for child in dl.iterchildren(): |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 if not top and child.tag == 'a': |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 print >> output, '[submenu] (%s)' % child.text |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
15 if child.tag == 'dt': |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
16 label = ' '.join([ i.strip() for i in child.itertext() if i.strip() ]) |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
17 if child.tag == 'dd': |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
18 command = ' '.join([ i.strip() for i in child.itertext() if i.strip() ]) |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
19 executable = command.split()[0] |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
20 if executable in executables: |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
21 print >> output, '[exec] (%s) {%s}' % (label, command) |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
22 if child.tag == 'dl': |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
23 printmenu(child, output, top=False) |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
24 if not top: |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
25 print >> output, '[end]' |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
26 |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
27 def main(args = sys.argv[1:]): |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
28 |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
29 # setup input, output |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
30 if args: |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
31 htmlfile = file(args[0]) |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
32 else: |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
33 htmlfile = sys.stdin |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
34 html = htmlfile.read() |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
35 fluxout = sys.stdout |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
36 |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
37 # get first element |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
38 dom = etree.fromstring(html) |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
39 dl = dom.find('.//dl') |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
40 |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
41 printmenu(dl, fluxout) |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
42 |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
43 if __name__ == '__main__': |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
44 main() |