Mercurial > hg > config
annotate python/html2flux.py @ 333:998d3c276b25
http://askubuntu.com/questions/168117/how-to-automatically-add-meta-tags-to-screenshot?rq=1
author | Jeff Hammel <jhammel@mozilla.com> |
---|---|
date | Tue, 18 Jun 2013 14:44:14 -0700 |
parents | ee3c1b65d6d1 |
children | a43d0205f80b |
rev | line source |
---|---|
45
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 |
292 | 3 """ |
4 transform an HTML <dl> file into a fluxbox menu | |
5 if no file is given, read from stdin | |
6 | |
7 <dl><a>submenu name</a> | |
8 <dt>program label</dt><dd>command</dd> | |
9 <dt>another program label</dt><dd>command2</dd> | |
10 </dl> | |
11 | |
12 x-form -> internal format: | |
13 | |
14 ('submenu name': [('program label', 'command'), | |
15 ('another program label', 'command2')]) | |
16 """ | |
17 | |
18 import optparse | |
294 | 19 import os |
45
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
20 import sys |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
21 from lxml import etree |
292 | 22 from lsex import lsex # local import |
45
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
23 |
# names of the available executables: the last '/'-separated component
# of each entry yielded by lsex()
# (presumably lsex() lists executable paths -- confirm against lsex.py)
executables = set(path.rsplit('/', 1)[-1] for path in lsex())
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
26 |
def _element_text(element):
    """return all text inside element, whitespace-trimmed and space-joined"""
    pieces = [text.strip() for text in element.itertext()]
    return ' '.join([piece for piece in pieces if piece])


def readmenu(dl, output, top=True):
    """
    read menu from an <dl> tag
    - dl: element whose direct children are scanned; anything that can be
      iterated for children and offers .tag, .text and .itertext() works
    - output: not used for reading; only handed on to recursive calls
    - top: True for the outermost <dl>, whose <a> children are ignored

    returns (name, menu_items) where name is the stripped text of a direct
    <a> child (None if there is none, or if top is true) and menu_items is
    a list of (label, command) pairs and, for nested <dl> tags, nested
    (name, menu_items) tuples, in document order
    """
    # TODO: probably don't really need lxml

    menu_items = []
    name = None  # menu name
    label = None  # text of the most recent <dt>; reused by every following <dd>

    for child in dl:
        tag = child.tag

        if tag == 'a':
            # TODO: better way of labeling this!
            # NOTE: every direct <a> child is considered, so the last one wins
            if not top:
                # an <a> without direct text has .text == None
                name = (child.text or '').strip()

        elif tag == 'dt':
            # item label
            label = _element_text(child)

        elif tag == 'dd':
            # command
            command = _element_text(child)
            if not command:
                # empty <dd>: there is no executable to look up
                continue
            # TODO: classes
            executable = command.split()[0]
            # keep only commands that are absolute paths or known executables
            if os.path.isabs(executable) or executable in executables:
                menu_items.append((label, command))

        elif tag == 'dl':
            # submenu
            menu_items.append(readmenu(child, output, top=False))

    return (name, menu_items)
292 | 57 |
def printflux(name, menu, output, top=True):
    """
    write a menu in fluxbox menu syntax
    - name: label of this menu; None is written as an empty label
    - menu: list of (label, item) pairs, where item is either a command
      string or a nested list of such pairs (a submenu)
    - output: file-like object for writing
    - top: if true, the items are written without an enclosing
      [submenu] ... [end] pair
    """

    # string types: basestring exists on Python 2 only
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # write [submenu] tag for this menu
    name = name or ''
    if not top:
        output.write('[submenu] (%s)\n' % name)

    # write menu items
    for label, item in menu:
        if isinstance(item, string_types):
            # command
            output.write('[exec] (%s) {%s}\n' % (label, item))
        else:
            # submenu
            printflux(label, item, output, top=False)

    # write end of this submenu
    if not top:
        output.write('[end]\n')
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
80 |
def printmenu(dl, output):
    """read the menu from a <dl> element and write it to output via printflux"""
    # readmenu returns (name, menu); both are passed on positionally
    printflux(*readmenu(dl, output), output=output)
293 | 84 |
def main(args=sys.argv[1:]):
    """
    command line interface
    - args: command line arguments, without the program name

    reads the document from the file named by the first positional
    argument (stdin if there is none), finds a <dl> element and writes
    the menu to the file given with -o/--output (stdout if not given)
    """

    # parse command line options
    usage = '%prog [options] [menu.html]'
    parser = optparse.OptionParser(usage=usage,
                                   description=__doc__)
    # NOTE: --collapse is accepted but nothing in this function acts on it yet
    parser.add_option('--collapse', dest='collapse',
                      action='store_true', default=False,
                      help="collapse menus with a single item to that item")
    parser.add_option('-o', '--output', dest='output',
                      help="output file [Default: <stdout>]")
    options, args = parser.parse_args(args)

    # read input; a file opened here is closed as soon as it has been read
    if args:
        with open(args[0]) as htmlfile:
            html = htmlfile.read()
    else:
        html = sys.stdin.read()

    # get first <dl> element
    dom = etree.fromstring(html)
    dl = dom.find('.//dl')
    if dl is None and dom.tag == 'dl':
        # './/dl' only matches descendants; fall back to the root itself
        dl = dom
    if dl is None:
        parser.error("no <dl> element found in input")

    # write the menu to the requested destination
    if options.output:
        with open(options.output, 'w') as fluxout:
            printmenu(dl, fluxout)
    else:
        printmenu(dl, sys.stdout)
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
113 |
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
# run the command line interface when executed as a script
if __name__ == '__main__':
    main()