Mercurial > hg > config
annotate python/find_duplicate_files.py @ 825:5a74c7ae19cd
a useful module; however it does not work; scrapping
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 19 Feb 2017 09:03:52 -0800 |
parents | bea4dd61ae45 |
children | aa9a3850ed56 |
rev | line source |
---|---|
711
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
3 |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
4 """ |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
5 find duplicate files in a directory |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
6 """ |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
7 |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
8 # imports |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
9 import argparse |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
10 import os |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
11 import subprocess |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
12 import sys |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
13 |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
14 |
799
dbd2562cb03e
remove old way of doing things; note TODO on replacing
Jeff Hammel <k0scist@gmail.com>
parents:
711
diff
changeset
|
class DuplicateFilesParser(argparse.ArgumentParser):
    """CLI option parser.

    Accepts a single positional ``directory`` argument and checks that it
    names an existing directory. The most recently parsed options are kept
    on ``self.options`` (``None`` until ``parse_args`` has succeeded).
    """

    def __init__(self, **kwargs):
        """Build the parser.

        ``kwargs`` are forwarded to ``argparse.ArgumentParser``; the
        module docstring is used as the description unless the caller
        supplies one.
        """
        kwargs.setdefault('description', __doc__)
        # explicit base-class call kept (rather than zero-argument super())
        # so the class also works on Python 2.7
        argparse.ArgumentParser.__init__(self, **kwargs)
        self.add_argument('directory',
                          help="directory to search for duplicate files")
        self.options = None

    def parse_args(self, *args, **kw):
        """Parse the command line, validate the result and return it.

        Takes the same arguments as ``argparse.ArgumentParser.parse_args``.
        The options are stored on ``self.options`` only after validation
        has passed.
        """
        options = argparse.ArgumentParser.parse_args(self, *args, **kw)
        self.validate(options)
        self.options = options
        return options

    def validate(self, options):
        """Validate parsed options.

        Reports an error through ``self.error`` (which prints a usage
        message and exits) when ``options.directory`` is not an existing
        directory.
        """
        if not os.path.isdir(options.directory):
            self.error("Not a directory: {}".format(options.directory))
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
34 |
801 | 35 |
711
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
def main(args=None):
    """Command-line entry point.

    ``args`` is the list of command-line argument strings, without the
    program name. When it is ``None`` (the default) argparse reads
    ``sys.argv[1:]`` at call time; the previous default captured
    ``sys.argv[1:]`` once, when the function was defined.

    Invalid arguments make the parser print a usage message and exit.
    After successful parsing this always raises ``NotImplementedError``:
    the duplicate search itself has not been written yet.
    """

    # parse command line options; the parsed result stays available as
    # ``parser.options`` for the implementation that replaces the TODO
    parser = DuplicateFilesParser()
    parser.parse_args(args)

    # get all files
    raise NotImplementedError('TODO')  # -> record TODO items
711
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
45 |
ab831c7621e9
hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
# run the command-line interface only when this file is executed as a
# script, not when it is imported as a module
if __name__ == '__main__':
    main()