annotate python/find_duplicate_files.py @ 825:5a74c7ae19cd

a useful module; however it does not work; scrapping
author Jeff Hammel <k0scist@gmail.com>
date Sun, 19 Feb 2017 09:03:52 -0800
parents bea4dd61ae45
children aa9a3850ed56
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
711
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
1 #!/usr/bin/env python
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
3
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
4 """
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
5 find duplicate files in a directory
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
6 """
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
7
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
8 # imports
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
9 import argparse
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
10 import os
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
11 import subprocess
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
12 import sys
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
13
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
14
799
dbd2562cb03e remove old way of doing things; note TODO on replacing
Jeff Hammel <k0scist@gmail.com>
parents: 711
diff changeset
class DuplicateFilesParser(argparse.ArgumentParser):
    """CLI option parser.

    Takes a single positional ``directory`` argument and rejects any value
    that is not an existing directory.  The namespace produced by the most
    recent successful ``parse_args`` call is kept on ``self.options``
    (``None`` until then).
    """

    def __init__(self, **kwargs):
        """Create the parser.

        ``kwargs`` are passed through to ``argparse.ArgumentParser``.  The
        module docstring is used as the description unless the caller
        supplies one.
        """
        kwargs.setdefault('description', __doc__)
        argparse.ArgumentParser.__init__(self, **kwargs)
        self.add_argument('directory')
        self.options = None

    def parse_args(self, *args, **kw):
        """Parse the command line, validate it, and return the namespace.

        Arguments are forwarded unchanged to
        ``argparse.ArgumentParser.parse_args``.  The result is stored on
        ``self.options`` only after validation has passed.
        """
        options = argparse.ArgumentParser.parse_args(self, *args, **kw)
        self.validate(options)
        self.options = options
        return options

    def validate(self, options):
        """validate options

        Reports a parser error (via ``self.error``) when
        ``options.directory`` is not an existing directory.
        """
        if not os.path.isdir(options.directory):
            self.error("Not a directory: {}".format(options.directory))
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
34
801
bea4dd61ae45 cleanup
Jeff Hammel <k0scist@gmail.com>
parents: 799
diff changeset
35
711
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
def main(args=None):
    """CLI

    ``args`` is the list of command-line arguments to parse.  When it is
    ``None`` (the default), ``sys.argv[1:]`` is read at call time, so
    changes made to ``sys.argv`` after import are honoured.

    Raises ``NotImplementedError``: the duplicate-finding step has not been
    written yet.
    """
    if args is None:
        args = sys.argv[1:]

    # parse command line options; parse_args also validates the directory
    parser = DuplicateFilesParser()
    parser.parse_args(args)

    # get all files
    raise NotImplementedError('TODO')  # -> record TODO items
711
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
45
ab831c7621e9 hacky way to note duplicate files
Jeff Hammel <k0scist@gmail.com>
parents:
diff changeset
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()