Mercurial > hg > config
comparison python/find_duplicate_files.py @ 799:dbd2562cb03e
remove old way of doing things; note TODO on replacing
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Fri, 28 Oct 2016 16:06:11 -0700 |
parents | ab831c7621e9 |
children | bea4dd61ae45 |
comparison
equal
deleted
inserted
replaced
798:720e51cb0edb | 799:dbd2562cb03e |
---|---|
9 import argparse | 9 import argparse |
10 import os | 10 import os |
11 import subprocess | 11 import subprocess |
12 import sys | 12 import sys |
13 | 13 |
14 # module globals | |
15 __all__ = ['main', 'Parser'] | |
16 | 14 |
17 class Parser(argparse.ArgumentParser): | 15 class DuplicateFilesParser(argparse.ArgumentParser): |
18 """CLI option parser""" | 16 """CLI option parser""" |
17 | |
19 def __init__(self, **kwargs): | 18 def __init__(self, **kwargs): |
20 kwargs.setdefault('description', __doc__) | 19 kwargs.setdefault('description', __doc__) |
21 argparse.ArgumentParser.__init__(self, **kwargs) | 20 argparse.ArgumentParser.__init__(self, **kwargs) |
22 self.add_argument('directory') | 21 self.add_argument('directory') |
23 self.options = None | 22 self.options = None |
35 | 34 |
36 def main(args=sys.argv[1:]): | 35 def main(args=sys.argv[1:]): |
37 """CLI""" | 36 """CLI""" |
38 | 37 |
39 # parse command line options | 38 # parse command line options |
40 parser = Parser() | 39 parser = DuplicateFilesParser() |
41 options = parser.parse_args(args) | 40 options = parser.parse_args(args) |
42 | 41 |
43 output = subprocess.check_output(['ls', '-l', options.directory]).strip() | 42 # get all files |
44 rows = [row.strip().split() for row in output.splitlines()[1:]] | 43 raise NotImplementedError('TODO') # -> record TODO items |
45 | |
46 sizes = {} | |
47 for row in rows: | |
48 size = int(row[4]) | |
49 filename = row[-1] | |
50 sizes.setdefault(size, []).append(filename) | |
51 | |
52 duplicates = {} | |
53 for size, filenames in sizes.items(): | |
54 if len(filenames) < 2: | |
55 continue | |
56 duplicates[size] = filenames | |
57 | |
58 for size in sorted(duplicates.keys()): | |
59 print ('{} : '.format(size)) | |
60 print ('\n'.join(duplicates[size])) | |
61 print ('\n') | |
62 | 44 |
63 if __name__ == '__main__': | 45 if __name__ == '__main__': |
64 main() | 46 main() |