comparison python/find_duplicate_files.py @ 799:dbd2562cb03e

remove old way of doing things; note TODO on replacing
author Jeff Hammel <k0scist@gmail.com>
date Fri, 28 Oct 2016 16:06:11 -0700
parents ab831c7621e9
children bea4dd61ae45
comparison
legend: equal | deleted | inserted | replaced
left column: 798:720e51cb0edb (old) | right column: 799:dbd2562cb03e (new)
9 import argparse 9 import argparse
10 import os 10 import os
11 import subprocess 11 import subprocess
12 import sys 12 import sys
13 13
14 # module globals
15 __all__ = ['main', 'Parser']
16 14
17 class Parser(argparse.ArgumentParser): 15 class DuplicateFilesParser(argparse.ArgumentParser):
18 """CLI option parser""" 16 """CLI option parser"""
17
19 def __init__(self, **kwargs): 18 def __init__(self, **kwargs):
20 kwargs.setdefault('description', __doc__) 19 kwargs.setdefault('description', __doc__)
21 argparse.ArgumentParser.__init__(self, **kwargs) 20 argparse.ArgumentParser.__init__(self, **kwargs)
22 self.add_argument('directory') 21 self.add_argument('directory')
23 self.options = None 22 self.options = None
35 34
36 def main(args=sys.argv[1:]): 35 def main(args=sys.argv[1:]):
37 """CLI""" 36 """CLI"""
38 37
39 # parse command line options 38 # parse command line options
40 parser = Parser() 39 parser = DuplicateFilesParser()
41 options = parser.parse_args(args) 40 options = parser.parse_args(args)
42 41
43 output = subprocess.check_output(['ls', '-l', options.directory]).strip() 42 # get all files
44 rows = [row.strip().split() for row in output.splitlines()[1:]] 43 raise NotImplementedError('TODO') # -> record TODO items
45
46 sizes = {}
47 for row in rows:
48 size = int(row[4])
49 filename = row[-1]
50 sizes.setdefault(size, []).append(filename)
51
52 duplicates = {}
53 for size, filenames in sizes.items():
54 if len(filenames) < 2:
55 continue
56 duplicates[size] = filenames
57
58 for size in sorted(duplicates.keys()):
59 print ('{} : '.format(size))
60 print ('\n'.join(duplicates[size]))
61 print ('\n')
62 44
63 if __name__ == '__main__': 45 if __name__ == '__main__':
64 main() 46 main()