Mercurial > hg > config
comparison python/find_duplicate_files.py @ 711:ab831c7621e9
hacky way to note duplicate files
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Thu, 16 Oct 2014 11:25:49 -0700 |
parents | |
children | dbd2562cb03e |
comparison
equal
deleted
inserted
replaced
710:7f910ce4da04 | 711:ab831c7621e9 |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 """ | |
5 find duplicate files in a directory | |
6 """ | |
7 | |
8 # imports | |
9 import argparse | |
10 import os | |
11 import subprocess | |
12 import sys | |
13 | |
14 # module globals | |
15 __all__ = ['main', 'Parser'] | |
16 | |
class Parser(argparse.ArgumentParser):
    """Argument parser for the command line: one positional ``directory``."""

    def __init__(self, **kwargs):
        """Build the parser.

        Keyword arguments are forwarded to ``argparse.ArgumentParser``;
        ``description`` falls back to the module docstring when not given.
        """
        if 'description' not in kwargs:
            kwargs['description'] = __doc__
        super(Parser, self).__init__(**kwargs)
        self.add_argument('directory')
        # holds the namespace from the most recent successful parse
        self.options = None

    def parse_args(self, *args, **kw):
        """Parse, validate, remember and return the options namespace."""
        parsed = super(Parser, self).parse_args(*args, **kw)
        # validate first so `self.options` is only updated on success
        self.validate(parsed)
        self.options = parsed
        return parsed

    def validate(self, options):
        """Report a parser error unless ``options.directory`` is a directory."""
        directory = options.directory
        if os.path.isdir(directory):
            return
        self.error("Not a directory: {}".format(directory))
35 | |
def _group_by_size(directory):
    """Return ``{size_in_bytes: [filename, ...]}`` for files in ``directory``.

    Only entries directly inside ``directory`` that ``os.path.isfile``
    accepts are considered, so subdirectories and broken symlinks are
    skipped.  Names are taken from ``os.listdir`` rather than parsed from
    ``ls -l`` text, so filenames containing whitespace are kept intact.
    """
    sizes = {}
    # sorted() gives a deterministic order within each size group
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            continue
        sizes.setdefault(os.path.getsize(path), []).append(filename)
    return sizes


def main(args=None):
    """CLI: print groups of same-sized files found in a directory.

    Equal size is only a heuristic for "duplicate"; file contents are not
    compared.

    ``args`` is the list of command line arguments; when ``None``,
    argparse reads ``sys.argv[1:]`` at call time.
    """

    # parse command line options
    parser = Parser()
    options = parser.parse_args(args)

    # keep only the sizes shared by two or more files
    duplicates = {
        size: filenames
        for size, filenames in _group_by_size(options.directory).items()
        if len(filenames) > 1
    }

    for size in sorted(duplicates):
        print('{} : '.format(size))
        print('\n'.join(duplicates[size]))
        print('\n')


if __name__ == '__main__':
    main()