Mercurial > hg > config
comparison python/find_duplicate_files.py @ 713:1d066bfdb744
merge
| author | Jeff Hammel <k0scist@gmail.com> |
|---|---|
| date | Thu, 16 Oct 2014 18:27:21 -0700 |
| parents | ab831c7621e9 |
| children | dbd2562cb03e |
comparison
equal
deleted
inserted
replaced
| 712:02aec49585ab | 713:1d066bfdb744 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 """ | |
| 5 find duplicate files in a directory | |
| 6 """ | |
| 7 | |
| 8 # imports | |
| 9 import argparse | |
| 10 import os | |
| 11 import subprocess | |
| 12 import sys | |
| 13 | |
| 14 # module globals | |
| 15 __all__ = ['main', 'Parser'] | |
| 16 | |
class Parser(argparse.ArgumentParser):
    """Command-line parser taking a single positional ``directory``."""

    def __init__(self, **kwargs):
        # fall back to the module docstring when no description is supplied
        kwargs.setdefault('description', __doc__)
        super(Parser, self).__init__(**kwargs)
        self.add_argument('directory')
        # populated by the most recent successful parse_args() call
        self.options = None

    def parse_args(self, *args, **kw):
        """Parse, validate, remember and return the options namespace."""
        parsed = super(Parser, self).parse_args(*args, **kw)
        self.validate(parsed)
        self.options = parsed
        return parsed

    def validate(self, options):
        """Exit via ``self.error`` unless ``options.directory`` is a directory."""
        if os.path.isdir(options.directory):
            return
        self.error("Not a directory: {}".format(options.directory))
| 35 | |
def _names_by_size(directory):
    """Return ``{size_in_bytes: [entry names]}`` for the visible entries of *directory*."""
    sizes = {}
    for name in sorted(os.listdir(directory)):
        # plain `ls` (no -a) hides dot-entries; keep the same set of entries
        if name.startswith('.'):
            continue
        # lstat, not stat: report a symlink's own size, as `ls -l` does
        size = os.lstat(os.path.join(directory, name)).st_size
        sizes.setdefault(size, []).append(name)
    return sizes


def main(args=sys.argv[1:]):
    """CLI: print groups of entries in a directory that share the same size.

    Sizes are read with ``os.lstat`` rather than by parsing ``ls -l`` output,
    so names containing whitespace are reported intact, symlinks are listed
    under their own name, and the output is text on both Python 2 and 3.

    Note that equal size is only a hint of duplication; file contents are
    not compared.
    """

    # parse command line options
    parser = Parser()
    options = parser.parse_args(args)

    sizes = _names_by_size(options.directory)

    # only sizes shared by two or more entries are candidate duplicates
    duplicates = {size: names
                  for size, names in sizes.items()
                  if len(names) >= 2}

    for size in sorted(duplicates):
        print('{} : '.format(size))
        print('\n'.join(duplicates[size]))
        print('\n')
| 62 | |
# run the CLI only when executed as a script, not when imported
if __name__ == "__main__":
    main()
