Mercurial > hg > config
view python/find_duplicate_files.py @ 787:d265e49b965e
shortcut
author | Jeff Hammel <k0scist@gmail.com> |
---|---|
date | Sun, 25 Sep 2016 16:29:22 -0700 |
parents | ab831c7621e9 |
children | dbd2562cb03e |
line wrap: on
line source
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
find duplicate files in a directory
"""

# imports
import argparse
import os
import stat
import subprocess  # noqa: F401 -- unused by this module now; import retained
import sys

# module globals
__all__ = ['main', 'Parser', 'find_duplicates']


class Parser(argparse.ArgumentParser):
    """CLI option parser"""

    def __init__(self, **kwargs):
        """
        build the parser; accepts the same keyword arguments as
        `argparse.ArgumentParser`, with `description` defaulting to
        this module's docstring
        """
        kwargs.setdefault('description', __doc__)
        argparse.ArgumentParser.__init__(self, **kwargs)
        self.add_argument('directory')
        # populated with the most recent result of `parse_args`
        self.options = None

    def parse_args(self, *args, **kw):
        """
        parse and validate the command line; the parsed namespace is
        returned and also stored on `self.options`
        """
        options = argparse.ArgumentParser.parse_args(self, *args, **kw)
        self.validate(options)
        self.options = options
        return options

    def validate(self, options):
        """
        validate options; calls `self.error` (which prints usage and
        exits) if `options.directory` is not an existing directory
        """
        if not os.path.isdir(options.directory):
            self.error("Not a directory: {}".format(options.directory))


def find_duplicates(directory):
    """
    group the regular files directly inside `directory` by size

    Returns a dict mapping size in bytes -> list of file names (sorted),
    restricted to sizes shared by two or more files.

    Files are compared by size only; contents are not read, so the
    result lists *candidate* duplicates.  Subdirectories are not
    descended into.
    """

    sizes = {}
    for name in sorted(os.listdir(directory)):
        # dotfiles are skipped, matching what a plain `ls` listing shows
        if name.startswith('.'):
            continue
        path = os.path.join(directory, name)
        try:
            # lstat: do not follow symlinks, so a link is never reported
            # as a duplicate of the file it points to
            info = os.lstat(path)
        except OSError:
            # entry vanished or is unreadable since listdir(); skip it
            continue
        if not stat.S_ISREG(info.st_mode):
            # directories, symlinks, sockets etc. are not files to compare
            continue
        sizes.setdefault(info.st_size, []).append(name)

    return {size: names for size, names in sizes.items() if len(names) > 1}


def main(args=None):
    """
    CLI

    `args` is the list of command line arguments; when None (the
    default) argparse reads `sys.argv[1:]` at call time.
    """

    # parse command line options
    parser = Parser()
    options = parser.parse_args(args)

    duplicates = find_duplicates(options.directory)

    # report each shared size followed by the files having that size
    for size in sorted(duplicates):
        print ('{} : '.format(size))
        print ('\n'.join(duplicates[size]))
        print ('\n')


if __name__ == '__main__':
    main()