# HG changeset patch
# User Jeff Hammel
# Date 1477695971 25200
# Node ID dbd2562cb03efa8685822b8bdd66f87acc3a30bc
# Parent 720e51cb0edbc1f9932fbfe5215c464bb935d45c
remove old way of doing things; note TODO on replacing

diff -r 720e51cb0edb -r dbd2562cb03e python/find_duplicate_files.py
--- a/python/find_duplicate_files.py	Fri Oct 28 10:45:46 2016 -0700
+++ b/python/find_duplicate_files.py	Fri Oct 28 16:06:11 2016 -0700
@@ -11,11 +11,10 @@
 import subprocess
 import sys
 
-# module globals
-__all__ = ['main', 'Parser']
 
-class Parser(argparse.ArgumentParser):
+class DuplicateFilesParser(argparse.ArgumentParser):
     """CLI option parser"""
+
     def __init__(self, **kwargs):
         kwargs.setdefault('description', __doc__)
         argparse.ArgumentParser.__init__(self, **kwargs)
@@ -37,28 +36,11 @@
     """CLI"""
 
     # parse command line options
-    parser = Parser()
+    parser = DuplicateFilesParser()
     options = parser.parse_args(args)
 
-    output = subprocess.check_output(['ls', '-l', options.directory]).strip()
-    rows = [row.strip().split() for row in output.splitlines()[1:]]
-
-    sizes = {}
-    for row in rows:
-        size = int(row[4])
-        filename = row[-1]
-        sizes.setdefault(size, []).append(filename)
-
-    duplicates = {}
-    for size, filenames in sizes.items():
-        if len(filenames) < 2:
-            continue
-        duplicates[size] = filenames
-
-    for size in sorted(duplicates.keys()):
-        print ('{} : '.format(size))
-        print ('\n'.join(duplicates[size]))
-        print ('\n')
+    # get all files
+    raise NotImplementedError('TODO') # -> record TODO items
 
 if __name__ == '__main__':
     main()