diff --git a/fido/fido.py b/fido/fido.py
index a7573cdb..ca43a508 100755
--- a/fido/fido.py
+++ b/fido/fido.py
@@ -743,37 +743,49 @@ def list_files(roots, recurse=False):
                     break
 
 
-def main(args=None):
-    if not args:
-        args = sys.argv[1:]
-
-    parser = ArgumentParser(description=defaults['description'], epilog=defaults['epilog'], fromfile_prefix_chars='@', formatter_class=RawTextHelpFormatter)
-    parser.add_argument('-v', default=False, action='store_true', help='show version information')
-    parser.add_argument('-q', default=False, action='store_true', help='run (more) quietly')
-    parser.add_argument('-recurse', default=False, action='store_true', help='recurse into subdirectories')
-    parser.add_argument('-zip', default=False, action='store_true', help='recurse into zip and tar files')
-    parser.add_argument('-noextension', default=False, action='store_true', help='disable extension matching, reduces number of matches but may reduce false positives')
-    parser.add_argument('-nocontainer', default=False, action='store_true', help='disable deep scan of container documents, increases speed but may reduce accuracy with big files')
-    parser.add_argument('-pronom_only', default=False, action='store_true', help='disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results')
-
-    group = parser.add_mutually_exclusive_group()
-    group.add_argument('-input', default=False, help='file containing a list of files to check, one per line. - means stdin')
-    group.add_argument('files', nargs='*', default=[], metavar='FILE', help='files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.')
-
-    parser.add_argument('-filename', default=None, help='filename if file contents passed through STDIN')
-    parser.add_argument('-useformats', metavar='INCLUDEPUIDS', default=None, help='comma separated string of formats to use in identification')
-    parser.add_argument('-nouseformats', metavar='EXCLUDEPUIDS', default=None, help='comma separated string of formats not to use in identification')
-    parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
-    parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use if no match. See README.txt')
-    parser.add_argument('-bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default=' + str(defaults['bufsize']) + ' bytes)')
-    parser.add_argument('-container_bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default=' + str(defaults['container_bufsize']) + ' bytes)')
-    parser.add_argument('-loadformats', default=None, metavar='XML1,...,XMLn', help='comma separated string of XML format files to add.')
-    parser.add_argument('-confdir', default=CONFIG_DIR, help='configuration directory to load_fido_xml, for example, the format specifications from.')
-
-    if len(sys.argv) == 1:
-        parser.print_help()
-        sys.exit(1)
-    args = parser.parse_args(args)
+def main(
+        version=False, quiet=False,
+        recurse=False, recurse_compressed_archives=False,
+        noextension=False, nocontainer=False, pronom_only=False,
+        check_list=None, files=None, filename=None,
+        useformats=None, nouseformats=None,
+        matchprintf=None, nomatchprintf=None,
+        bufsize=None, container_bufsize=None,
+        loadformats=None, confdir=None, handle_matches=None
+):
+    """Run a FIDO identification over the given files or over stdin.
+
+    The keyword arguments mirror the command line options declared by
+    the parser in the ``__main__`` section of this module:
+
+    version -- write the version header to stdout, then return
+    quiet -- skip the version header on stderr and the final summary
+    recurse -- recurse into subdirectories
+    recurse_compressed_archives -- recurse into zip and tar files
+    noextension -- disable extension matching
+    nocontainer -- disable deep scan of container documents
+    pronom_only -- load only the PRONOM signatures, not the format
+        extensions file
+    check_list -- file containing a list of files to check, one per
+        line; '-' means stdin
+    files -- list of files to check; ['-'] reads content from stdin
+    filename -- filename if file contents are passed through stdin
+    useformats -- comma separated PUIDs to use in identification
+    nouseformats -- comma separated PUIDs not to use; only consulted
+        when useformats is empty
+    matchprintf -- format string (Python style) to use on match
+    nomatchprintf -- format string (Python style) to use if no match
+    bufsize, container_bufsize -- buffer sizes in bytes, handed to Fido
+    loadformats -- comma separated XML format files to add
+    confdir -- configuration directory; None selects CONFIG_DIR
+    handle_matches -- handed to Fido unchanged
+    """
+    # Module callers may leave these out: give len(files) further down a
+    # real list and use the same directory the command line defaults to.
+    if files is None:
+        files = []
+    if confdir is None:
+        confdir = CONFIG_DIR
 
     t0 = time.clock()
 
@@ -784,83 +796,119 @@ def main(args=None):
     defaults['xml_fidoExtensionSignature'] = versions.fido_extension_signature
     defaults['format_files'] = [defaults['xml_pronomSignature']]
 
-    if args.pronom_only:
+    if pronom_only:
         versionHeader = "FIDO v{0} ({1}, {2})\n".format(__version__, defaults['xml_pronomSignature'], defaults['containersignature_file'])
     else:
         versionHeader = "FIDO v{0} ({1}, {2}, {3})\n".format(__version__, defaults['xml_pronomSignature'], defaults['containersignature_file'], defaults['xml_fidoExtensionSignature'])
         defaults['format_files'].append(defaults['xml_fidoExtensionSignature'])
 
-    if args.v:
+    if version:
         sys.stdout.write(versionHeader)
-        sys.exit(0)
+        return
 
-    if args.matchprintf:
+    # Unescape the format strings: Python 2 str offers 'string_escape';
+    # where decode() is missing only \n and \t are translated by hand.
+    if matchprintf:
         try:
-            args.matchprintf = args.matchprintf.decode('string_escape')
+            matchprintf = matchprintf.decode('string_escape')
         except AttributeError:
-            args.matchprintf = args.matchprintf.replace(r"\n", "\n")
-            args.matchprintf = args.matchprintf.replace(r"\t", "\t")
-    if args.nomatchprintf:
+            matchprintf = matchprintf.replace(r"\n", "\n")
+            matchprintf = matchprintf.replace(r"\t", "\t")
+
+    if nomatchprintf:
         try:
-            args.nomatchprintf = args.nomatchprintf.decode('string_escape')
+            nomatchprintf = nomatchprintf.decode('string_escape')
         except AttributeError:
-            args.matchprintf = args.matchprintf.replace(r"\n", "\n")
-            args.matchprintf = args.matchprintf.replace(r"\t", "\t")
+            nomatchprintf = nomatchprintf.replace(r"\n", "\n")
+            nomatchprintf = nomatchprintf.replace(r"\t", "\t")
 
     fido = Fido(
-        quiet=args.q,
-        bufsize=args.bufsize,
-        container_bufsize=args.container_bufsize,
-        printmatch=args.matchprintf,
-        printnomatch=args.nomatchprintf,
-        zip=args.zip,
-        nocontainer=args.nocontainer,
-        conf_dir=args.confdir)
+        quiet=quiet,
+        bufsize=bufsize,
+        container_bufsize=container_bufsize,
+        printmatch=matchprintf,
+        printnomatch=nomatchprintf,
+        zip=recurse_compressed_archives,
+        nocontainer=nocontainer,
+        conf_dir=confdir,
+        handle_matches=handle_matches
+    )
 
     # TODO: Allow conf options to be dis-included
-    if args.loadformats:
-        for file in args.loadformats.split(','):
+    if loadformats:
+        for file in loadformats.split(','):
             fido.load_fido_xml(file)
 
     # TODO: remove from maps
-    if args.useformats:
-        args.useformats = args.useformats.split(',')
-        fido.formats = [f for f in fido.formats if f.find('puid').text in args.useformats]
-    elif args.nouseformats:
-        args.nouseformats = args.nouseformats.split(',')
-        fido.formats = [f for f in fido.formats if f.find('puid').text not in args.nouseformats]
+    if useformats:
+        useformats = useformats.split(',')
+        fido.formats = [f for f in fido.formats if f.find('puid').text in useformats]
+    elif nouseformats:
+        nouseformats = nouseformats.split(',')
+        fido.formats = [f for f in fido.formats if f.find('puid').text not in nouseformats]
 
     # Set up to use stdin, or open input files:
-    if args.input == '-':
-        args.files = sys.stdin
-    elif args.input:
-        args.files = open(args.input, 'r')
+    if check_list == '-':
+        files = sys.stdin
+    elif check_list:
+        files = open(check_list, 'r')
 
     # RUN
     try:
-        if not args.q:
+        if not quiet:
             sys.stderr.write(versionHeader)
             sys.stderr.flush()
-        if (not args.input) and len(args.files) == 1 and args.files[0] == '-':
+        if (not check_list) and len(files) == 1 and files[0] == '-':
             if fido.zip:
                 raise RuntimeError("Multiple content read from stdin not yet supported.")
                 sys.exit(1)
-                fido.identify_multi_object_stream(sys.stdin, extension=not args.noextension)
+                fido.identify_multi_object_stream(sys.stdin, extension=not noextension)
             else:
-                fido.identify_stream(sys.stdin, args.filename, extension=not args.noextension)
+                fido.identify_stream(sys.stdin, filename, extension=not noextension)
         else:
-            for file in list_files(args.files, args.recurse):
-                fido.identify_file(file, extension=not args.noextension)
+            for file in list_files(files, recurse):
+                fido.identify_file(file, extension=not noextension)
     except KeyboardInterrupt:
         msg = "FIDO: Interrupt while identifying file {0}"
         sys.stderr.write(msg.format(fido.current_file))
         sys.exit(1)
 
-    if not args.q:
+    if not quiet:
         sys.stdout.flush()
         fido.print_summary(time.clock() - t0)
         sys.stderr.flush()
 
 
 if __name__ == '__main__':
-    main()
+    # run as a command line tool instead of as a module
+    parser = ArgumentParser(description=defaults['description'], epilog=defaults['epilog'], fromfile_prefix_chars='@', formatter_class=RawTextHelpFormatter)
+    parser.add_argument('-v', default=False, action='store_true', dest='version', help='show version information')
+    parser.add_argument('-q', default=False, action='store_true', dest='quiet', help='run (more) quietly')
+    parser.add_argument('-recurse', default=False, action='store_true', help='recurse into subdirectories')
+    parser.add_argument('-zip', default=False, action='store_true', dest='recurse_compressed_archives', help='recurse into zip and tar files')
+    parser.add_argument('-noextension', default=False, action='store_true', help='disable extension matching, reduces number of matches but may reduce false positives')
+    parser.add_argument('-nocontainer', default=False, action='store_true', help='disable deep scan of container documents, increases speed but may reduce accuracy with big files')
+    parser.add_argument('-pronom_only', default=False, action='store_true', help='disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results')
+
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('-input', default=False, dest='check_list', help='file containing a list of files to check, one per line. - means stdin')
+    group.add_argument('files', nargs='*', default=[], metavar='FILE', help='files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.')
+
+    parser.add_argument('-filename', default=None, help='filename if file contents passed through STDIN')
+    parser.add_argument('-useformats', metavar='INCLUDEPUIDS', default=None, help='comma separated string of formats to use in identification')
+    parser.add_argument('-nouseformats', metavar='EXCLUDEPUIDS', default=None, help='comma separated string of formats not to use in identification')
+    parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
+    parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use if no match. See README.txt')
+    parser.add_argument('-bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default=' + str(defaults['bufsize']) + ' bytes)')
+    parser.add_argument('-container_bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default=' + str(defaults['container_bufsize']) + ' bytes)')
+    parser.add_argument('-loadformats', default=None, metavar='XML1,...,XMLn', help='comma separated string of XML format files to add.')
+    parser.add_argument('-confdir', default=CONFIG_DIR, help='configuration directory to load_fido_xml, for example, the format specifications from.')
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+
+    args = parser.parse_args()
+
+    # main() handles -v itself; versionHeader only exists inside main().
+    main(**vars(args))