Support multiple input files with -o being a directory

This change makes it possible for scour to consume multiple input
files in one command invocation.  E.g.

     $ scour file1.svg file2.svgz ... output-directory
     # xargs friendly variant
     $ scour -o output-directory file1.svg file2.svgz ...

This avoids most of the "startup" overhead in python and scour when
many files are being processed.  On about a 100 of (already scour'ed)
gnuplot svg graphs, this change provides an almost 40% speed up compared
to a shell alternative:

     # Original shell pipeline (~29s)
     # Note; for bash, rewriting this without the "basename"-call does
     # not seem to improve performance considerably.
     $ for FILE in input/[01]* ; do \
         python3 -m scour.scour "$FILE" output/"$(basename "$FILE")" > /dev/null ; \
       done
    # With this patch (~16s)
    $ python3 -m scour.scour input/[01]* output > /dev/null

Signed-off-by: Niels Thykier <niels@thykier.net>
This commit is contained in:
Niels Thykier 2018-03-18 11:15:46 +00:00
parent 82ce83acab
commit c42dc6b793
2 changed files with 40 additions and 20 deletions

View file

@ -3630,10 +3630,12 @@ class HeaderedFormatter(optparse.IndentedHelpFormatter):
# GZ: would prefer this to be in a function or class scope, but tests etc need
# access to the defaults anyway
_options_parser = optparse.OptionParser(
usage="%prog [INPUT.SVG [OUTPUT.SVG]] [OPTIONS]",
usage="%prog [INPUT.SVG [[... INPUT.SVG] OUTPUT]] [OPTIONS]",
description=("If the input/output files are not specified, stdin/stdout are used. "
"If the input/output files are specified with a svgz extension, "
"then compressed SVG is assumed."),
"then compressed SVG is assumed. Multiple input files can be specified "
"when output is a directory. The ouput files will be the basename of "
"the input files."),
formatter=HeaderedFormatter(max_help_position=33),
version=VER)
@ -3648,11 +3650,11 @@ _options_parser.add_option("-v", "--verbose",
action="store_true", dest="verbose", default=False,
help="verbose output (statistics, etc.)")
_options_parser.add_option("-i",
action="store", dest="infilename", metavar="INPUT.SVG",
help="alternative way to specify input filename")
action="append", dest="infilenames", metavar="INPUT.SVG",
help="alternative way to specify input filenames")
_options_parser.add_option("-o",
action="store", dest="outfilename", metavar="OUTPUT.SVG",
help="alternative way to specify output filename")
action="store", dest="outfilename", metavar="OUTPUT",
help="alternative way to specify output (either a file or a directory)")
_option_group_optimization = optparse.OptionGroup(_options_parser, "Optimization")
_option_group_optimization.add_option("--set-precision",
@ -3766,10 +3768,11 @@ def parse_args(args=None, ignore_additional_args=False):
options, rargs = _options_parser.parse_args(args)
if rargs:
if not options.infilename:
options.infilename = rargs.pop(0)
if not options.outfilename and rargs:
options.outfilename = rargs.pop(0)
if not options.outfilename and len(rargs) > 1:
options.outfilename = rargs.pop()
if not options.infilenames:
options.infilenames = rargs
rargs = []
if not ignore_additional_args and rargs:
_options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
if options.digits < 1:
@ -3782,8 +3785,15 @@ def parse_args(args=None, ignore_additional_args=False):
_options_parser.error("Invalid value for --indent, see --help")
if options.indent_depth < 0:
_options_parser.error("Value for --nindent should be positive (or zero), see --help")
if options.infilename and options.outfilename and options.infilename == options.outfilename:
if options.infilenames:
if len(options.infilenames) > 1:
if not options.outfilename or not os.path.isdir(options.outfilename):
_options_parser.error("Multiple input files requires an directory as output (-o)")
elif len(options.infilenames) == 0 and options.outfilename and options.infilenames[0] == options.outfilename:
_options_parser.error("Input filename is the same as output filename")
else:
if options.outfilename and os.path.isdir(options.outfilename):
_options_parser.error("Cannot use a directory as output when input is stdin")
return options
@ -3811,12 +3821,12 @@ def maybe_gziped_file(filename, mode="r"):
return open(filename, mode)
def getInOut(options):
def getInOut(input_file, options):
references_relative_to = None
if options.infilename:
infile = maybe_gziped_file(options.infilename, "rb")
if input_file is not None:
infile = maybe_gziped_file(input_file, "rb")
# GZ: could catch a raised IOError here and report
references_relative_to = os.path.dirname(options.infilename)
references_relative_to = os.path.dirname(input_file)
else:
# GZ: could sniff for gzip compression here
#
@ -3830,7 +3840,11 @@ def getInOut(options):
_options_parser.error("No input file specified, see --help for detailed usage information")
if options.outfilename:
outfile = maybe_gziped_file(options.outfilename, "wb")
dest = options.outfilename
if os.path.isdir(dest):
assert input_file is not None
dest = os.path.join(dest, os.path.basename(input_file))
outfile = maybe_gziped_file(dest, "wb")
else:
# open the binary buffer of stdout as the output is already encoded
try:
@ -3901,7 +3915,13 @@ def start(options, input, output, references_relative_to=None):
def run():
options = parse_args()
(input, input_relative_to, output) = getInOut(options)
input_files = options.infilenames if options.infilenames is not None else []
if input_files and input_files[0] is not None:
for filename in input_files:
(input, input_relative_to, output) = getInOut(filename, options)
start(options, input, output, input_relative_to)
else:
(input, input_relative_to, output) = getInOut(None, options)
start(options, input, output, input_relative_to)

View file

@ -2358,7 +2358,7 @@ class DoNotStripXmlSpaceAttribute(unittest.TestCase):
class CommandLineUsage(unittest.TestCase):
USAGE_STRING = "Usage: scour [INPUT.SVG [OUTPUT.SVG]] [OPTIONS]"
USAGE_STRING = "Usage: scour [INPUT.SVG [[... INPUT.SVG] OUTPUT]] [OPTIONS]"
MINIMAL_SVG = '<?xml version="1.0" encoding="UTF-8"?>\n' \
'<svg xmlns="http://www.w3.org/2000/svg"/>\n'
TEMP_SVG_FILE = 'testscour_temp.svg'