Support multiple input files with -o being a directory
This change makes it possible for scour to consume multiple input
files in one command invocation. E.g.
$ scour file1.svg file2.svgz ... output-directory
# xargs friendly variant
$ scour -o output-directory file1.svg file2.svgz ...
This avoids most of the "startup" overhead in python and scour when
many files are being processed. On about 100 (already scour'ed)
gnuplot SVG graphs, this change provides an almost 40% speed-up compared
to a shell alternative:
# Original shell pipeline (~29s)
# Note: for bash, rewriting this without the "basename" call does
# not seem to improve performance considerably.
$ for FILE in input/[01]* ; do \
python3 -m scour.scour "$FILE" output/"$(basename "$FILE")" > /dev/null ; \
done
# With this patch (~16s)
$ python3 -m scour.scour input/[01]* output > /dev/null
Signed-off-by: Niels Thykier <niels@thykier.net>
This commit is contained in:
parent
82ce83acab
commit
c42dc6b793
2 changed files with 40 additions and 20 deletions
|
|
@ -3630,10 +3630,12 @@ class HeaderedFormatter(optparse.IndentedHelpFormatter):
|
||||||
# GZ: would prefer this to be in a function or class scope, but tests etc need
|
# GZ: would prefer this to be in a function or class scope, but tests etc need
|
||||||
# access to the defaults anyway
|
# access to the defaults anyway
|
||||||
_options_parser = optparse.OptionParser(
|
_options_parser = optparse.OptionParser(
|
||||||
usage="%prog [INPUT.SVG [OUTPUT.SVG]] [OPTIONS]",
|
usage="%prog [INPUT.SVG [[... INPUT.SVG] OUTPUT]] [OPTIONS]",
|
||||||
description=("If the input/output files are not specified, stdin/stdout are used. "
|
description=("If the input/output files are not specified, stdin/stdout are used. "
|
||||||
"If the input/output files are specified with a svgz extension, "
|
"If the input/output files are specified with a svgz extension, "
|
||||||
"then compressed SVG is assumed."),
|
"then compressed SVG is assumed. Multiple input files can be specified "
|
||||||
|
"when output is a directory. The ouput files will be the basename of "
|
||||||
|
"the input files."),
|
||||||
formatter=HeaderedFormatter(max_help_position=33),
|
formatter=HeaderedFormatter(max_help_position=33),
|
||||||
version=VER)
|
version=VER)
|
||||||
|
|
||||||
|
|
@ -3648,11 +3650,11 @@ _options_parser.add_option("-v", "--verbose",
|
||||||
action="store_true", dest="verbose", default=False,
|
action="store_true", dest="verbose", default=False,
|
||||||
help="verbose output (statistics, etc.)")
|
help="verbose output (statistics, etc.)")
|
||||||
_options_parser.add_option("-i",
|
_options_parser.add_option("-i",
|
||||||
action="store", dest="infilename", metavar="INPUT.SVG",
|
action="append", dest="infilenames", metavar="INPUT.SVG",
|
||||||
help="alternative way to specify input filename")
|
help="alternative way to specify input filenames")
|
||||||
_options_parser.add_option("-o",
|
_options_parser.add_option("-o",
|
||||||
action="store", dest="outfilename", metavar="OUTPUT.SVG",
|
action="store", dest="outfilename", metavar="OUTPUT",
|
||||||
help="alternative way to specify output filename")
|
help="alternative way to specify output (either a file or a directory)")
|
||||||
|
|
||||||
_option_group_optimization = optparse.OptionGroup(_options_parser, "Optimization")
|
_option_group_optimization = optparse.OptionGroup(_options_parser, "Optimization")
|
||||||
_option_group_optimization.add_option("--set-precision",
|
_option_group_optimization.add_option("--set-precision",
|
||||||
|
|
@ -3766,10 +3768,11 @@ def parse_args(args=None, ignore_additional_args=False):
|
||||||
options, rargs = _options_parser.parse_args(args)
|
options, rargs = _options_parser.parse_args(args)
|
||||||
|
|
||||||
if rargs:
|
if rargs:
|
||||||
if not options.infilename:
|
if not options.outfilename and len(rargs) > 1:
|
||||||
options.infilename = rargs.pop(0)
|
options.outfilename = rargs.pop()
|
||||||
if not options.outfilename and rargs:
|
if not options.infilenames:
|
||||||
options.outfilename = rargs.pop(0)
|
options.infilenames = rargs
|
||||||
|
rargs = []
|
||||||
if not ignore_additional_args and rargs:
|
if not ignore_additional_args and rargs:
|
||||||
_options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
|
_options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
|
||||||
if options.digits < 1:
|
if options.digits < 1:
|
||||||
|
|
@ -3782,8 +3785,15 @@ def parse_args(args=None, ignore_additional_args=False):
|
||||||
_options_parser.error("Invalid value for --indent, see --help")
|
_options_parser.error("Invalid value for --indent, see --help")
|
||||||
if options.indent_depth < 0:
|
if options.indent_depth < 0:
|
||||||
_options_parser.error("Value for --nindent should be positive (or zero), see --help")
|
_options_parser.error("Value for --nindent should be positive (or zero), see --help")
|
||||||
if options.infilename and options.outfilename and options.infilename == options.outfilename:
|
if options.infilenames:
|
||||||
_options_parser.error("Input filename is the same as output filename")
|
if len(options.infilenames) > 1:
|
||||||
|
if not options.outfilename or not os.path.isdir(options.outfilename):
|
||||||
|
_options_parser.error("Multiple input files requires an directory as output (-o)")
|
||||||
|
elif len(options.infilenames) == 0 and options.outfilename and options.infilenames[0] == options.outfilename:
|
||||||
|
_options_parser.error("Input filename is the same as output filename")
|
||||||
|
else:
|
||||||
|
if options.outfilename and os.path.isdir(options.outfilename):
|
||||||
|
_options_parser.error("Cannot use a directory as output when input is stdin")
|
||||||
|
|
||||||
return options
|
return options
|
||||||
|
|
||||||
|
|
@ -3811,12 +3821,12 @@ def maybe_gziped_file(filename, mode="r"):
|
||||||
return open(filename, mode)
|
return open(filename, mode)
|
||||||
|
|
||||||
|
|
||||||
def getInOut(options):
|
def getInOut(input_file, options):
|
||||||
references_relative_to = None
|
references_relative_to = None
|
||||||
if options.infilename:
|
if input_file is not None:
|
||||||
infile = maybe_gziped_file(options.infilename, "rb")
|
infile = maybe_gziped_file(input_file, "rb")
|
||||||
# GZ: could catch a raised IOError here and report
|
# GZ: could catch a raised IOError here and report
|
||||||
references_relative_to = os.path.dirname(options.infilename)
|
references_relative_to = os.path.dirname(input_file)
|
||||||
else:
|
else:
|
||||||
# GZ: could sniff for gzip compression here
|
# GZ: could sniff for gzip compression here
|
||||||
#
|
#
|
||||||
|
|
@ -3830,7 +3840,11 @@ def getInOut(options):
|
||||||
_options_parser.error("No input file specified, see --help for detailed usage information")
|
_options_parser.error("No input file specified, see --help for detailed usage information")
|
||||||
|
|
||||||
if options.outfilename:
|
if options.outfilename:
|
||||||
outfile = maybe_gziped_file(options.outfilename, "wb")
|
dest = options.outfilename
|
||||||
|
if os.path.isdir(dest):
|
||||||
|
assert input_file is not None
|
||||||
|
dest = os.path.join(dest, os.path.basename(input_file))
|
||||||
|
outfile = maybe_gziped_file(dest, "wb")
|
||||||
else:
|
else:
|
||||||
# open the binary buffer of stdout as the output is already encoded
|
# open the binary buffer of stdout as the output is already encoded
|
||||||
try:
|
try:
|
||||||
|
|
@ -3901,8 +3915,14 @@ def start(options, input, output, references_relative_to=None):
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
options = parse_args()
|
options = parse_args()
|
||||||
(input, input_relative_to, output) = getInOut(options)
|
input_files = options.infilenames if options.infilenames is not None else []
|
||||||
start(options, input, output, input_relative_to)
|
if input_files and input_files[0] is not None:
|
||||||
|
for filename in input_files:
|
||||||
|
(input, input_relative_to, output) = getInOut(filename, options)
|
||||||
|
start(options, input, output, input_relative_to)
|
||||||
|
else:
|
||||||
|
(input, input_relative_to, output) = getInOut(None, options)
|
||||||
|
start(options, input, output, input_relative_to)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
||||||
|
|
@ -2358,7 +2358,7 @@ class DoNotStripXmlSpaceAttribute(unittest.TestCase):
|
||||||
|
|
||||||
class CommandLineUsage(unittest.TestCase):
|
class CommandLineUsage(unittest.TestCase):
|
||||||
|
|
||||||
USAGE_STRING = "Usage: scour [INPUT.SVG [OUTPUT.SVG]] [OPTIONS]"
|
USAGE_STRING = "Usage: scour [INPUT.SVG [[... INPUT.SVG] OUTPUT]] [OPTIONS]"
|
||||||
MINIMAL_SVG = '<?xml version="1.0" encoding="UTF-8"?>\n' \
|
MINIMAL_SVG = '<?xml version="1.0" encoding="UTF-8"?>\n' \
|
||||||
'<svg xmlns="http://www.w3.org/2000/svg"/>\n'
|
'<svg xmlns="http://www.w3.org/2000/svg"/>\n'
|
||||||
TEMP_SVG_FILE = 'testscour_temp.svg'
|
TEMP_SVG_FILE = 'testscour_temp.svg'
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue