This commit is contained in:
Niels Thykier 2018-04-18 20:13:48 +00:00 committed by GitHub
commit 18f3cbd193
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 56 additions and 35 deletions

View file

@ -3042,7 +3042,7 @@ def removeComments(element):
return num return num
def embedRasters(element, options): def embedRasters(element, references_relative_to, options):
import base64 import base64
""" """
Converts raster references to inline images. Converts raster references to inline images.
@ -3081,9 +3081,9 @@ def embedRasters(element, options):
# relative local paths are relative to the input file, therefore temporarily change the working dir # relative local paths are relative to the input file, therefore temporarily change the working dir
working_dir_old = None working_dir_old = None
if parsed_href.scheme == 'file' and parsed_href.path[0] != '/': if parsed_href.scheme == 'file' and parsed_href.path[0] != '/':
if options.infilename: if references_relative_to:
working_dir_new = os.path.abspath(references_relative_to)
working_dir_old = os.getcwd() working_dir_old = os.getcwd()
working_dir_new = os.path.abspath(os.path.dirname(options.infilename))
os.chdir(working_dir_new) os.chdir(working_dir_new)
# open/download the file # open/download the file
@ -3333,7 +3333,7 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False):
# this is the main method # this is the main method
# input is a string representation of the input XML # input is a string representation of the input XML
# returns a string representation of the output XML # returns a string representation of the output XML
def scourString(in_string, options=None): def scourString(in_string, options=None, references_relative_to=None):
# sanitize options (take missing attributes from defaults, discard unknown attributes) # sanitize options (take missing attributes from defaults, discard unknown attributes)
options = sanitizeOptions(options) options = sanitizeOptions(options)
@ -3574,7 +3574,7 @@ def scourString(in_string, options=None):
# convert rasters references to base64-encoded strings # convert rasters references to base64-encoded strings
if options.embed_rasters: if options.embed_rasters:
for elem in doc.documentElement.getElementsByTagName('image'): for elem in doc.documentElement.getElementsByTagName('image'):
embedRasters(elem, options) embedRasters(elem, references_relative_to, options)
# properly size the SVG document (ideally width/height should be 100% with a viewBox) # properly size the SVG document (ideally width/height should be 100% with a viewBox)
if options.enable_viewboxing: if options.enable_viewboxing:
@ -3615,16 +3615,14 @@ def scourString(in_string, options=None):
# used mostly by unit tests # used mostly by unit tests
# input is a filename # input is a filename
# returns the minidom doc representation of the SVG # returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None): def scourXmlFile(filename, options=None, references_relative_to=None):
# sanitize options (take missing attributes from defaults, discard unknown attributes) # sanitize options (take missing attributes from defaults, discard unknown attributes)
options = sanitizeOptions(options) options = sanitizeOptions(options)
# we need to make sure infilename is set correctly (otherwise relative references in the SVG won't work)
options.ensure_value("infilename", filename)
# open the file and scour it # open the file and scour it
with open(filename, "rb") as f: with open(filename, "rb") as f:
in_string = f.read() in_string = f.read()
out_string = scourString(in_string, options) out_string = scourString(in_string, options, references_relative_to)
# prepare the output xml.dom.minidom object # prepare the output xml.dom.minidom object
doc = xml.dom.minidom.parseString(out_string.encode('utf-8')) doc = xml.dom.minidom.parseString(out_string.encode('utf-8'))
@ -3657,10 +3655,12 @@ class HeaderedFormatter(optparse.IndentedHelpFormatter):
# GZ: would prefer this to be in a function or class scope, but tests etc need # GZ: would prefer this to be in a function or class scope, but tests etc need
# access to the defaults anyway # access to the defaults anyway
_options_parser = optparse.OptionParser( _options_parser = optparse.OptionParser(
usage="%prog [INPUT.SVG [OUTPUT.SVG]] [OPTIONS]", usage="%prog [INPUT.SVG [[... INPUT.SVG] OUTPUT]] [OPTIONS]",
description=("If the input/output files are not specified, stdin/stdout are used. " description=("If the input/output files are not specified, stdin/stdout are used. "
"If the input/output files are specified with a svgz extension, " "If the input/output files are specified with a svgz extension, "
"then compressed SVG is assumed."), "then compressed SVG is assumed. Multiple input files can be specified "
"when output is a directory. The ouput files will be the basename of "
"the input files."),
formatter=HeaderedFormatter(max_help_position=33), formatter=HeaderedFormatter(max_help_position=33),
version=VER) version=VER)
@ -3675,11 +3675,11 @@ _options_parser.add_option("-v", "--verbose",
action="store_true", dest="verbose", default=False, action="store_true", dest="verbose", default=False,
help="verbose output (statistics, etc.)") help="verbose output (statistics, etc.)")
_options_parser.add_option("-i", _options_parser.add_option("-i",
action="store", dest="infilename", metavar="INPUT.SVG", action="append", dest="infilenames", metavar="INPUT.SVG",
help="alternative way to specify input filename") help="alternative way to specify input filenames")
_options_parser.add_option("-o", _options_parser.add_option("-o",
action="store", dest="outfilename", metavar="OUTPUT.SVG", action="store", dest="outfilename", metavar="OUTPUT",
help="alternative way to specify output filename") help="alternative way to specify output (either a file or a directory)")
_option_group_optimization = optparse.OptionGroup(_options_parser, "Optimization") _option_group_optimization = optparse.OptionGroup(_options_parser, "Optimization")
_option_group_optimization.add_option("--set-precision", _option_group_optimization.add_option("--set-precision",
@ -3793,10 +3793,11 @@ def parse_args(args=None, ignore_additional_args=False):
options, rargs = _options_parser.parse_args(args) options, rargs = _options_parser.parse_args(args)
if rargs: if rargs:
if not options.infilename: if not options.outfilename and len(rargs) > 1:
options.infilename = rargs.pop(0) options.outfilename = rargs.pop()
if not options.outfilename and rargs: if not options.infilenames:
options.outfilename = rargs.pop(0) options.infilenames = rargs
rargs = []
if not ignore_additional_args and rargs: if not ignore_additional_args and rargs:
_options_parser.error("Additional arguments not handled: %r, see --help" % rargs) _options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
if options.digits < 1: if options.digits < 1:
@ -3809,8 +3810,15 @@ def parse_args(args=None, ignore_additional_args=False):
_options_parser.error("Invalid value for --indent, see --help") _options_parser.error("Invalid value for --indent, see --help")
if options.indent_depth < 0: if options.indent_depth < 0:
_options_parser.error("Value for --nindent should be positive (or zero), see --help") _options_parser.error("Value for --nindent should be positive (or zero), see --help")
if options.infilename and options.outfilename and options.infilename == options.outfilename: if options.infilenames:
if len(options.infilenames) > 1:
if not options.outfilename or not os.path.isdir(options.outfilename):
_options_parser.error("Multiple input files requires an directory as output (-o)")
elif len(options.infilenames) == 0 and options.outfilename and options.infilenames[0] == options.outfilename:
_options_parser.error("Input filename is the same as output filename") _options_parser.error("Input filename is the same as output filename")
else:
if options.outfilename and os.path.isdir(options.outfilename):
_options_parser.error("Cannot use a directory as output when input is stdin")
return options return options
@ -3838,10 +3846,12 @@ def maybe_gziped_file(filename, mode="r"):
return open(filename, mode) return open(filename, mode)
def getInOut(options): def getInOut(input_file, options):
if options.infilename: references_relative_to = None
infile = maybe_gziped_file(options.infilename, "rb") if input_file is not None:
infile = maybe_gziped_file(input_file, "rb")
# GZ: could catch a raised IOError here and report # GZ: could catch a raised IOError here and report
references_relative_to = os.path.dirname(input_file)
else: else:
# GZ: could sniff for gzip compression here # GZ: could sniff for gzip compression here
# #
@ -3855,7 +3865,11 @@ def getInOut(options):
_options_parser.error("No input file specified, see --help for detailed usage information") _options_parser.error("No input file specified, see --help for detailed usage information")
if options.outfilename: if options.outfilename:
outfile = maybe_gziped_file(options.outfilename, "wb") dest = options.outfilename
if os.path.isdir(dest):
assert input_file is not None
dest = os.path.join(dest, os.path.basename(input_file))
outfile = maybe_gziped_file(dest, "wb")
else: else:
# open the binary buffer of stdout as the output is already encoded # open the binary buffer of stdout as the output is already encoded
try: try:
@ -3865,7 +3879,7 @@ def getInOut(options):
# redirect informational output to stderr when SVG is output to stdout # redirect informational output to stderr when SVG is output to stdout
options.stdout = sys.stderr options.stdout = sys.stderr
return [infile, outfile] return [infile, references_relative_to, outfile]
def getReport(): def getReport():
@ -3887,7 +3901,7 @@ def getReport():
) )
def start(options, input, output): def start(options, input, output, references_relative_to=None):
# sanitize options (take missing attributes from defaults, discard unknown attributes) # sanitize options (take missing attributes from defaults, discard unknown attributes)
options = sanitizeOptions(options) options = sanitizeOptions(options)
@ -3895,7 +3909,7 @@ def start(options, input, output):
# do the work # do the work
in_string = input.read() in_string = input.read()
out_string = scourString(in_string, options).encode("UTF-8") out_string = scourString(in_string, options, references_relative_to).encode("UTF-8")
output.write(out_string) output.write(out_string)
# Close input and output files (but do not attempt to close stdin/stdout!) # Close input and output files (but do not attempt to close stdin/stdout!)
@ -3926,8 +3940,14 @@ def start(options, input, output):
def run(): def run():
options = parse_args() options = parse_args()
(input, output) = getInOut(options) input_files = options.infilenames if options.infilenames is not None else []
start(options, input, output) if input_files and input_files[0] is not None:
for filename in input_files:
(input, input_relative_to, output) = getInOut(filename, options)
start(options, input, output, input_relative_to)
else:
(input, input_relative_to, output) = getInOut(None, options)
start(options, input, output, input_relative_to)
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -2387,7 +2387,7 @@ class DoNotStripXmlSpaceAttribute(unittest.TestCase):
class CommandLineUsage(unittest.TestCase): class CommandLineUsage(unittest.TestCase):
USAGE_STRING = "Usage: scour [INPUT.SVG [OUTPUT.SVG]] [OPTIONS]" USAGE_STRING = "Usage: scour [INPUT.SVG [[... INPUT.SVG] OUTPUT]] [OPTIONS]"
MINIMAL_SVG = '<?xml version="1.0" encoding="UTF-8"?>\n' \ MINIMAL_SVG = '<?xml version="1.0" encoding="UTF-8"?>\n' \
'<svg xmlns="http://www.w3.org/2000/svg"/>\n' '<svg xmlns="http://www.w3.org/2000/svg"/>\n'
TEMP_SVG_FILE = 'testscour_temp.svg' TEMP_SVG_FILE = 'testscour_temp.svg'
@ -2555,12 +2555,13 @@ class EmbedRasters(unittest.TestCase):
def test_disable_embed_rasters(self): def test_disable_embed_rasters(self):
doc = scourXmlFile('unittests/raster-formats.svg', doc = scourXmlFile('unittests/raster-formats.svg',
parse_args(['--disable-embed-rasters'])) parse_args(['--disable-embed-rasters']),
'unittests')
self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'), 'raster.png', self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'), 'raster.png',
"Raster image embedded when '--disable-embed-rasters' was specified") "Raster image embedded when '--disable-embed-rasters' was specified")
def test_raster_formats(self): def test_raster_formats(self):
doc = scourXmlFile('unittests/raster-formats.svg') doc = scourXmlFile('unittests/raster-formats.svg', None, 'unittests')
self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'), self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'),
'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAMAAAABAgMAAABmjvwnAAAAC' 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAMAAAABAgMAAABmjvwnAAAAC'
'VBMVEUAAP//AAAA/wBmtfVOAAAACklEQVQI12NIAAAAYgBhGxZhsAAAAABJRU5ErkJggg==', 'VBMVEUAAP//AAAA/wBmtfVOAAAACklEQVQI12NIAAAAYgBhGxZhsAAAAABJRU5ErkJggg==',
@ -2578,7 +2579,7 @@ class EmbedRasters(unittest.TestCase):
"Raster image (JPG) not correctly embedded.") "Raster image (JPG) not correctly embedded.")
def test_raster_paths_local(self): def test_raster_paths_local(self):
doc = scourXmlFile('unittests/raster-paths-local.svg') doc = scourXmlFile('unittests/raster-paths-local.svg', None, 'unittests')
images = doc.getElementsByTagName('image') images = doc.getElementsByTagName('image')
for image in images: for image in images:
href = image.getAttribute('xlink:href') href = image.getAttribute('xlink:href')
@ -2592,7 +2593,7 @@ class EmbedRasters(unittest.TestCase):
# create a reference string by scouring the original file with relative links # create a reference string by scouring the original file with relative links
options = ScourOptions options = ScourOptions
options.infilename = 'unittests/raster-formats.svg' options.infilename = 'unittests/raster-formats.svg'
reference_svg = scourString(svg, options) reference_svg = scourString(svg, options, 'unittests')
# this will not always create formally valid paths but it'll check how robust our implementation is # this will not always create formally valid paths but it'll check how robust our implementation is
# (the third path is invalid for sure because file: needs three slashes according to URI spec) # (the third path is invalid for sure because file: needs three slashes according to URI spec)