diff --git a/scour/scour.py b/scour/scour.py
index 12867da..e91d385 100644
--- a/scour/scour.py
+++ b/scour/scour.py
@@ -60,7 +60,7 @@ from collections import namedtuple
from decimal import Context, Decimal, InvalidOperation, getcontext
import six
-from six.moves import range
+from six.moves import range, urllib
from scour.svg_regex import svg_parser
from scour.svg_transform import svg_transform_parser
@@ -2939,47 +2939,65 @@ def removeComments(element):
def embedRasters(element, options):
import base64
- import urllib
"""
Converts raster references to inline images.
NOTE: there are size limits to base64-encoding handling in browsers
- """
+ """
global _num_rasters_embedded
href = element.getAttributeNS(NS['XLINK'], 'href')
# if xlink:href is set, then grab the id
if href != '' and len(href) > 1:
- # find if href value has filename ext
ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]
- # look for 'png', 'jpg', and 'gif' extensions
- if ext == 'png' or ext == 'jpg' or ext == 'gif':
+ # only operate on files with 'png', 'jpg', and 'gif' file extensions
+ if ext in ['png', 'jpg', 'gif']:
+ # fix common issues with file paths
+ # TODO: should we warn the user instead of trying to correct those invalid URIs?
+ # convert backslashes to slashes
+ href_fixed = href.replace('\\', '/')
+ # absolute 'file:' URIs have to use three slashes (unless specifying a host which I've never seen)
+ href_fixed = re.sub('file:/+', 'file:///', href_fixed)
- # file:// URLs denote files on the local system too
- if href[:7] == 'file://':
- href = href[7:]
- # does the file exist?
- if os.path.isfile(href):
- # if this is not an absolute path, set path relative
- # to script file based on input arg
- infilename = '.'
+ # parse the URI to get scheme and path
+ # in principle it would make sense to work only with this ParseResult and call 'urlunparse()' in the end
+ # however 'urlunparse(urlparse(file:raster.png))' -> 'file:///raster.png' which is nonsense
+ parsed_href = urllib.parse.urlparse(href_fixed)
+
+ # assume locations without protocol point to local files (and should use the 'file:' protocol)
+ if parsed_href.scheme == '':
+ parsed_href = parsed_href._replace(scheme='file')
+ if href_fixed[0] == '/':
+ href_fixed = 'file://' + href_fixed
+ else:
+ href_fixed = 'file:' + href_fixed
+
+ # relative local paths are relative to the input file, therefore temporarily change the working dir
+ working_dir_old = None
+ if parsed_href.scheme == 'file' and parsed_href.path[0] != '/':
if options.infilename:
- infilename = options.infilename
- href = os.path.join(os.path.dirname(infilename), href)
+ working_dir_old = os.getcwd()
+ working_dir_new = os.path.abspath(os.path.dirname(options.infilename))
+ os.chdir(working_dir_new)
- rasterdata = ''
- # test if file exists locally
- if os.path.isfile(href):
- # open raster file as raw binary
- raster = open(href, "rb")
- rasterdata = raster.read()
- elif href[:7] == 'http://':
- webFile = urllib.urlopen(href)
- rasterdata = webFile.read()
- webFile.close()
+ # open/download the file
+ try:
+ file = urllib.request.urlopen(href_fixed)
+ rasterdata = file.read()
+ file.close()
+ except Exception as e:
+ print("WARNING: Could not open file '" + href + "' for embedding. "
+ "The raster image will be kept as a reference but might be invalid. "
+ "(Exception details: " + str(e) + ")", file=sys.stderr)
+ rasterdata = ''
+ finally:
+ # always restore initial working directory if we changed it above
+ if working_dir_old is not None:
+ os.chdir(working_dir_old)
- # ... should we remove all images which don't resolve?
+ # TODO: should we remove all images which don't resolve?
+ # then we also have to consider unreachable remote locations (i.e. if there is no internet connection)
if rasterdata != '':
# base64-encode raster
b64eRaster = base64.b64encode(rasterdata)
@@ -2991,7 +3009,8 @@ def embedRasters(element, options):
if ext == 'jpg':
ext = 'jpeg'
- element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
+ element.setAttributeNS(NS['XLINK'], 'href',
+ 'data:image/' + ext + ';base64,' + b64eRaster.decode())
_num_rasters_embedded += 1
del b64eRaster
@@ -3500,10 +3519,17 @@ def scourString(in_string, options=None):
# input is a filename
# returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
+ # we need to set infilename (otherwise relative references in the SVG won't work)
+ if options is None:
+ options = generateDefaultOptions()
+ options.infilename = filename
+
+ # open the file and scour it
with open(filename, "rb") as f:
in_string = f.read()
out_string = scourString(in_string, options)
+ # prepare the output xml.dom.minidom object
doc = xml.dom.minidom.parseString(out_string.encode('utf-8'))
# since minidom does not seem to parse DTDs properly
diff --git a/testscour.py b/testscour.py
index 08eda23..6098914 100755
--- a/testscour.py
+++ b/testscour.py
@@ -2341,9 +2341,85 @@ class CommandLineUsage(unittest.TestCase):
"Statistics output not as expected when '--verbose' option was used")
+class EmbedRasters(unittest.TestCase):
+
+ # quick way to ping a host using the OS 'ping' command and return the execution result
+ def _ping(host):
+ import os
+ import platform
+
+ system = platform.system().lower()
+ ping_count = '-n' if system == 'windows' else '-c'
+ dev_null = 'NUL' if system == 'windows' else '/dev/null'
+
+ return os.system('ping ' + ping_count + ' 1 ' + host + ' > ' + dev_null)
+
+ def test_disable_embed_rasters(self):
+ doc = scourXmlFile('unittests/raster-formats.svg',
+ parse_args(['--disable-embed-rasters']))
+ self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'), 'raster.png',
+ "Raster image embedded when '--disable-embed-rasters' was specified")
+
+ def test_raster_formats(self):
+ doc = scourXmlFile('unittests/raster-formats.svg')
+ self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'),
+ ''
+ 'VBMVEUAAP//AAAA/wBmtfVOAAAACklEQVQI12NIAAAAYgBhGxZhsAAAAABJRU5ErkJggg==',
+ "Raster image (PNG) not correctly embedded.")
+ self.assertEqual(doc.getElementById('gif').getAttribute('xlink:href'),
+ '',
+ "Raster image (GIF) not correctly embedded.")
+ self.assertEqual(doc.getElementById('jpg').getAttribute('xlink:href'),
+ ''
+ '2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/'
+ '2wBDAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/'
+ 'wAARCAABAAMDAREAAhEBAxEB/8QAFAABAAAAAAAAAAAAAAAAAAAACv/EABoQAAEFAQAAAAAAAAAAAAAAAAgABQc3d7j/'
+ 'xAAVAQEBAAAAAAAAAAAAAAAAAAAHCv/EABwRAAEDBQAAAAAAAAAAAAAAAAgAB7gJODl2eP/aAAwDAQACEQMRAD8AMeaF'
+ '/u2aj5z1Fqp7oN4rxx2kn5cPuhV6LkzG7qOyYL2r/9k=',
+ "Raster image (JPG) not correctly embedded.")
+
+ def test_raster_paths_local(self):
+ doc = scourXmlFile('unittests/raster-paths-local.svg')
+ images = doc.getElementsByTagName('image')
+ for image in images:
+ href = image.getAttribute('xlink:href')
+ self.assertTrue(href.startswith('data:image/'),
+ "Raster image from local path '" + href + "' not embedded.")
+
+ def test_raster_paths_local_absolute(self):
+ with open('unittests/raster-formats.svg', 'r') as f:
+ svg = f.read()
+
+ # create a reference string by scouring the original file with relative links
+ options = ScourOptions
+ options.infilename = 'unittests/raster-formats.svg'
+ reference_svg = scourString(svg, options)
+
+ # this will not always create formally valid paths but it'll check how robust our implementation is
+ # (the third path is invalid for sure because file: needs three slashes according to URI spec)
+ svg = svg.replace('raster.png',
+ '/' + os.path.abspath(os.path.dirname(__file__)) + '\\unittests\\raster.png')
+ svg = svg.replace('raster.gif',
+ 'file:///' + os.path.abspath(os.path.dirname(__file__)) + '/unittests/raster.gif')
+ svg = svg.replace('raster.jpg',
+ 'file:/' + os.path.abspath(os.path.dirname(__file__)) + '/unittests/raster.jpg')
+
+ svg = scourString(svg)
+
+ self.assertEqual(svg, reference_svg,
+ "Raster images from absolute local paths not properly embedded.")
+
+ @unittest.skipIf(_ping('raw.githubusercontent.com') != 0, "Remote server not reachable.")
+ def test_raster_paths_remote(self):
+ doc = scourXmlFile('unittests/raster-paths-remote.svg')
+ images = doc.getElementsByTagName('image')
+ for image in images:
+ href = image.getAttribute('xlink:href')
+ self.assertTrue(href.startswith('data:image/'),
+ "Raster image from remote path '" + href + "' not embedded.")
+
+
# TODO: write tests for --enable-viewboxing
-# TODO; write a test for embedding rasters
-# TODO: write a test for --disable-embed-rasters
# TODO: write tests for --keep-editor-data
if __name__ == '__main__':
diff --git a/unittests/raster-formats.svg b/unittests/raster-formats.svg
new file mode 100644
index 0000000..c31b65a
--- /dev/null
+++ b/unittests/raster-formats.svg
@@ -0,0 +1,7 @@
+
+
\ No newline at end of file
diff --git a/unittests/raster-paths-local.svg b/unittests/raster-paths-local.svg
new file mode 100644
index 0000000..61db8ab
--- /dev/null
+++ b/unittests/raster-paths-local.svg
@@ -0,0 +1,19 @@
+
+
\ No newline at end of file
diff --git a/unittests/raster-paths-remote.svg b/unittests/raster-paths-remote.svg
new file mode 100644
index 0000000..ede7783
--- /dev/null
+++ b/unittests/raster-paths-remote.svg
@@ -0,0 +1,8 @@
+
+
\ No newline at end of file