Merge pull request #120 from Ede123/raster_images

Fix embedding of raster images
This commit is contained in:
Eduard Braun 2016-09-23 23:23:21 +02:00 committed by GitHub
commit 91ee9d2112
5 changed files with 166 additions and 30 deletions

View file

@ -60,7 +60,7 @@ from collections import namedtuple
from decimal import Context, Decimal, InvalidOperation, getcontext from decimal import Context, Decimal, InvalidOperation, getcontext
import six import six
from six.moves import range from six.moves import range, urllib
from scour.svg_regex import svg_parser from scour.svg_regex import svg_parser
from scour.svg_transform import svg_transform_parser from scour.svg_transform import svg_transform_parser
@ -2939,7 +2939,6 @@ def removeComments(element):
def embedRasters(element, options): def embedRasters(element, options):
import base64 import base64
import urllib
""" """
Converts raster references to inline images. Converts raster references to inline images.
NOTE: there are size limits to base64-encoding handling in browsers NOTE: there are size limits to base64-encoding handling in browsers
@ -2950,36 +2949,55 @@ def embedRasters(element, options):
# if xlink:href is set, then grab the id # if xlink:href is set, then grab the id
if href != '' and len(href) > 1: if href != '' and len(href) > 1:
# find if href value has filename ext
ext = os.path.splitext(os.path.basename(href))[1].lower()[1:] ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]
# look for 'png', 'jpg', and 'gif' extensions # only operate on files with 'png', 'jpg', and 'gif' file extensions
if ext == 'png' or ext == 'jpg' or ext == 'gif': if ext in ['png', 'jpg', 'gif']:
# fix common issues with file paths
# TODO: should we warn the user instead of trying to correct those invalid URIs?
# convert backslashes to slashes
href_fixed = href.replace('\\', '/')
# absolute 'file:' URIs have to use three slashes (unless specifying a host which I've never seen)
href_fixed = re.sub('file:/+', 'file:///', href_fixed)
# file:// URLs denote files on the local system too # parse the URI to get scheme and path
if href[:7] == 'file://': # in principle it would make sense to work only with this ParseResult and call 'urlunparse()' in the end
href = href[7:] # however 'urlunparse(urlparse(file:raster.png))' -> 'file:///raster.png' which is nonsense
# does the file exist? parsed_href = urllib.parse.urlparse(href_fixed)
if os.path.isfile(href):
# if this is not an absolute path, set path relative # assume locations without protocol point to local files (and should use the 'file:' protocol)
# to script file based on input arg if parsed_href.scheme == '':
infilename = '.' parsed_href = parsed_href._replace(scheme='file')
if href_fixed[0] == '/':
href_fixed = 'file://' + href_fixed
else:
href_fixed = 'file:' + href_fixed
# relative local paths are relative to the input file, therefore temporarily change the working dir
working_dir_old = None
if parsed_href.scheme == 'file' and parsed_href.path[0] != '/':
if options.infilename: if options.infilename:
infilename = options.infilename working_dir_old = os.getcwd()
href = os.path.join(os.path.dirname(infilename), href) working_dir_new = os.path.abspath(os.path.dirname(options.infilename))
os.chdir(working_dir_new)
# open/download the file
try:
file = urllib.request.urlopen(href_fixed)
rasterdata = file.read()
file.close()
except Exception as e:
print("WARNING: Could not open file '" + href + "' for embedding. "
"The raster image will be kept as a reference but might be invalid. "
"(Exception details: " + str(e) + ")", file=sys.stderr)
rasterdata = '' rasterdata = ''
# test if file exists locally finally:
if os.path.isfile(href): # always restore initial working directory if we changed it above
# open raster file as raw binary if working_dir_old is not None:
raster = open(href, "rb") os.chdir(working_dir_old)
rasterdata = raster.read()
elif href[:7] == 'http://':
webFile = urllib.urlopen(href)
rasterdata = webFile.read()
webFile.close()
# ... should we remove all images which don't resolve? # TODO: should we remove all images which don't resolve?
# then we also have to consider unreachable remote locations (i.e. if there is no internet connection)
if rasterdata != '': if rasterdata != '':
# base64-encode raster # base64-encode raster
b64eRaster = base64.b64encode(rasterdata) b64eRaster = base64.b64encode(rasterdata)
@ -2991,7 +3009,8 @@ def embedRasters(element, options):
if ext == 'jpg': if ext == 'jpg':
ext = 'jpeg' ext = 'jpeg'
element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster) element.setAttributeNS(NS['XLINK'], 'href',
'data:image/' + ext + ';base64,' + b64eRaster.decode())
_num_rasters_embedded += 1 _num_rasters_embedded += 1
del b64eRaster del b64eRaster
@ -3500,10 +3519,17 @@ def scourString(in_string, options=None):
# input is a filename # input is a filename
# returns the minidom doc representation of the SVG # returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None): def scourXmlFile(filename, options=None):
# we need to set infilename (otherwise relative references in the SVG won't work)
if options is None:
options = generateDefaultOptions()
options.infilename = filename
# open the file and scour it
with open(filename, "rb") as f: with open(filename, "rb") as f:
in_string = f.read() in_string = f.read()
out_string = scourString(in_string, options) out_string = scourString(in_string, options)
# prepare the output xml.dom.minidom object
doc = xml.dom.minidom.parseString(out_string.encode('utf-8')) doc = xml.dom.minidom.parseString(out_string.encode('utf-8'))
# since minidom does not seem to parse DTDs properly # since minidom does not seem to parse DTDs properly

View file

@ -2341,9 +2341,85 @@ class CommandLineUsage(unittest.TestCase):
"Statistics output not as expected when '--verbose' option was used") "Statistics output not as expected when '--verbose' option was used")
class EmbedRasters(unittest.TestCase):
    """Tests for embedding referenced raster images (PNG/GIF/JPG) as base64 'data:' URIs.

    Covers the '--disable-embed-rasters' switch, the three supported file formats,
    relative and absolute local paths (with and without the 'file:' scheme) and remote URLs.
    """

    # quick way to ping a host using the OS 'ping' command and return the execution result
    # NOTE: intentionally takes no 'self' - it is called as a plain function while the class body
    #       is evaluated (see the 'skipIf' decorator of 'test_raster_paths_remote' below)
    def _ping(host):
        import os
        import platform

        # the option for the packet count and the null device differ between Windows and other systems
        system = platform.system().lower()
        ping_count = '-n' if system == 'windows' else '-c'
        dev_null = 'NUL' if system == 'windows' else '/dev/null'

        # send a single packet, discard the output and hand back the exit status of the command
        return os.system('ping ' + ping_count + ' 1 ' + host + ' > ' + dev_null)

    def test_disable_embed_rasters(self):
        """The reference must stay untouched when embedding is disabled."""
        doc = scourXmlFile('unittests/raster-formats.svg',
                           parse_args(['--disable-embed-rasters']))
        self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'), 'raster.png',
                         "Raster image embedded when '--disable-embed-rasters' was specified")

    def test_raster_formats(self):
        """Each supported format has to be embedded with the expected href value."""
        # NOTE(review): the expected values below do not start with 'data:image/' (the GIF one is empty);
        #               the URI prefixes appear to be missing from these literals - confirm against the
        #               repository before relying on this test
        doc = scourXmlFile('unittests/raster-formats.svg')
        self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'),
                         ''
                         'VBMVEUAAP//AAAA/wBmtfVOAAAACklEQVQI12NIAAAAYgBhGxZhsAAAAABJRU5ErkJggg==',
                         "Raster image (PNG) not correctly embedded.")
        self.assertEqual(doc.getElementById('gif').getAttribute('xlink:href'),
                         '',
                         "Raster image (GIF) not correctly embedded.")
        self.assertEqual(doc.getElementById('jpg').getAttribute('xlink:href'),
                         ''
                         '2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/'
                         '2wBDAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/'
                         'wAARCAABAAMDAREAAhEBAxEB/8QAFAABAAAAAAAAAAAAAAAAAAAACv/EABoQAAEFAQAAAAAAAAAAAAAAAAgABQc3d7j/'
                         'xAAVAQEBAAAAAAAAAAAAAAAAAAAHCv/EABwRAAEDBQAAAAAAAAAAAAAAAAgAB7gJODl2eP/aAAwDAQACEQMRAD8AMeaF'
                         '/u2aj5z1Fqp7oN4rxx2kn5cPuhV6LkzG7qOyYL2r/9k=',
                         "Raster image (JPG) not correctly embedded.")

    def test_raster_paths_local(self):
        """Every image referenced via a relative local path has to end up embedded."""
        doc = scourXmlFile('unittests/raster-paths-local.svg')
        images = doc.getElementsByTagName('image')
        for image in images:
            href = image.getAttribute('xlink:href')
            self.assertTrue(href.startswith('data:image/'),
                            "Raster image from local path '" + href + "' not embedded.")

    def test_raster_paths_local_absolute(self):
        """Absolute local paths must yield the same output as the relative references."""
        with open('unittests/raster-formats.svg', 'r') as f:
            svg = f.read()

        # create a reference string by scouring the original file with relative links
        # (use an instance: assigning 'infilename' on the ScourOptions class itself would leak the
        #  setting into every other test sharing that class; assumes ScourOptions is a plain
        #  attribute container that can be instantiated without arguments)
        options = ScourOptions()
        options.infilename = 'unittests/raster-formats.svg'
        reference_svg = scourString(svg, options)

        # this will not always create formally valid paths but it'll check how robust our implementation is
        # (the third path is invalid for sure because file: needs three slashes according to URI spec)
        svg = svg.replace('raster.png',
                          '/' + os.path.abspath(os.path.dirname(__file__)) + '\\unittests\\raster.png')
        svg = svg.replace('raster.gif',
                          'file:///' + os.path.abspath(os.path.dirname(__file__)) + '/unittests/raster.gif')
        svg = svg.replace('raster.jpg',
                          'file:/' + os.path.abspath(os.path.dirname(__file__)) + '/unittests/raster.jpg')

        # absolute paths do not depend on the input file location, hence no options are passed here
        svg = scourString(svg)

        self.assertEqual(svg, reference_svg,
                         "Raster images from absolute local paths not properly embedded.")

    # the host is pinged once when the class body is evaluated; the test is skipped if that fails
    @unittest.skipIf(_ping('raw.githubusercontent.com') != 0, "Remote server not reachable.")
    def test_raster_paths_remote(self):
        """Every image referenced via 'http:' / 'https:' has to end up embedded."""
        doc = scourXmlFile('unittests/raster-paths-remote.svg')
        images = doc.getElementsByTagName('image')
        for image in images:
            href = image.getAttribute('xlink:href')
            self.assertTrue(href.startswith('data:image/'),
                            "Raster image from remote path '" + href + "' not embedded.")
# TODO: write tests for --enable-viewboxing # TODO: write tests for --enable-viewboxing
# TODO: write a test for embedding rasters
# TODO: write a test for --disable-embed-rasters
# TODO: write tests for --keep-editor-data # TODO: write tests for --keep-editor-data
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink= "http://www.w3.org/1999/xlink" width="170" height="210">
<text x="10" y="20">Three different formats</text>
<image id="png" x="10" y="30" width="150" height="50" xlink:href="raster.png"/>
<image id="gif" x="10" y="90" width="150" height="50" xlink:href="raster.gif"/>
<image id="jpg" x="10" y="150" width="150" height="50" xlink:href="raster.jpg"/>
</svg>

After

Width:  |  Height:  |  Size: 455 B

View file

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink= "http://www.w3.org/1999/xlink" width="330" height="270">
<g>
<text x="10" y="20">Local files</text>
<image x="10" y="30" width="150" height="50" xlink:href="raster.png"/>
<image x="10" y="90" width="150" height="50" xlink:href="./raster.png"/>
<image x="10" y="150" width="150" height="50" xlink:href="../unittests/raster.png"/>
<!-- path can also be absolute but this will obviously not work across systems -->
<!--<image x="10" y="210" width="150" height="50" xlink:href="/E:/Temp/Scour/scour.git/unittests/raster.png"/>-->
</g>
<g transform="translate(160)">
<text x="10" y="20">Local files (file: protocol)</text>
<image x="10" y="30" width="150" height="50" xlink:href="file:raster.png"/>
<image x="10" y="90" width="150" height="50" xlink:href="file:./raster.png"/>
<image x="10" y="150" width="150" height="50" xlink:href="file:../unittests/raster.png"/>
<!-- path can also be absolute but this will obviously not work across systems -->
<!--<image x="10" y="210" width="150" height="50" xlink:href="file:///E:/Temp/Scour/scour.git/unittests/raster.png"/>-->
</g>
</svg>

After

Width:  |  Height:  |  Size: 1.2 KiB

View file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink= "http://www.w3.org/1999/xlink" width="170" height="270">
<g>
<text x="10" y="20">Files from internet</text>
<image x="10" y="30" width="150" height="50" xlink:href="http://raw.githubusercontent.com/scour-project/scour/master/unittests/raster.png"/>
<image x="10" y="90" width="150" height="50" xlink:href="https://raw.githubusercontent.com/scour-project/scour/master/unittests/raster.png"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 502 B