Merge pull request #120 from Ede123/raster_images
Fix embedding of raster images
This commit is contained in:
commit
91ee9d2112
5 changed files with 166 additions and 30 deletions
|
|
@ -60,7 +60,7 @@ from collections import namedtuple
|
||||||
from decimal import Context, Decimal, InvalidOperation, getcontext
|
from decimal import Context, Decimal, InvalidOperation, getcontext
|
||||||
|
|
||||||
import six
|
import six
|
||||||
from six.moves import range
|
from six.moves import range, urllib
|
||||||
|
|
||||||
from scour.svg_regex import svg_parser
|
from scour.svg_regex import svg_parser
|
||||||
from scour.svg_transform import svg_transform_parser
|
from scour.svg_transform import svg_transform_parser
|
||||||
|
|
@ -2939,7 +2939,6 @@ def removeComments(element):
|
||||||
|
|
||||||
def embedRasters(element, options):
|
def embedRasters(element, options):
|
||||||
import base64
|
import base64
|
||||||
import urllib
|
|
||||||
"""
|
"""
|
||||||
Converts raster references to inline images.
|
Converts raster references to inline images.
|
||||||
NOTE: there are size limits to base64-encoding handling in browsers
|
NOTE: there are size limits to base64-encoding handling in browsers
|
||||||
|
|
@ -2950,36 +2949,55 @@ def embedRasters(element, options):
|
||||||
|
|
||||||
# if xlink:href is set, then grab the id
|
# if xlink:href is set, then grab the id
|
||||||
if href != '' and len(href) > 1:
|
if href != '' and len(href) > 1:
|
||||||
# find if href value has filename ext
|
|
||||||
ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]
|
ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]
|
||||||
|
|
||||||
# look for 'png', 'jpg', and 'gif' extensions
|
# only operate on files with 'png', 'jpg', and 'gif' file extensions
|
||||||
if ext == 'png' or ext == 'jpg' or ext == 'gif':
|
if ext in ['png', 'jpg', 'gif']:
|
||||||
|
# fix common issues with file paths
|
||||||
|
# TODO: should we warn the user instead of trying to correct those invalid URIs?
|
||||||
|
# convert backslashes to slashes
|
||||||
|
href_fixed = href.replace('\\', '/')
|
||||||
|
# absolute 'file:' URIs have to use three slashes (unless specifying a host which I've never seen)
|
||||||
|
href_fixed = re.sub('file:/+', 'file:///', href_fixed)
|
||||||
|
|
||||||
# file:// URLs denote files on the local system too
|
# parse the URI to get scheme and path
|
||||||
if href[:7] == 'file://':
|
# in principle it would make sense to work only with this ParseResult and call 'urlunparse()' in the end
|
||||||
href = href[7:]
|
# however 'urlunparse(urlparse(file:raster.png))' -> 'file:///raster.png' which is nonsense
|
||||||
# does the file exist?
|
parsed_href = urllib.parse.urlparse(href_fixed)
|
||||||
if os.path.isfile(href):
|
|
||||||
# if this is not an absolute path, set path relative
|
# assume locations without protocol point to local files (and should use the 'file:' protocol)
|
||||||
# to script file based on input arg
|
if parsed_href.scheme == '':
|
||||||
infilename = '.'
|
parsed_href = parsed_href._replace(scheme='file')
|
||||||
|
if href_fixed[0] == '/':
|
||||||
|
href_fixed = 'file://' + href_fixed
|
||||||
|
else:
|
||||||
|
href_fixed = 'file:' + href_fixed
|
||||||
|
|
||||||
|
# relative local paths are relative to the input file, therefore temporarily change the working dir
|
||||||
|
working_dir_old = None
|
||||||
|
if parsed_href.scheme == 'file' and parsed_href.path[0] != '/':
|
||||||
if options.infilename:
|
if options.infilename:
|
||||||
infilename = options.infilename
|
working_dir_old = os.getcwd()
|
||||||
href = os.path.join(os.path.dirname(infilename), href)
|
working_dir_new = os.path.abspath(os.path.dirname(options.infilename))
|
||||||
|
os.chdir(working_dir_new)
|
||||||
|
|
||||||
|
# open/download the file
|
||||||
|
try:
|
||||||
|
file = urllib.request.urlopen(href_fixed)
|
||||||
|
rasterdata = file.read()
|
||||||
|
file.close()
|
||||||
|
except Exception as e:
|
||||||
|
print("WARNING: Could not open file '" + href + "' for embedding. "
|
||||||
|
"The raster image will be kept as a reference but might be invalid. "
|
||||||
|
"(Exception details: " + str(e) + ")", file=sys.stderr)
|
||||||
rasterdata = ''
|
rasterdata = ''
|
||||||
# test if file exists locally
|
finally:
|
||||||
if os.path.isfile(href):
|
# always restore initial working directory if we changed it above
|
||||||
# open raster file as raw binary
|
if working_dir_old is not None:
|
||||||
raster = open(href, "rb")
|
os.chdir(working_dir_old)
|
||||||
rasterdata = raster.read()
|
|
||||||
elif href[:7] == 'http://':
|
|
||||||
webFile = urllib.urlopen(href)
|
|
||||||
rasterdata = webFile.read()
|
|
||||||
webFile.close()
|
|
||||||
|
|
||||||
# ... should we remove all images which don't resolve?
|
# TODO: should we remove all images which don't resolve?
|
||||||
|
# then we also have to consider unreachable remote locations (i.e. if there is no internet connection)
|
||||||
if rasterdata != '':
|
if rasterdata != '':
|
||||||
# base64-encode raster
|
# base64-encode raster
|
||||||
b64eRaster = base64.b64encode(rasterdata)
|
b64eRaster = base64.b64encode(rasterdata)
|
||||||
|
|
@ -2991,7 +3009,8 @@ def embedRasters(element, options):
|
||||||
if ext == 'jpg':
|
if ext == 'jpg':
|
||||||
ext = 'jpeg'
|
ext = 'jpeg'
|
||||||
|
|
||||||
element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
|
element.setAttributeNS(NS['XLINK'], 'href',
|
||||||
|
'data:image/' + ext + ';base64,' + b64eRaster.decode())
|
||||||
_num_rasters_embedded += 1
|
_num_rasters_embedded += 1
|
||||||
del b64eRaster
|
del b64eRaster
|
||||||
|
|
||||||
|
|
@ -3500,10 +3519,17 @@ def scourString(in_string, options=None):
|
||||||
# input is a filename
|
# input is a filename
|
||||||
# returns the minidom doc representation of the SVG
|
# returns the minidom doc representation of the SVG
|
||||||
def scourXmlFile(filename, options=None):
|
def scourXmlFile(filename, options=None):
|
||||||
|
# we need to set infilename (otherwise relative references in the SVG won't work)
|
||||||
|
if options is None:
|
||||||
|
options = generateDefaultOptions()
|
||||||
|
options.infilename = filename
|
||||||
|
|
||||||
|
# open the file and scour it
|
||||||
with open(filename, "rb") as f:
|
with open(filename, "rb") as f:
|
||||||
in_string = f.read()
|
in_string = f.read()
|
||||||
out_string = scourString(in_string, options)
|
out_string = scourString(in_string, options)
|
||||||
|
|
||||||
|
# prepare the output xml.dom.minidom object
|
||||||
doc = xml.dom.minidom.parseString(out_string.encode('utf-8'))
|
doc = xml.dom.minidom.parseString(out_string.encode('utf-8'))
|
||||||
|
|
||||||
# since minidom does not seem to parse DTDs properly
|
# since minidom does not seem to parse DTDs properly
|
||||||
|
|
|
||||||
80
testscour.py
80
testscour.py
|
|
@ -2341,9 +2341,85 @@ class CommandLineUsage(unittest.TestCase):
|
||||||
"Statistics output not as expected when '--verbose' option was used")
|
"Statistics output not as expected when '--verbose' option was used")
|
||||||
|
|
||||||
|
|
||||||
|
class EmbedRasters(unittest.TestCase):
|
||||||
|
|
||||||
|
# quick way to ping a host using the OS 'ping' command and return the execution result
|
||||||
|
def _ping(host):
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
|
||||||
|
system = platform.system().lower()
|
||||||
|
ping_count = '-n' if system == 'windows' else '-c'
|
||||||
|
dev_null = 'NUL' if system == 'windows' else '/dev/null'
|
||||||
|
|
||||||
|
return os.system('ping ' + ping_count + ' 1 ' + host + ' > ' + dev_null)
|
||||||
|
|
||||||
|
def test_disable_embed_rasters(self):
|
||||||
|
doc = scourXmlFile('unittests/raster-formats.svg',
|
||||||
|
parse_args(['--disable-embed-rasters']))
|
||||||
|
self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'), 'raster.png',
|
||||||
|
"Raster image embedded when '--disable-embed-rasters' was specified")
|
||||||
|
|
||||||
|
def test_raster_formats(self):
|
||||||
|
doc = scourXmlFile('unittests/raster-formats.svg')
|
||||||
|
self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'),
|
||||||
|
''
|
||||||
|
'VBMVEUAAP//AAAA/wBmtfVOAAAACklEQVQI12NIAAAAYgBhGxZhsAAAAABJRU5ErkJggg==',
|
||||||
|
"Raster image (PNG) not correctly embedded.")
|
||||||
|
self.assertEqual(doc.getElementById('gif').getAttribute('xlink:href'),
|
||||||
|
'',
|
||||||
|
"Raster image (GIF) not correctly embedded.")
|
||||||
|
self.assertEqual(doc.getElementById('jpg').getAttribute('xlink:href'),
|
||||||
|
''
|
||||||
|
'2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/'
|
||||||
|
'2wBDAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/'
|
||||||
|
'wAARCAABAAMDAREAAhEBAxEB/8QAFAABAAAAAAAAAAAAAAAAAAAACv/EABoQAAEFAQAAAAAAAAAAAAAAAAgABQc3d7j/'
|
||||||
|
'xAAVAQEBAAAAAAAAAAAAAAAAAAAHCv/EABwRAAEDBQAAAAAAAAAAAAAAAAgAB7gJODl2eP/aAAwDAQACEQMRAD8AMeaF'
|
||||||
|
'/u2aj5z1Fqp7oN4rxx2kn5cPuhV6LkzG7qOyYL2r/9k=',
|
||||||
|
"Raster image (JPG) not correctly embedded.")
|
||||||
|
|
||||||
|
def test_raster_paths_local(self):
|
||||||
|
doc = scourXmlFile('unittests/raster-paths-local.svg')
|
||||||
|
images = doc.getElementsByTagName('image')
|
||||||
|
for image in images:
|
||||||
|
href = image.getAttribute('xlink:href')
|
||||||
|
self.assertTrue(href.startswith('data:image/'),
|
||||||
|
"Raster image from local path '" + href + "' not embedded.")
|
||||||
|
|
||||||
|
def test_raster_paths_local_absolute(self):
|
||||||
|
with open('unittests/raster-formats.svg', 'r') as f:
|
||||||
|
svg = f.read()
|
||||||
|
|
||||||
|
# create a reference string by scouring the original file with relative links
|
||||||
|
options = ScourOptions
|
||||||
|
options.infilename = 'unittests/raster-formats.svg'
|
||||||
|
reference_svg = scourString(svg, options)
|
||||||
|
|
||||||
|
# this will not always create formally valid paths but it'll check how robust our implementation is
|
||||||
|
# (the third path is invalid for sure because file: needs three slashes according to URI spec)
|
||||||
|
svg = svg.replace('raster.png',
|
||||||
|
'/' + os.path.abspath(os.path.dirname(__file__)) + '\\unittests\\raster.png')
|
||||||
|
svg = svg.replace('raster.gif',
|
||||||
|
'file:///' + os.path.abspath(os.path.dirname(__file__)) + '/unittests/raster.gif')
|
||||||
|
svg = svg.replace('raster.jpg',
|
||||||
|
'file:/' + os.path.abspath(os.path.dirname(__file__)) + '/unittests/raster.jpg')
|
||||||
|
|
||||||
|
svg = scourString(svg)
|
||||||
|
|
||||||
|
self.assertEqual(svg, reference_svg,
|
||||||
|
"Raster images from absolute local paths not properly embedded.")
|
||||||
|
|
||||||
|
@unittest.skipIf(_ping('raw.githubusercontent.com') != 0, "Remote server not reachable.")
|
||||||
|
def test_raster_paths_remote(self):
|
||||||
|
doc = scourXmlFile('unittests/raster-paths-remote.svg')
|
||||||
|
images = doc.getElementsByTagName('image')
|
||||||
|
for image in images:
|
||||||
|
href = image.getAttribute('xlink:href')
|
||||||
|
self.assertTrue(href.startswith('data:image/'),
|
||||||
|
"Raster image from remote path '" + href + "' not embedded.")
|
||||||
|
|
||||||
|
|
||||||
# TODO: write tests for --enable-viewboxing
|
# TODO: write tests for --enable-viewboxing
|
||||||
# TODO; write a test for embedding rasters
|
|
||||||
# TODO: write a test for --disable-embed-rasters
|
|
||||||
# TODO: write tests for --keep-editor-data
|
# TODO: write tests for --keep-editor-data
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
||||||
7
unittests/raster-formats.svg
Normal file
7
unittests/raster-formats.svg
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink= "http://www.w3.org/1999/xlink" width="170" height="210">
|
||||||
|
<text x="10" y="20">Three different formats</text>
|
||||||
|
<image id="png" x="10" y="30" width="150" height="50" xlink:href="raster.png"/>
|
||||||
|
<image id="gif" x="10" y="90" width="150" height="50" xlink:href="raster.gif"/>
|
||||||
|
<image id="jpg" x="10" y="150" width="150" height="50" xlink:href="raster.jpg"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 455 B |
19
unittests/raster-paths-local.svg
Normal file
19
unittests/raster-paths-local.svg
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink= "http://www.w3.org/1999/xlink" width="330" height="270">
|
||||||
|
<g>
|
||||||
|
<text x="10" y="20">Local files</text>
|
||||||
|
<image x="10" y="30" width="150" height="50" xlink:href="raster.png"/>
|
||||||
|
<image x="10" y="90" width="150" height="50" xlink:href="./raster.png"/>
|
||||||
|
<image x="10" y="150" width="150" height="50" xlink:href="../unittests/raster.png"/>
|
||||||
|
<!-- path can also be absolute but this will obviously not work across systems -->
|
||||||
|
<!--<image x="10" y="210" width="150" height="50" xlink:href="/E:/Temp/Scour/scour.git/unittests/raster.png"/>-->
|
||||||
|
</g>
|
||||||
|
<g transform="translate(160)">
|
||||||
|
<text x="10" y="20">Local files (file: protocol)</text>
|
||||||
|
<image x="10" y="30" width="150" height="50" xlink:href="file:raster.png"/>
|
||||||
|
<image x="10" y="90" width="150" height="50" xlink:href="file:./raster.png"/>
|
||||||
|
<image x="10" y="150" width="150" height="50" xlink:href="file:../unittests/raster.png"/>
|
||||||
|
<!-- path can also be absolute but this will obviously not work across systems -->
|
||||||
|
<!--<image x="10" y="210" width="150" height="50" xlink:href="file:///E:/Temp/Scour/scour.git/unittests/raster.png"/>-->
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 1.2 KiB |
8
unittests/raster-paths-remote.svg
Normal file
8
unittests/raster-paths-remote.svg
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink= "http://www.w3.org/1999/xlink" width="170" height="270">
|
||||||
|
<g>
|
||||||
|
<text x="10" y="20">Files from internet</text>
|
||||||
|
<image x="10" y="30" width="150" height="50" xlink:href="http://raw.githubusercontent.com/scour-project/scour/master/unittests/raster.png"/>
|
||||||
|
<image x="10" y="90" width="150" height="50" xlink:href="https://raw.githubusercontent.com/scour-project/scour/master/unittests/raster.png"/>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 502 B |
Loading…
Add table
Add a link
Reference in a new issue