Merge pull request #27 from Ede123/encoding
Fix character encoding issues
This commit is contained in:
commit
0a0a062718
5 changed files with 52 additions and 13 deletions
|
|
@ -3097,7 +3097,7 @@ def scourString(in_string, options=None):
|
||||||
# input is a filename
|
# input is a filename
|
||||||
# returns the minidom doc representation of the SVG
|
# returns the minidom doc representation of the SVG
|
||||||
def scourXmlFile(filename, options=None):
|
def scourXmlFile(filename, options=None):
|
||||||
with open(filename) as f:
|
with open(filename, "rb") as f:
|
||||||
in_string = f.read()
|
in_string = f.read()
|
||||||
out_string = scourString(in_string, options)
|
out_string = scourString(in_string, options)
|
||||||
return xml.dom.minidom.parseString(out_string.encode('utf-8'))
|
return xml.dom.minidom.parseString(out_string.encode('utf-8'))
|
||||||
|
|
@ -3235,14 +3235,23 @@ def parse_args(args=None, ignore_additional_args=False):
|
||||||
_options_parser.error("Input filename is the same as output filename")
|
_options_parser.error("Input filename is the same as output filename")
|
||||||
|
|
||||||
if options.infilename:
|
if options.infilename:
|
||||||
infile = maybe_gziped_file(options.infilename)
|
infile = maybe_gziped_file(options.infilename, "rb")
|
||||||
# GZ: could catch a raised IOError here and report
|
# GZ: could catch a raised IOError here and report
|
||||||
else:
|
else:
|
||||||
# GZ: could sniff for gzip compression here
|
# GZ: could sniff for gzip compression here
|
||||||
|
#
|
||||||
|
# open the binary buffer of stdin and let XML parser handle decoding
|
||||||
|
try:
|
||||||
|
infile = sys.stdin.buffer
|
||||||
|
except AttributeError:
|
||||||
infile = sys.stdin
|
infile = sys.stdin
|
||||||
if options.outfilename:
|
if options.outfilename:
|
||||||
outfile = maybe_gziped_file(options.outfilename, "wb")
|
outfile = maybe_gziped_file(options.outfilename, "wb")
|
||||||
else:
|
else:
|
||||||
|
# open the binary buffer of stdout as the output is already encoded
|
||||||
|
try:
|
||||||
|
outfile = sys.stdout.buffer
|
||||||
|
except AttributeError:
|
||||||
outfile = sys.stdout
|
outfile = sys.stdout
|
||||||
|
|
||||||
return options, [infile, outfile]
|
return options, [infile, outfile]
|
||||||
|
|
|
||||||
20
testscour.py
20
testscour.py
|
|
@ -604,12 +604,24 @@ class ChangeQuadToShorthandInPath(unittest.TestCase):
|
||||||
self.assertEqual(path.getAttribute('d'), 'm10 100q50-50 100 0t100 0',
|
self.assertEqual(path.getAttribute('d'), 'm10 100q50-50 100 0t100 0',
|
||||||
'Did not change quadratic curves into shorthand curve segments in path')
|
'Did not change quadratic curves into shorthand curve segments in path')
|
||||||
|
|
||||||
class HandleNonAsciiUtf8(unittest.TestCase):
|
class HandleEncodingUTF8(unittest.TestCase):
|
||||||
def runTest(self):
|
def runTest(self):
|
||||||
doc = scour.scourXmlFile('unittests/utf8.svg')
|
doc = scour.scourXmlFile('unittests/encoding-utf8.svg')
|
||||||
|
text = u'Hello in many languages:\nar: أهلا\nbn: হ্যালো\nel: Χαίρετε\nen: Hello\nhi: नमस्ते\niw: שלום\nja: こんにちは\nkm: ជំរាបសួរ\nml: ഹലോ\nru: Здравствуйте\nur: ہیلو\nzh: 您好'
|
||||||
desc = six.text_type(doc.getElementsByTagNameNS(SVGNS, 'desc')[0].firstChild.wholeText).strip()
|
desc = six.text_type(doc.getElementsByTagNameNS(SVGNS, 'desc')[0].firstChild.wholeText).strip()
|
||||||
self.assertEqual( desc, u'ú',
|
self.assertEqual( desc, text, 'Did not handle international UTF8 characters' )
|
||||||
'Did not handle non-ASCII characters' )
|
desc = six.text_type(doc.getElementsByTagNameNS(SVGNS, 'desc')[1].firstChild.wholeText).strip()
|
||||||
|
self.assertEqual( desc, u'“”‘’–—…‐‒°©®™•½¼¾⅓⅔†‡µ¢£€«»♠♣♥♦¿�', 'Did not handle common UTF8 characters' )
|
||||||
|
desc = six.text_type(doc.getElementsByTagNameNS(SVGNS, 'desc')[2].firstChild.wholeText).strip()
|
||||||
|
self.assertEqual( desc, u':-×÷±∞π∅≤≥≠≈∧∨∩∪∈∀∃∄∑∏←↑→↓↔↕↖↗↘↙↺↻⇒⇔', 'Did not handle mathematical UTF8 characters' )
|
||||||
|
desc = six.text_type(doc.getElementsByTagNameNS(SVGNS, 'desc')[3].firstChild.wholeText).strip()
|
||||||
|
self.assertEqual( desc, u'⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁽⁾ⁿⁱ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎', 'Did not handle superscript/subscript UTF8 characters' )
|
||||||
|
|
||||||
|
class HandleEncodingISO_8859_15(unittest.TestCase):
|
||||||
|
def runTest(self):
|
||||||
|
doc = scour.scourXmlFile('unittests/encoding-iso-8859-15.svg')
|
||||||
|
desc = six.text_type(doc.getElementsByTagNameNS(SVGNS, 'desc')[0].firstChild.wholeText).strip()
|
||||||
|
self.assertEqual( desc, u'áèîäöüß€ŠšŽžŒœŸ', 'Did not handle ISO 8859-15 encoded characters' )
|
||||||
|
|
||||||
class HandleSciNoInPathData(unittest.TestCase):
|
class HandleSciNoInPathData(unittest.TestCase):
|
||||||
def runTest(self):
|
def runTest(self):
|
||||||
|
|
|
||||||
4
unittests/encoding-iso-8859-15.svg
Normal file
4
unittests/encoding-iso-8859-15.svg
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
<?xml version="1.0" encoding="ISO-8859-15" standalone="no"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<desc>áèîäöüß€ŠšŽžŒœŸ</desc>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 139 B |
19
unittests/encoding-utf8.svg
Normal file
19
unittests/encoding-utf8.svg
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<desc id="hello">Hello in many languages:
|
||||||
|
ar: أهلا
|
||||||
|
bn: হ্যালো
|
||||||
|
el: Χαίρετε
|
||||||
|
en: Hello
|
||||||
|
hi: नमस्ते
|
||||||
|
iw: שלום
|
||||||
|
ja: こんにちは
|
||||||
|
km: ជំរាបសួរ
|
||||||
|
ml: ഹലോ
|
||||||
|
ru: Здравствуйте
|
||||||
|
ur: ہیلو
|
||||||
|
zh: 您好</desc>
|
||||||
|
<desc id="common">“”‘’–—…‐‒°©®™•½¼¾⅓⅔†‡µ¢£€«»♠♣♥♦¿�</desc>
|
||||||
|
<desc id="math">:-×÷±∞π∅≤≥≠≈∧∨∩∪∈∀∃∄∑∏←↑→↓↔↕↖↗↘↙↺↻⇒⇔</desc>
|
||||||
|
<desc id="supersub">⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁽⁾ⁿⁱ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎</desc>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 731 B |
|
|
@ -1,5 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|
||||||
<svg xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
xmlns="http://www.w3.org/2000/svg">
|
|
||||||
<desc>ú</desc>
|
|
||||||
</svg>
|
|
||||||
|
Before Width: | Height: | Size: 168 B |
Loading…
Add table
Add a link
Reference in a new issue