From d24240770178bc55093a14cca9b4a6eb3d54f9d6 Mon Sep 17 00:00:00 2001 From: JSCHILL1 Date: Tue, 28 Apr 2009 08:57:40 -0500 Subject: [PATCH] Handle unicode characters. Specify utf-8 encoding on python scripts. Added unit test for non-ASCII characters. --- fulltests/OperaMarketShareEEhover.svg | 685 ++++++++++++++++++++++++++ package.sh | 2 +- scour.py | 7 +- testscour.py | 10 +- unittests/utf8.svg | 4 + 5 files changed, 703 insertions(+), 5 deletions(-) create mode 100644 fulltests/OperaMarketShareEEhover.svg create mode 100644 unittests/utf8.svg diff --git a/fulltests/OperaMarketShareEEhover.svg b/fulltests/OperaMarketShareEEhover.svg new file mode 100644 index 0000000..73b75d7 --- /dev/null +++ b/fulltests/OperaMarketShareEEhover.svg @@ -0,0 +1,685 @@ + + + Opera market share in Eastern and Central Europe + Every country has an id which is its ISO-3116-1-ALPHA2 code in lower case. + Members of the EU have a class="eu", countries in europe (which I found turkey to be but russia not) have a class="europe". Image based on a map (http://commons.wikimedia.org/wiki/Image:Europe_countries.svg) by Julio "Tintazul" Reis. made by Marian "maix" Sigler. Released under CreativeCommons Attribution ShareAlike (http://creativecommons.org/licenses/by-sa/2.5/). Updated by David Storey to include Opera market share taken from April 2009 figures from StatCounter, and related infographics and text labels. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BELARUS + + + + + + + POLAND + + + + + + + + + + + + + KAZAKHSTAN + + + + + + + + + + + + + + + + UKRAINE + + + + + + + + + + + + + AZERBAIJAN + + + + + + + + + + GEORGIA + + + + + + + + + + + + ARMENIA + + + + + + + + + + + + + + + + + + CZECH REP. + + + + + + + + SLOVAKIA + + + + + + + + HUNGARY + + + + + + + LITHUANIA + + + + + + + LATVIA + + + + + + + + MOLDOVA + + + + + + + ROMANIA + + + + + + + BULGARIA + + + + + + + ALBANIA + + + + + + + ESTONIA + + + + + + + + + + + + + + + + + + + + + + + BOSNIA + + + + + + + SLOVENIA + + + + + + + MACEDONIA + + + + + + + + CROATIA + + + + + + + + + + RUSSIAN FEDERATION + + + + + + + + + + + + + + + + + MONT. + + + + + + + SERBIA + + + + + + + + + + + + + + + 0% + 50% + 100% + + Opera Market Share + + + + BY + + + + + 51% + + + + + GE + + + + + 47% + + + + + UA + + + + + 44% + + + + + KZ + + + + + 39% + + + + RU + + + + 38% + + + + AM + + + + 28% + + + + AZ + + + + 28% + + + + MD + + + + 23% + + + + SK + + + + 15% + + + + LT + + + + 12% + + + + PL + + + + 10% + + + + CZ + + + + 10% + + + + RS + + + + 10% + + + + LV + + + + 9% + + + + ME + + + + 8% + + + + EE + + + + 7% + + + + RO + + + + 6% + + + + BG + + + + 6% + + + + BA + + + + 5% + + + + HR + + + + 4% + + + + + HU + + + + 4% + + + + MK + + + + 4% + + + + AL + + + + 3% + + + + SI + + + + 2% + + + + APRIL 2009 + + + + + \ No newline at end of file diff --git a/package.sh b/package.sh index e417883..57fa727 100755 --- a/package.sh +++ b/package.sh @@ -1,5 +1,5 @@ #!/bin/bash -SCOURVER="0.10" +SCOURVER="0.11" cd .. tar cvf scour/tarballs/scour-$SCOURVER.tar scour/scour.py scour/svg_regex.py scour/LICENSE scour/NOTICE scour/README.txt scour/release-notes.html gzip scour/tarballs/scour-$SCOURVER.tar diff --git a/scour.py b/scour.py index 28f970c..a21af7f 100755 --- a/scour.py +++ b/scour.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Scour # @@ -71,7 +72,7 @@ from decimal import * getcontext().prec = 6 APP = 'scour' -VER = '0.10' +VER = '0.11' COPYRIGHT = 'Copyright Jeff Schiller, 2009' NS = { 'SVG': 'http://www.w3.org/2000/svg', @@ -1046,7 +1047,7 @@ def scourXmlFile(filename, options=[]): # print 'IN=',in_string out_string = scourString(in_string, options) # print 'OUT=',out_string - return xml.dom.minidom.parseString(out_string) + return xml.dom.minidom.parseString(out_string.encode('utf-8')) def printHeader(): print APP , VER @@ -1119,7 +1120,7 @@ if __name__ == '__main__': # do the work in_string = input.read() out_string = scourString(in_string, options) - output.write(out_string) + output.write(out_string.encode("utf-8")) # Close input and output files input.close() diff --git a/testscour.py b/testscour.py index fe98e51..bbf7d10 100755 --- a/testscour.py +++ b/testscour.py @@ -1,4 +1,6 @@ -#!/usr/local/bin/python +#!/usr/bin/env python +# -*- coding: utf-8 -*- + # Test Harness for Scour # # Copyright 2009 Jeff Schiller @@ -492,6 +494,12 @@ class ChangeLineToVerticalLineSegmentInPath(unittest.TestCase): self.assertEquals(path[2][1][0], 100.0, 'Did not calculate vertical line segment in path correctly' ) +class HandleNonAsciiUtf8(unittest.TestCase): + def runTest(self): + doc = scour.scourXmlFile('unittests/utf8.svg') + desc = unicode(doc.getElementsByTagNameNS(SVGNS, 'desc')[0].firstChild.wholeText).strip() + self.assertEquals( desc, u'ú', + 'Did not handle non-ASCII characters' ) if __name__ == '__main__': unittest.main() diff --git a/unittests/utf8.svg b/unittests/utf8.svg new file mode 100644 index 0000000..2fd0ce2 --- /dev/null +++ b/unittests/utf8.svg @@ -0,0 +1,4 @@ + + ú + \ No newline at end of file