diff --git a/CONTRIBUTORS b/CONTRIBUTORS index ea94cbd..91f5f24 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -8,3 +8,4 @@ Thanks to the following contributors to scour: * Martin: - better methods of handling string-to-float conversions in Python - document functions in the traditional Python way + - rewrite option parsing code diff --git a/crunch.sh b/crunch.sh index 86006a3..5ad1219 100755 --- a/crunch.sh +++ b/crunch.sh @@ -3,6 +3,5 @@ mkdir $1 for FILE in `ls fulltests` do echo Doing $FILE: - ./scour.py -i fulltests/$FILE -o $1/$FILE >> $1/report.txt + ./scour.py -i fulltests/$FILE -o $1/$FILE 2>> $1/report.txt done - \ No newline at end of file diff --git a/python-modules-pre24/fixedpoint.py b/python-modules-pre24/fixedpoint.py new file mode 100644 index 0000000..e83c846 --- /dev/null +++ b/python-modules-pre24/fixedpoint.py @@ -0,0 +1,619 @@ +#!/usr/bin/env python +""" +FixedPoint objects support decimal arithmetic with a fixed number of +digits (called the object's precision) after the decimal point. The +number of digits before the decimal point is variable & unbounded. + +The precision is user-settable on a per-object basis when a FixedPoint +is constructed, and may vary across FixedPoint objects. The precision +may also be changed after construction via FixedPoint.set_precision(p). +Note that if the precision of a FixedPoint is reduced via set_precision, +information may be lost to rounding. + +>>> x = FixedPoint("5.55") # precision defaults to 2 +>>> print x +5.55 +>>> x.set_precision(1) # round to one fraction digit +>>> print x +5.6 +>>> print FixedPoint("5.55", 1) # same thing setting to 1 in constructor +5.6 +>>> repr(x) # returns constructor string that reproduces object exactly +"FixedPoint('5.6', 1)" +>>> + +When FixedPoint objects of different precision are combined via + - * /, +the result is computed to the larger of the inputs' precisions, which also +becomes the precision of the resulting FixedPoint object. 
+ +>>> print FixedPoint("3.42") + FixedPoint("100.005", 3) +103.425 +>>> + +When a FixedPoint is combined with other numeric types (ints, floats, +strings representing a number) via + - * /, then similarly the computation +is carried out using-- and the result inherits --the FixedPoint's +precision. + +>>> print FixedPoint(1) / 7 +0.14 +>>> print FixedPoint(1, 30) / 7 +0.142857142857142857142857142857 +>>> + +The string produced by str(x) (implicitly invoked by "print") always +contains at least one digit before the decimal point, followed by a +decimal point, followed by exactly x.get_precision() digits. If x is +negative, str(x)[0] == "-". + +The FixedPoint constructor can be passed an int, long, string, float, +FixedPoint, or any object convertible to a float via float() or to a +long via long(). Passing a precision is optional; if specified, the +precision must be a non-negative int. There is no inherent limit on +the size of the precision, but if very very large you'll probably run +out of memory. + +Note that conversion of floats to FixedPoint can be surprising, and +should be avoided whenever possible. Conversion from string is exact +(up to final rounding to the requested precision), so is greatly +preferred. + +>>> print FixedPoint(1.1e30) +1099999999999999993725589651456.00 +>>> print FixedPoint("1.1e30") +1100000000000000000000000000000.00 +>>> + +The following Python operators and functions accept FixedPoints in the +expected ways: + + binary + - * / % divmod + with auto-coercion of other types to FixedPoint. + + - % divmod of FixedPoints are always exact. + * / of FixedPoints may lose information to rounding, in + which case the result is the infinitely precise answer + rounded to the result's precision. + divmod(x, y) returns (q, r) where q is a long equal to + floor(x/y) as if x/y were computed to infinite precision, + and r is a FixedPoint equal to x - q * y; no information + is lost. Note that r has the sign of y, and abs(r) < abs(y). 
+ unary - + == != < > <= >= cmp + min max + float int long (int and long truncate) + abs + str repr + hash + use as dict keys + use as boolean (e.g. "if some_FixedPoint:" -- true iff not zero) + +Methods unique to FixedPoints: + .copy() return new FixedPoint with same value + .frac() long(x) + x.frac() == x + .get_precision() return the precision(p) of this FixedPoint object + .set_precision(p) set the precision of this FixedPoint object + +Provided as-is; use at your own risk; no warranty; no promises; enjoy! +""" + +# Released to the public domain 28-Mar-2001, +# by Tim Peters (tim.one@home.com). + + +# 28-Mar-01 ver 0.0,4 +# Use repr() instead of str() inside __str__, because str(long) changed +# since this was first written (used to produce trailing "L", doesn't +# now). +# +# 09-May-99 ver 0,0,3 +# Repaired __sub__(FixedPoint, string); was blowing up. +# Much more careful conversion of float (now best possible). +# Implemented exact % and divmod. +# +# 14-Oct-98 ver 0,0,2 +# Added int, long, frac. Beefed up docs. 
#   Removed DECIMAL_POINT
#   and MINUS_SIGN globals to discourage bloating this class instead
#   of writing formatting wrapper classes (or subclasses)
#
# 11-Oct-98 ver 0,0,1
#   posted to c.l.py

__copyright__ = "Copyright (C) Python Software Foundation"
__author__ = "Tim Peters"
__version__ = 0, 1, 0


def bankersRounding(self, dividend, divisor, quotient, remainder):
    """Round a quotient to the nearest integer, ties going to the even one.

    The quotient is incremented when the remainder is more than half of
    the divisor, or when it is exactly half and the quotient is odd.

    This function is installed as ``FixedPoint.round`` and is invoked by
    ``FixedPoint._roundquotient`` as ``self.round(x, y, n, leftover)``,
    which is why it accepts ``self`` and ``dividend`` although neither is
    used here.

    Arguments:
        self      -- the FixedPoint instance (unused)
        dividend  -- the number that was divided (unused)
        divisor   -- the divisor; the caller guarantees it is > 0
        quotient  -- floor quotient, as returned by divmod()
        remainder -- remainder, as returned by divmod()

    Returns the (possibly incremented) quotient.
    """
    # Compare 2*remainder against the divisor so that no fractional
    # arithmetic is needed.  Direct comparisons are used instead of the
    # cmp() builtin so the function does not depend on a builtin that
    # newer interpreters no longer provide.
    doubled = remainder << 1
    if doubled > divisor or (doubled == divisor and (quotient & 1) == 1):
        quotient += 1
    return quotient


def addHalfAndChop(self, dividend, divisor, quotient, remainder):
    """Round a quotient the way 'add half and chop' would.

    The quotient is incremented when the remainder is greater than half
    of the divisor, or when it is exactly half and the quotient is >= 0.

    The signature mirrors bankersRounding() so that either function can
    be assigned to ``FixedPoint.round``; ``self`` and ``dividend`` are
    accepted for that reason and are not used.

    Arguments:
        self      -- the FixedPoint instance (unused)
        dividend  -- the number that was divided (unused)
        divisor   -- the divisor; the caller guarantees it is > 0
        quotient  -- floor quotient, as returned by divmod()
        remainder -- remainder, as returned by divmod()

    Returns the (possibly incremented) quotient.
    """
    # Same integer-only "remainder vs. half the divisor" test as in
    # bankersRounding(); only the tie-breaking rule differs.
    doubled = remainder << 1
    if doubled > divisor or (doubled == divisor and quotient >= 0):
        quotient += 1
    return quotient


# 2002-10-20 dougfort - fake classes for pre 2.2 compatibility
try:
    object
except NameError:
    # Interpreters without new-style classes: provide stand-ins so that
    # "class FixedPoint(object)" and "property(...)" below still work.
    class object:
        pass

    def property(x, y):
        return None

# The default value for the number of decimal digits carried after the
# decimal point.  This only has effect at compile-time.
+DEFAULT_PRECISION = 2 + +class FixedPoint(object): + """Basic FixedPoint object class, + The exact value is self.n / 10**self.p; + self.n is a long; self.p is an int + """ + __slots__ = ['n', 'p'] + def __init__(self, value=0, precision=DEFAULT_PRECISION): + self.n = self.p = 0 + self.set_precision(precision) + p = self.p + + if isinstance(value, type("42.3e5")): + n, exp = _string2exact(value) + # exact value is n*10**exp = n*10**(exp+p)/10**p + effective_exp = exp + p + if effective_exp > 0: + n = n * _tento(effective_exp) + elif effective_exp < 0: + n = self._roundquotient(n, _tento(-effective_exp)) + self.n = n + return + + if isinstance(value, type(42)) or isinstance(value, type(42L)): + self.n = long(value) * _tento(p) + return + + if isinstance(value, type(self)): + temp = value.copy() + temp.set_precision(p) + self.n, self.p = temp.n, temp.p + return + + if isinstance(value, type(42.0)): + # XXX ignoring infinities and NaNs and overflows for now + import math + f, e = math.frexp(abs(value)) + assert f == 0 or 0.5 <= f < 1.0 + # |value| = f * 2**e exactly + + # Suck up CHUNK bits at a time; 28 is enough so that we suck + # up all bits in 2 iterations for all known binary double- + # precision formats, and small enough to fit in an int. + CHUNK = 28 + top = 0L + # invariant: |value| = (top + f) * 2**e exactly + while f: + f = math.ldexp(f, CHUNK) + digit = int(f) + assert digit >> CHUNK == 0 + top = (top << CHUNK) | digit + f = f - digit + assert 0.0 <= f < 1.0 + e = e - CHUNK + + # now |value| = top * 2**e exactly + # want n such that n / 10**p = top * 2**e, or + # n = top * 10**p * 2**e + top = top * _tento(p) + if e >= 0: + n = top << e + else: + n = self._roundquotient(top, 1L << -e) + if value < 0: + n = -n + self.n = n + return + + if isinstance(value, type(42-42j)): + raise TypeError("can't convert complex to FixedPoint: " + + `value`) + + # can we coerce to a float? 
+ yes = 1 + try: + asfloat = float(value) + except: + yes = 0 + if yes: + self.__init__(asfloat, p) + return + + # similarly for long + yes = 1 + try: + aslong = long(value) + except: + yes = 0 + if yes: + self.__init__(aslong, p) + return + + raise TypeError("can't convert to FixedPoint: " + `value`) + + def get_precision(self): + """Return the precision of this FixedPoint. + + The precision is the number of decimal digits carried after + the decimal point, and is an int >= 0. + """ + + return self.p + + def set_precision(self, precision=DEFAULT_PRECISION): + """Change the precision carried by this FixedPoint to p. + + precision must be an int >= 0, and defaults to + DEFAULT_PRECISION. + + If precision is less than this FixedPoint's current precision, + information may be lost to rounding. + """ + + try: + p = int(precision) + except: + raise TypeError("precision not convertable to int: " + + `precision`) + if p < 0: + raise ValueError("precision must be >= 0: " + `precision`) + + if p > self.p: + self.n = self.n * _tento(p - self.p) + elif p < self.p: + self.n = self._roundquotient(self.n, _tento(self.p - p)) + self.p = p + + precision = property(get_precision, set_precision) + + def __str__(self): + n, p = self.n, self.p + i, f = divmod(abs(n), _tento(p)) + if p: + frac = repr(f)[:-1] + frac = "0" * (p - len(frac)) + frac + else: + frac = "" + return "-"[:n<0] + \ + repr(i)[:-1] + \ + "." + frac + + def __repr__(self): + return "FixedPoint" + `(str(self), self.p)` + + def copy(self): + return _mkFP(self.n, self.p, type(self)) + + __copy__ = copy + + def __deepcopy__(self, memo): + return self.copy() + + def __cmp__(self, other): + xn, yn, p = _norm(self, other, FixedPoint=type(self)) + return cmp(xn, yn) + + def __hash__(self): + """ Caution! == values must have equal hashes, and a FixedPoint + is essentially a rational in unnormalized form. There's + really no choice here but to normalize it, so hash is + potentially expensive. 
+ n, p = self.__reduce() + + Obscurity: if the value is an exact integer, p will be 0 now, + so the hash expression reduces to hash(n). So FixedPoints + that happen to be exact integers hash to the same things as + their int or long equivalents. This is Good. But if a + FixedPoint happens to have a value exactly representable as + a float, their hashes may differ. This is a teensy bit Bad. + """ + n, p = self.__reduce() + return hash(n) ^ hash(p) + + def __nonzero__(self): + """ Returns true if this FixedPoint is not equal to zero""" + return self.n != 0 + + def __neg__(self): + return _mkFP(-self.n, self.p, type(self)) + + def __abs__(self): + """ Returns new FixedPoint containing the absolute value of this FixedPoint""" + if self.n >= 0: + return self.copy() + else: + return -self + + def __add__(self, other): + n1, n2, p = _norm(self, other, FixedPoint=type(self)) + # n1/10**p + n2/10**p = (n1+n2)/10**p + return _mkFP(n1 + n2, p, type(self)) + + __radd__ = __add__ + + def __sub__(self, other): + if not isinstance(other, type(self)): + other = type(self)(other, self.p) + return self.__add__(-other) + + def __rsub__(self, other): + return (-self) + other + + def __mul__(self, other): + n1, n2, p = _norm(self, other, FixedPoint=type(self)) + # n1/10**p * n2/10**p = (n1*n2/10**p)/10**p + return _mkFP(self._roundquotient(n1 * n2, _tento(p)), p, type(self)) + + __rmul__ = __mul__ + + def __div__(self, other): + n1, n2, p = _norm(self, other, FixedPoint=type(self)) + if n2 == 0: + raise ZeroDivisionError("FixedPoint division") + if n2 < 0: + n1, n2 = -n1, -n2 + # n1/10**p / (n2/10**p) = n1/n2 = (n1*10**p/n2)/10**p + return _mkFP(self._roundquotient(n1 * _tento(p), n2), p, type(self)) + + def __rdiv__(self, other): + n1, n2, p = _norm(self, other, FixedPoint=type(self)) + return _mkFP(n2, p, FixedPoint=type(self)) / self + + def __divmod__(self, other): + n1, n2, p = _norm(self, other, FixedPoint=type(self)) + if n2 == 0: + raise ZeroDivisionError("FixedPoint modulo") + 
# floor((n1/10**p)/(n2*10**p)) = floor(n1/n2) + q = n1 / n2 + # n1/10**p - q * n2/10**p = (n1 - q * n2)/10**p + return q, _mkFP(n1 - q * n2, p, type(self)) + + def __rdivmod__(self, other): + n1, n2, p = _norm(self, other, FixedPoint=type(self)) + return divmod(_mkFP(n2, p), self) + + def __mod__(self, other): + return self.__divmod__(other)[1] + + def __rmod__(self, other): + n1, n2, p = _norm(self, other, FixedPoint=type(self)) + return _mkFP(n2, p, type(self)).__mod__(self) + + def __float__(self): + """Return the floating point representation of this FixedPoint. + Caution! float can lose precision. + """ + n, p = self.__reduce() + return float(n) / float(_tento(p)) + + def __long__(self): + """EJG/DF - Should this round instead? + Note e.g. long(-1.9) == -1L and long(1.9) == 1L in Python + Note that __int__ inherits whatever __long__ does, + and .frac() is affected too + """ + answer = abs(self.n) / _tento(self.p) + if self.n < 0: + answer = -answer + return answer + + def __int__(self): + """Return integer value of FixedPoint object.""" + return int(self.__long__()) + + def frac(self): + """Return fractional portion as a FixedPoint. + + x.frac() + long(x) == x + """ + return self - long(self) + + def _roundquotient(self, x, y): + """ + Divide x by y, + return the result of rounding + Developers may substitute their own 'round' for custom rounding + y must be > 0 + """ + assert y > 0 + n, leftover = divmod(x, y) + return self.round(x, y, n, leftover) + + def __reduce(self): + """ Return n, p s.t. 
self == n/10**p and n % 10 != 0""" + n, p = self.n, self.p + if n == 0: + p = 0 + while p and n % 10 == 0: + p = p - 1 + n = n / 10 + return n, p + +# 2002-10-04 dougfort - Default to Banker's Rounding for backward compatibility +FixedPoint.round = bankersRounding + +# return 10L**n + +def _tento(n, cache={}): + """Cached computation of 10**n""" + try: + return cache[n] + except KeyError: + answer = cache[n] = 10L ** n + return answer + +def _norm(x, y, isinstance=isinstance, FixedPoint=FixedPoint, + _tento=_tento): + """Return xn, yn, p s.t. + p = max(x.p, y.p) + x = xn / 10**p + y = yn / 10**p + + x must be FixedPoint to begin with; if y is not FixedPoint, + it inherits its precision from x. + + Note that this method is called a lot, so default-arg tricks are helpful. + """ + assert isinstance(x, FixedPoint) + if not isinstance(y, FixedPoint): + y = FixedPoint(y, x.p) + xn, yn = x.n, y.n + xp, yp = x.p, y.p + if xp > yp: + yn = yn * _tento(xp - yp) + p = xp + elif xp < yp: + xn = xn * _tento(yp - xp) + p = yp + else: + p = xp # same as yp + return xn, yn, p + +def _mkFP(n, p, FixedPoint=FixedPoint): + """Make FixedPoint objext - Return a new FixedPoint object with the selected precision.""" + f = FixedPoint() + #print '_mkFP Debug: %s, value=%s' % (type(f),n) + f.n = n + f.p = p + return f + +# crud for parsing strings +import re + +# There's an optional sign at the start, and an optional exponent +# at the end. The exponent has an optional sign and at least one +# digit. In between, must have either at least one digit followed +# by an optional fraction, or a decimal point followed by at least +# one digit. Yuck. + +_parser = re.compile(r""" + \s* + (?P[-+])? + ( + (?P\d+) (\. (?P\d*))? + | + \. (?P\d+) + ) + ([eE](?P[-+]? \d+))? + \s* $ +""", re.VERBOSE).match + +del re + + +def _string2exact(s): + """Return n, p s.t. 
float string value == n * 10**p exactly.""" + m = _parser(s) + if m is None: + raise ValueError("can't parse as number: " + `s`) + + exp = m.group('exp') + if exp is None: + exp = 0 + else: + exp = int(exp) + + intpart = m.group('int') + if intpart is None: + intpart = "0" + fracpart = m.group('onlyfrac') + else: + fracpart = m.group('frac') + if fracpart is None or fracpart == "": + fracpart = "0" + assert intpart + assert fracpart + + i, f = long(intpart), long(fracpart) + nfrac = len(fracpart) + i = i * _tento(nfrac) + f + exp = exp - nfrac + + if m.group('sign') == "-": + i = -i + + return i, exp + +def _test(): + """Unit testing framework""" + fp = FixedPoint + o = fp("0.1") + assert str(o) == "0.10" + t = fp("-20e-2", 5) + assert str(t) == "-0.20000" + assert t < o + assert o > t + assert min(o, t) == min(t, o) == t + assert max(o, t) == max(t, o) == o + assert o != t + assert --t == t + assert abs(t) > abs(o) + assert abs(o) < abs(t) + assert o == o and t == t + assert t.copy() == t + assert o == -t/2 == -.5 * t + assert abs(t) == o + o + assert abs(o) == o + assert o/t == -0.5 + assert -(t/o) == (-t)/o == t/-o == 2 + assert 1 + o == o + 1 == fp(" +00.000011e+5 ") + assert 1/o == 10 + assert o + t == t + o == -o + assert 2.0 * t == t * 2 == "2" * t == o/o * 2L * t + assert 1 - t == -(t - 1) == fp(6L)/5 + assert t*t == 4*o*o == o*4*o == o*o*4 + assert fp(2) - "1" == 1 + assert float(-1/t) == 5.0 + for p in range(20): + assert 42 + fp("1e-20", p) - 42 == 0 + assert 1/(42 + fp("1e-20", 20) - 42) == fp("100.0E18") + o = fp(".9995", 4) + assert 1 - o == fp("5e-4", 10) + o.set_precision(3) + assert o == 1 + o = fp(".9985", 4) + o.set_precision(3) + assert o == fp(".998", 10) + assert o == o.frac() + o.set_precision(100) + assert o == fp(".998", 10) + o.set_precision(2) + assert o == 1 + x = fp(1.99) + assert long(x) == -long(-x) == 1L + assert int(x) == -int(-x) == 1 + assert x == long(x) + x.frac() + assert -x == long(-x) + (-x).frac() + assert fp(7) % 4 == 7 % 
fp(4) == 3 + assert fp(-7) % 4 == -7 % fp(4) == 1 + assert fp(-7) % -4 == -7 % fp(-4) == -3 + assert fp(7.0) % "-4.0" == 7 % fp(-4) == -1 + assert fp("5.5") % fp("1.1") == fp("5.5e100") % fp("1.1e100") == 0 + assert divmod(fp("1e100"), 3) == (long(fp("1e100")/3), 1) + +if __name__ == '__main__': + _test() + diff --git a/python-modules-pre24/optparse.py b/python-modules-pre24/optparse.py new file mode 100644 index 0000000..ae3d00d --- /dev/null +++ b/python-modules-pre24/optparse.py @@ -0,0 +1,1569 @@ +"""optparse - a powerful, extensible, and easy-to-use option parser. + +By Greg Ward + +Originally distributed as Optik; see http://optik.sourceforge.net/ . + +If you have problems with this module, please do not file bugs, +patches, or feature requests with Python; instead, use Optik's +SourceForge project page: + http://sourceforge.net/projects/optik + +For support, use the optik-users@lists.sourceforge.net mailing list +(http://lists.sourceforge.net/lists/listinfo/optik-users). +""" + +# Python developers: please do not make changes to this file, since +# it is automatically generated from the Optik source code. + +__version__ = "1.5a2" + +__all__ = ['Option', + 'SUPPRESS_HELP', + 'SUPPRESS_USAGE', + 'Values', + 'OptionContainer', + 'OptionGroup', + 'OptionParser', + 'HelpFormatter', + 'IndentedHelpFormatter', + 'TitledHelpFormatter', + 'OptParseError', + 'OptionError', + 'OptionConflictError', + 'OptionValueError', + 'BadOptionError'] + +__copyright__ = """ +Copyright (c) 2001-2004 Gregory P. Ward. All rights reserved. +Copyright (c) 2002-2004 Python Software Foundation. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +import sys, os +import textwrap +try: + from gettext import gettext as _ +except ImportError: + _ = lambda arg: arg + +def _repr(self): + return "<%s at 0x%x: %s>" % (self.__class__.__name__, id(self), self) + + +# This file was generated from: +# Id: option_parser.py 421 2004-10-26 00:45:16Z greg +# Id: option.py 422 2004-10-26 00:53:47Z greg +# Id: help.py 367 2004-07-24 23:21:21Z gward +# Id: errors.py 367 2004-07-24 23:21:21Z gward + +class OptParseError (Exception): + def __init__(self, msg): + self.msg = msg + + def __str__(self): + return self.msg + + +class OptionError (OptParseError): + """ + Raised if an Option instance is created with invalid or + inconsistent arguments. 
+ """ + + def __init__(self, msg, option): + self.msg = msg + self.option_id = str(option) + + def __str__(self): + if self.option_id: + return "option %s: %s" % (self.option_id, self.msg) + else: + return self.msg + +class OptionConflictError (OptionError): + """ + Raised if conflicting options are added to an OptionParser. + """ + +class OptionValueError (OptParseError): + """ + Raised if an invalid option value is encountered on the command + line. + """ + +class BadOptionError (OptParseError): + """ + Raised if an invalid or ambiguous option is seen on the command-line. + """ + + +class HelpFormatter: + + """ + Abstract base class for formatting option help. OptionParser + instances should use one of the HelpFormatter subclasses for + formatting help; by default IndentedHelpFormatter is used. + + Instance attributes: + parser : OptionParser + the controlling OptionParser instance + indent_increment : int + the number of columns to indent per nesting level + max_help_position : int + the maximum starting column for option help text + help_position : int + the calculated starting column for option help text; + initially the same as the maximum + width : int + total number of columns for output (pass None to constructor for + this value to be taken from the $COLUMNS environment variable) + level : int + current indentation level + current_indent : int + current indentation level (in columns) + help_width : int + number of columns available for option help text (calculated) + default_tag : str + text to replace with each option's default value, "%default" + by default. Set to false value to disable default value expansion. + option_strings : { Option : str } + maps Option instances to the snippet of help text explaining + the syntax of that option, e.g. "-h, --help" or + "-fFILE, --file=FILE" + _short_opt_fmt : str + format string controlling how short options with values are + printed in help text. 
Must be either "%s%s" ("-fFILE") or + "%s %s" ("-f FILE"), because those are the two syntaxes that + Optik supports. + _long_opt_fmt : str + similar but for long options; must be either "%s %s" ("--file FILE") + or "%s=%s" ("--file=FILE"). + """ + + NO_DEFAULT_VALUE = "none" + + def __init__(self, + indent_increment, + max_help_position, + width, + short_first): + self.parser = None + self.indent_increment = indent_increment + self.help_position = self.max_help_position = max_help_position + if width is None: + try: + width = int(os.environ['COLUMNS']) + except (KeyError, ValueError): + width = 80 + width -= 2 + self.width = width + self.current_indent = 0 + self.level = 0 + self.help_width = None # computed later + self.short_first = short_first + self.default_tag = "%default" + self.option_strings = {} + self._short_opt_fmt = "%s %s" + self._long_opt_fmt = "%s=%s" + + def set_parser(self, parser): + self.parser = parser + + def set_short_opt_delimiter(self, delim): + if delim not in ("", " "): + raise ValueError( + "invalid metavar delimiter for short options: %r" % delim) + self._short_opt_fmt = "%s" + delim + "%s" + + def set_long_opt_delimiter(self, delim): + if delim not in ("=", " "): + raise ValueError( + "invalid metavar delimiter for long options: %r" % delim) + self._long_opt_fmt = "%s" + delim + "%s" + + def indent(self): + self.current_indent += self.indent_increment + self.level += 1 + + def dedent(self): + self.current_indent -= self.indent_increment + assert self.current_indent >= 0, "Indent decreased below 0." 
+ self.level -= 1 + + def format_usage(self, usage): + raise NotImplementedError, "subclasses must implement" + + def format_heading(self, heading): + raise NotImplementedError, "subclasses must implement" + + def format_description(self, description): + if not description: + return "" + desc_width = self.width - self.current_indent + indent = " "*self.current_indent + return textwrap.fill(description, + desc_width, + initial_indent=indent, + subsequent_indent=indent) + "\n" + + def expand_default(self, option): + if self.parser is None or not self.default_tag: + return option.help + + default_value = self.parser.defaults.get(option.dest) + if default_value is NO_DEFAULT or default_value is None: + default_value = self.NO_DEFAULT_VALUE + + return option.help.replace(self.default_tag, str(default_value)) + + def format_option(self, option): + # The help for each option consists of two parts: + # * the opt strings and metavars + # eg. ("-x", or "-fFILENAME, --file=FILENAME") + # * the user-supplied help string + # eg. ("turn on expert mode", "read data from FILENAME") + # + # If possible, we write both of these on the same line: + # -x turn on expert mode + # + # But if the opt string list is too long, we put the help + # string on a second line, indented to the same column it would + # start in if it fit on the first line. 
+ # -fFILENAME, --file=FILENAME + # read data from FILENAME + result = [] + opts = self.option_strings[option] + opt_width = self.help_position - self.current_indent - 2 + if len(opts) > opt_width: + opts = "%*s%s\n" % (self.current_indent, "", opts) + indent_first = self.help_position + else: # start help on same line as opts + opts = "%*s%-*s " % (self.current_indent, "", opt_width, opts) + indent_first = 0 + result.append(opts) + if option.help: + help_text = self.expand_default(option) + help_lines = textwrap.wrap(help_text, self.help_width) + result.append("%*s%s\n" % (indent_first, "", help_lines[0])) + result.extend(["%*s%s\n" % (self.help_position, "", line) + for line in help_lines[1:]]) + elif opts[-1] != "\n": + result.append("\n") + return "".join(result) + + def store_option_strings(self, parser): + self.indent() + max_len = 0 + for opt in parser.option_list: + strings = self.format_option_strings(opt) + self.option_strings[opt] = strings + max_len = max(max_len, len(strings) + self.current_indent) + self.indent() + for group in parser.option_groups: + for opt in group.option_list: + strings = self.format_option_strings(opt) + self.option_strings[opt] = strings + max_len = max(max_len, len(strings) + self.current_indent) + self.dedent() + self.dedent() + self.help_position = min(max_len + 2, self.max_help_position) + self.help_width = self.width - self.help_position + + def format_option_strings(self, option): + """Return a comma-separated list of option strings & metavariables.""" + if option.takes_value(): + metavar = option.metavar or option.dest.upper() + short_opts = [self._short_opt_fmt % (sopt, metavar) + for sopt in option._short_opts] + long_opts = [self._long_opt_fmt % (lopt, metavar) + for lopt in option._long_opts] + else: + short_opts = option._short_opts + long_opts = option._long_opts + + if self.short_first: + opts = short_opts + long_opts + else: + opts = long_opts + short_opts + + return ", ".join(opts) + +class IndentedHelpFormatter 
(HelpFormatter): + """Format help with indented section bodies. + """ + + def __init__(self, + indent_increment=2, + max_help_position=24, + width=None, + short_first=1): + HelpFormatter.__init__( + self, indent_increment, max_help_position, width, short_first) + + def format_usage(self, usage): + return _("usage: %s\n") % usage + + def format_heading(self, heading): + return "%*s%s:\n" % (self.current_indent, "", heading) + + +class TitledHelpFormatter (HelpFormatter): + """Format help with underlined section headers. + """ + + def __init__(self, + indent_increment=0, + max_help_position=24, + width=None, + short_first=0): + HelpFormatter.__init__ ( + self, indent_increment, max_help_position, width, short_first) + + def format_usage(self, usage): + return "%s %s\n" % (self.format_heading(_("Usage")), usage) + + def format_heading(self, heading): + return "%s\n%s\n" % (heading, "=-"[self.level] * len(heading)) + + +_builtin_cvt = { "int" : (int, _("integer")), + "long" : (long, _("long integer")), + "float" : (float, _("floating-point")), + "complex" : (complex, _("complex")) } + +def check_builtin(option, opt, value): + (cvt, what) = _builtin_cvt[option.type] + try: + return cvt(value) + except ValueError: + raise OptionValueError( + _("option %s: invalid %s value: %r") % (opt, what, value)) + +def check_choice(option, opt, value): + if value in option.choices: + return value + else: + choices = ", ".join(map(repr, option.choices)) + raise OptionValueError( + _("option %s: invalid choice: %r (choose from %s)") + % (opt, value, choices)) + +# Not supplying a default is different from a default of None, +# so we need an explicit "not supplied" value. 
+NO_DEFAULT = ("NO", "DEFAULT") + + +class Option: + """ + Instance attributes: + _short_opts : [string] + _long_opts : [string] + + action : string + type : string + dest : string + default : any + nargs : int + const : any + choices : [string] + callback : function + callback_args : (any*) + callback_kwargs : { string : any } + help : string + metavar : string + """ + + # The list of instance attributes that may be set through + # keyword args to the constructor. + ATTRS = ['action', + 'type', + 'dest', + 'default', + 'nargs', + 'const', + 'choices', + 'callback', + 'callback_args', + 'callback_kwargs', + 'help', + 'metavar'] + + # The set of actions allowed by option parsers. Explicitly listed + # here so the constructor can validate its arguments. + ACTIONS = ("store", + "store_const", + "store_true", + "store_false", + "append", + "count", + "callback", + "help", + "version") + + # The set of actions that involve storing a value somewhere; + # also listed just for constructor argument validation. (If + # the action is one of these, there must be a destination.) + STORE_ACTIONS = ("store", + "store_const", + "store_true", + "store_false", + "append", + "count") + + # The set of actions for which it makes sense to supply a value + # type, ie. which may consume an argument from the command line. + TYPED_ACTIONS = ("store", + "append", + "callback") + + # The set of actions which *require* a value type, ie. that + # always consume an argument from the command line. + ALWAYS_TYPED_ACTIONS = ("store", + "append") + + # The set of known types for option parsers. Again, listed here for + # constructor argument validation. + TYPES = ("string", "int", "long", "float", "complex", "choice") + + # Dictionary of argument checking functions, which convert and + # validate option arguments according to the option type. 
+ # + # Signature of checking functions is: + # check(option : Option, opt : string, value : string) -> any + # where + # option is the Option instance calling the checker + # opt is the actual option seen on the command-line + # (eg. "-a", "--file") + # value is the option argument seen on the command-line + # + # The return value should be in the appropriate Python type + # for option.type -- eg. an integer if option.type == "int". + # + # If no checker is defined for a type, arguments will be + # unchecked and remain strings. + TYPE_CHECKER = { "int" : check_builtin, + "long" : check_builtin, + "float" : check_builtin, + "complex": check_builtin, + "choice" : check_choice, + } + + + # CHECK_METHODS is a list of unbound method objects; they are called + # by the constructor, in order, after all attributes are + # initialized. The list is created and filled in later, after all + # the methods are actually defined. (I just put it here because I + # like to define and document all class attributes in the same + # place.) Subclasses that add another _check_*() method should + # define their own CHECK_METHODS list that adds their check method + # to those from this class. + CHECK_METHODS = None + + + # -- Constructor/initialization methods ---------------------------- + + def __init__(self, *opts, **attrs): + # Set _short_opts, _long_opts attrs from 'opts' tuple. + # Have to be set now, in case no option strings are supplied. + self._short_opts = [] + self._long_opts = [] + opts = self._check_opt_strings(opts) + self._set_opt_strings(opts) + + # Set all other attrs (action, type, etc.) from 'attrs' dict + self._set_attrs(attrs) + + # Check all the attributes we just set. There are lots of + # complicated interdependencies, but luckily they can be farmed + # out to the _check_*() methods listed in CHECK_METHODS -- which + # could be handy for subclasses! The one thing these all share + # is that they raise OptionError if they discover a problem. 
+ for checker in self.CHECK_METHODS: + checker(self) + + def _check_opt_strings(self, opts): + # Filter out None because early versions of Optik had exactly + # one short option and one long option, either of which + # could be None. + opts = filter(None, opts) + if not opts: + raise TypeError("at least one option string must be supplied") + return opts + + def _set_opt_strings(self, opts): + for opt in opts: + if len(opt) < 2: + raise OptionError( + "invalid option string %r: " + "must be at least two characters long" % opt, self) + elif len(opt) == 2: + if not (opt[0] == "-" and opt[1] != "-"): + raise OptionError( + "invalid short option string %r: " + "must be of the form -x, (x any non-dash char)" % opt, + self) + self._short_opts.append(opt) + else: + if not (opt[0:2] == "--" and opt[2] != "-"): + raise OptionError( + "invalid long option string %r: " + "must start with --, followed by non-dash" % opt, + self) + self._long_opts.append(opt) + + def _set_attrs(self, attrs): + for attr in self.ATTRS: + if attrs.has_key(attr): + setattr(self, attr, attrs[attr]) + del attrs[attr] + else: + if attr == 'default': + setattr(self, attr, NO_DEFAULT) + else: + setattr(self, attr, None) + if attrs: + raise OptionError( + "invalid keyword arguments: %s" % ", ".join(attrs.keys()), + self) + + + # -- Constructor validation methods -------------------------------- + + def _check_action(self): + if self.action is None: + self.action = "store" + elif self.action not in self.ACTIONS: + raise OptionError("invalid action: %r" % self.action, self) + + def _check_type(self): + if self.type is None: + if self.action in self.ALWAYS_TYPED_ACTIONS: + if self.choices is not None: + # The "choices" attribute implies "choice" type. + self.type = "choice" + else: + # No type given? "string" is the most sensible default. + self.type = "string" + else: + # Allow type objects as an alternative to their names. 
+ if type(self.type) is type: + self.type = self.type.__name__ + if self.type == "str": + self.type = "string" + + if self.type not in self.TYPES: + raise OptionError("invalid option type: %r" % self.type, self) + if self.action not in self.TYPED_ACTIONS: + raise OptionError( + "must not supply a type for action %r" % self.action, self) + + def _check_choice(self): + if self.type == "choice": + if self.choices is None: + raise OptionError( + "must supply a list of choices for type 'choice'", self) + elif type(self.choices) not in (tuple, list): + raise OptionError( + "choices must be a list of strings ('%s' supplied)" + % str(type(self.choices)).split("'")[1], self) + elif self.choices is not None: + raise OptionError( + "must not supply choices for type %r" % self.type, self) + + def _check_dest(self): + # No destination given, and we need one for this action. The + # self.type check is for callbacks that take a value. + takes_value = (self.action in self.STORE_ACTIONS or + self.type is not None) + if self.dest is None and takes_value: + + # Glean a destination from the first long option string, + # or from the first short option string if no long options. + if self._long_opts: + # eg. 
"--foo-bar" -> "foo_bar" + self.dest = self._long_opts[0][2:].replace('-', '_') + else: + self.dest = self._short_opts[0][1] + + def _check_const(self): + if self.action != "store_const" and self.const is not None: + raise OptionError( + "'const' must not be supplied for action %r" % self.action, + self) + + def _check_nargs(self): + if self.action in self.TYPED_ACTIONS: + if self.nargs is None: + self.nargs = 1 + elif self.nargs is not None: + raise OptionError( + "'nargs' must not be supplied for action %r" % self.action, + self) + + def _check_callback(self): + if self.action == "callback": + if not callable(self.callback): + raise OptionError( + "callback not callable: %r" % self.callback, self) + if (self.callback_args is not None and + type(self.callback_args) is not tuple): + raise OptionError( + "callback_args, if supplied, must be a tuple: not %r" + % self.callback_args, self) + if (self.callback_kwargs is not None and + type(self.callback_kwargs) is not dict): + raise OptionError( + "callback_kwargs, if supplied, must be a dict: not %r" + % self.callback_kwargs, self) + else: + if self.callback is not None: + raise OptionError( + "callback supplied (%r) for non-callback option" + % self.callback, self) + if self.callback_args is not None: + raise OptionError( + "callback_args supplied for non-callback option", self) + if self.callback_kwargs is not None: + raise OptionError( + "callback_kwargs supplied for non-callback option", self) + + + CHECK_METHODS = [_check_action, + _check_type, + _check_choice, + _check_dest, + _check_const, + _check_nargs, + _check_callback] + + + # -- Miscellaneous methods ----------------------------------------- + + def __str__(self): + return "/".join(self._short_opts + self._long_opts) + + __repr__ = _repr + + def takes_value(self): + return self.type is not None + + def get_opt_string(self): + if self._long_opts: + return self._long_opts[0] + else: + return self._short_opts[0] + + + # -- Processing methods 
-------------------------------------------- + + def check_value(self, opt, value): + checker = self.TYPE_CHECKER.get(self.type) + if checker is None: + return value + else: + return checker(self, opt, value) + + def convert_value(self, opt, value): + if value is not None: + if self.nargs == 1: + return self.check_value(opt, value) + else: + return tuple([self.check_value(opt, v) for v in value]) + + def process(self, opt, value, values, parser): + + # First, convert the value(s) to the right type. Howl if any + # value(s) are bogus. + value = self.convert_value(opt, value) + + # And then take whatever action is expected of us. + # This is a separate method to make life easier for + # subclasses to add new actions. + return self.take_action( + self.action, self.dest, opt, value, values, parser) + + def take_action(self, action, dest, opt, value, values, parser): + if action == "store": + setattr(values, dest, value) + elif action == "store_const": + setattr(values, dest, self.const) + elif action == "store_true": + setattr(values, dest, True) + elif action == "store_false": + setattr(values, dest, False) + elif action == "append": + values.ensure_value(dest, []).append(value) + elif action == "count": + setattr(values, dest, values.ensure_value(dest, 0) + 1) + elif action == "callback": + args = self.callback_args or () + kwargs = self.callback_kwargs or {} + self.callback(self, opt, value, parser, *args, **kwargs) + elif action == "help": + parser.print_help() + parser.exit() + elif action == "version": + parser.print_version() + parser.exit() + else: + raise RuntimeError, "unknown action %r" % self.action + + return 1 + +# class Option + + +SUPPRESS_HELP = "SUPPRESS"+"HELP" +SUPPRESS_USAGE = "SUPPRESS"+"USAGE" + +# For compatibility with Python 2.2 +try: + True, False +except NameError: + (True, False) = (1, 0) +try: + basestring +except NameError: + basestring = (str, unicode) + + +class Values: + + def __init__(self, defaults=None): + if defaults: + for (attr, 
val) in defaults.items(): + setattr(self, attr, val) + + def __str__(self): + return str(self.__dict__) + + __repr__ = _repr + + def __eq__(self, other): + if isinstance(other, Values): + return self.__dict__ == other.__dict__ + elif isinstance(other, dict): + return self.__dict__ == other + else: + return False + + def __ne__(self, other): + return not (self == other) + + def _update_careful(self, dict): + """ + Update the option values from an arbitrary dictionary, but only + use keys from dict that already have a corresponding attribute + in self. Any keys in dict without a corresponding attribute + are silently ignored. + """ + for attr in dir(self): + if dict.has_key(attr): + dval = dict[attr] + if dval is not None: + setattr(self, attr, dval) + + def _update_loose(self, dict): + """ + Update the option values from an arbitrary dictionary, + using all keys from the dictionary regardless of whether + they have a corresponding attribute in self or not. + """ + self.__dict__.update(dict) + + def _update(self, dict, mode): + if mode == "careful": + self._update_careful(dict) + elif mode == "loose": + self._update_loose(dict) + else: + raise ValueError, "invalid update mode: %r" % mode + + def read_module(self, modname, mode="careful"): + __import__(modname) + mod = sys.modules[modname] + self._update(vars(mod), mode) + + def read_file(self, filename, mode="careful"): + vars = {} + execfile(filename, vars) + self._update(vars, mode) + + def ensure_value(self, attr, value): + if not hasattr(self, attr) or getattr(self, attr) is None: + setattr(self, attr, value) + return getattr(self, attr) + + +class OptionContainer: + + """ + Abstract base class. + + Class attributes: + standard_option_list : [Option] + list of standard options that will be accepted by all instances + of this parser class (intended to be overridden by subclasses). 
+ + Instance attributes: + option_list : [Option] + the list of Option objects contained by this OptionContainer + _short_opt : { string : Option } + dictionary mapping short option strings, eg. "-f" or "-X", + to the Option instances that implement them. If an Option + has multiple short option strings, it will appear in this + dictionary multiple times. [1] + _long_opt : { string : Option } + dictionary mapping long option strings, eg. "--file" or + "--exclude", to the Option instances that implement them. + Again, a given Option can occur multiple times in this + dictionary. [1] + defaults : { string : any } + dictionary mapping option destination names to default + values for each destination [1] + + [1] These mappings are common to (shared by) all components of the + controlling OptionParser, where they are initially created. + + """ + + def __init__(self, option_class, conflict_handler, description): + # Initialize the option list and related data structures. + # This method must be provided by subclasses, and it must + # initialize at least the following instance attributes: + # option_list, _short_opt, _long_opt, defaults. + self._create_option_list() + + self.option_class = option_class + self.set_conflict_handler(conflict_handler) + self.set_description(description) + + def _create_option_mappings(self): + # For use by OptionParser constructor -- create the master + # option mappings used by this OptionParser and all + # OptionGroups that it owns. + self._short_opt = {} # single letter -> Option instance + self._long_opt = {} # long option -> Option instance + self.defaults = {} # maps option dest -> default value + + + def _share_option_mappings(self, parser): + # For use by OptionGroup constructor -- use shared option + # mappings from the OptionParser that owns this OptionGroup.
+ self._short_opt = parser._short_opt + self._long_opt = parser._long_opt + self.defaults = parser.defaults + + def set_conflict_handler(self, handler): + if handler not in ("error", "resolve"): + raise ValueError, "invalid conflict_resolution value %r" % handler + self.conflict_handler = handler + + def set_description(self, description): + self.description = description + + def get_description(self): + return self.description + + + # -- Option-adding methods ----------------------------------------- + + def _check_conflict(self, option): + conflict_opts = [] + for opt in option._short_opts: + if self._short_opt.has_key(opt): + conflict_opts.append((opt, self._short_opt[opt])) + for opt in option._long_opts: + if self._long_opt.has_key(opt): + conflict_opts.append((opt, self._long_opt[opt])) + + if conflict_opts: + handler = self.conflict_handler + if handler == "error": + raise OptionConflictError( + "conflicting option string(s): %s" + % ", ".join([co[0] for co in conflict_opts]), + option) + elif handler == "resolve": + for (opt, c_option) in conflict_opts: + if opt.startswith("--"): + c_option._long_opts.remove(opt) + del self._long_opt[opt] + else: + c_option._short_opts.remove(opt) + del self._short_opt[opt] + if not (c_option._short_opts or c_option._long_opts): + c_option.container.option_list.remove(c_option) + + def add_option(self, *args, **kwargs): + """add_option(Option) + add_option(opt_str, ..., kwarg=val, ...) 
+ """ + if type(args[0]) is str: + option = self.option_class(*args, **kwargs) + elif len(args) == 1 and not kwargs: + option = args[0] + if not isinstance(option, Option): + raise TypeError, "not an Option instance: %r" % option + else: + raise TypeError, "invalid arguments" + + self._check_conflict(option) + + self.option_list.append(option) + option.container = self + for opt in option._short_opts: + self._short_opt[opt] = option + for opt in option._long_opts: + self._long_opt[opt] = option + + if option.dest is not None: # option has a dest, we need a default + if option.default is not NO_DEFAULT: + self.defaults[option.dest] = option.default + elif not self.defaults.has_key(option.dest): + self.defaults[option.dest] = None + + return option + + def add_options(self, option_list): + for option in option_list: + self.add_option(option) + + # -- Option query/removal methods ---------------------------------- + + def get_option(self, opt_str): + return (self._short_opt.get(opt_str) or + self._long_opt.get(opt_str)) + + def has_option(self, opt_str): + return (self._short_opt.has_key(opt_str) or + self._long_opt.has_key(opt_str)) + + def remove_option(self, opt_str): + option = self._short_opt.get(opt_str) + if option is None: + option = self._long_opt.get(opt_str) + if option is None: + raise ValueError("no such option %r" % opt_str) + + for opt in option._short_opts: + del self._short_opt[opt] + for opt in option._long_opts: + del self._long_opt[opt] + option.container.option_list.remove(option) + + + # -- Help-formatting methods --------------------------------------- + + def format_option_help(self, formatter): + if not self.option_list: + return "" + result = [] + for option in self.option_list: + if not option.help is SUPPRESS_HELP: + result.append(formatter.format_option(option)) + return "".join(result) + + def format_description(self, formatter): + return formatter.format_description(self.get_description()) + + def format_help(self, formatter): + result = 
[] + if self.description: + result.append(self.format_description(formatter)) + if self.option_list: + result.append(self.format_option_help(formatter)) + return "\n".join(result) + + +class OptionGroup (OptionContainer): + + def __init__(self, parser, title, description=None): + self.parser = parser + OptionContainer.__init__( + self, parser.option_class, parser.conflict_handler, description) + self.title = title + + def _create_option_list(self): + self.option_list = [] + self._share_option_mappings(self.parser) + + def set_title(self, title): + self.title = title + + # -- Help-formatting methods --------------------------------------- + + def format_help(self, formatter): + result = formatter.format_heading(self.title) + formatter.indent() + result += OptionContainer.format_help(self, formatter) + formatter.dedent() + return result + + +class OptionParser (OptionContainer): + + """ + Class attributes: + standard_option_list : [Option] + list of standard options that will be accepted by all instances + of this parser class (intended to be overridden by subclasses). + + Instance attributes: + usage : string + a usage string for your program. Before it is displayed + to the user, "%prog" will be expanded to the name of + your program (self.prog or os.path.basename(sys.argv[0])). + prog : string + the name of the current program (to override + os.path.basename(sys.argv[0])). + + option_groups : [OptionGroup] + list of option groups in this parser (option groups are + irrelevant for parsing the command-line, but very useful + for generating help) + + allow_interspersed_args : bool = true + if true, positional arguments may be interspersed with options. + Assuming -a and -b each take a single argument, the command-line + -ablah foo bar -bboo baz + will be interpreted the same as + -ablah -bboo -- foo bar baz + If this flag were false, that command line would be interpreted as + -ablah -- foo bar -bboo baz + -- ie. 
we stop processing options as soon as we see the first + non-option argument. (This is the tradition followed by + Python's getopt module, Perl's Getopt::Std, and other argument- + parsing libraries, but it is generally annoying to users.) + + process_default_values : bool = true + if true, option default values are processed similarly to option + values from the command line: that is, they are passed to the + type-checking function for the option's type (as long as the + default value is a string). (This really only matters if you + have defined custom types; see SF bug #955889.) Set it to false + to restore the behaviour of Optik 1.4.1 and earlier. + + rargs : [string] + the argument list currently being parsed. Only set when + parse_args() is active, and continually trimmed down as + we consume arguments. Mainly there for the benefit of + callback options. + largs : [string] + the list of leftover arguments that we have skipped while + parsing options. If allow_interspersed_args is false, this + list is always empty. + values : Values + the set of option values currently being accumulated. Only + set when parse_args() is active. Also mainly for callbacks. + + Because of the 'rargs', 'largs', and 'values' attributes, + OptionParser is not thread-safe. If, for some perverse reason, you + need to parse command-line arguments simultaneously in different + threads, use different OptionParser instances. 
+ + """ + + standard_option_list = [] + + def __init__(self, + usage=None, + option_list=None, + option_class=Option, + version=None, + conflict_handler="error", + description=None, + formatter=None, + add_help_option=True, + prog=None): + OptionContainer.__init__( + self, option_class, conflict_handler, description) + self.set_usage(usage) + self.prog = prog + self.version = version + self.allow_interspersed_args = True + self.process_default_values = True + if formatter is None: + formatter = IndentedHelpFormatter() + self.formatter = formatter + self.formatter.set_parser(self) + + # Populate the option list; initial sources are the + # standard_option_list class attribute, the 'option_list' + # argument, and (if applicable) the _add_version_option() and + # _add_help_option() methods. + self._populate_option_list(option_list, + add_help=add_help_option) + + self._init_parsing_state() + + # -- Private methods ----------------------------------------------- + # (used by our or OptionContainer's constructor) + + def _create_option_list(self): + self.option_list = [] + self.option_groups = [] + self._create_option_mappings() + + def _add_help_option(self): + self.add_option("-h", "--help", + action="help", + help=_("show this help message and exit")) + + def _add_version_option(self): + self.add_option("--version", + action="version", + help=_("show program's version number and exit")) + + def _populate_option_list(self, option_list, add_help=True): + if self.standard_option_list: + self.add_options(self.standard_option_list) + if option_list: + self.add_options(option_list) + if self.version: + self._add_version_option() + if add_help: + self._add_help_option() + + def _init_parsing_state(self): + # These are set in parse_args() for the convenience of callbacks. 
+ self.rargs = None + self.largs = None + self.values = None + + + # -- Simple modifier methods --------------------------------------- + + def set_usage(self, usage): + if usage is None: + self.usage = _("%prog [options]") + elif usage is SUPPRESS_USAGE: + self.usage = None + # For backwards compatibility with Optik 1.3 and earlier. + elif usage.startswith("usage:" + " "): + self.usage = usage[7:] + else: + self.usage = usage + + def enable_interspersed_args(self): + self.allow_interspersed_args = True + + def disable_interspersed_args(self): + self.allow_interspersed_args = False + + def set_process_default_values(self, process): + self.process_default_values = process + + def set_default(self, dest, value): + self.defaults[dest] = value + + def set_defaults(self, **kwargs): + self.defaults.update(kwargs) + + def _get_all_options(self): + options = self.option_list[:] + for group in self.option_groups: + options.extend(group.option_list) + return options + + def get_default_values(self): + if not self.process_default_values: + # Old, pre-Optik 1.5 behaviour. 
+ return Values(self.defaults) + + defaults = self.defaults.copy() + for option in self._get_all_options(): + default = defaults.get(option.dest) + if isinstance(default, basestring): + opt_str = option.get_opt_string() + defaults[option.dest] = option.check_value(opt_str, default) + + return Values(defaults) + + + # -- OptionGroup methods ------------------------------------------- + + def add_option_group(self, *args, **kwargs): + # XXX lots of overlap with OptionContainer.add_option() + if type(args[0]) is str: + group = OptionGroup(self, *args, **kwargs) + elif len(args) == 1 and not kwargs: + group = args[0] + if not isinstance(group, OptionGroup): + raise TypeError, "not an OptionGroup instance: %r" % group + if group.parser is not self: + raise ValueError, "invalid OptionGroup (wrong parser)" + else: + raise TypeError, "invalid arguments" + + self.option_groups.append(group) + return group + + def get_option_group(self, opt_str): + option = (self._short_opt.get(opt_str) or + self._long_opt.get(opt_str)) + if option and option.container is not self: + return option.container + return None + + + # -- Option-parsing methods ---------------------------------------- + + def _get_args(self, args): + if args is None: + return sys.argv[1:] + else: + return args[:] # don't modify caller's list + + def parse_args(self, args=None, values=None): + """ + parse_args(args : [string] = sys.argv[1:], + values : Values = None) + -> (values : Values, args : [string]) + + Parse the command-line options found in 'args' (default: + sys.argv[1:]). Any errors result in a call to 'error()', which + by default prints the usage message to stderr and calls + sys.exit() with an error message. On success returns a pair + (values, args) where 'values' is a Values instance (with all + your option values) and 'args' is the list of arguments left + over after parsing options.
+ """ + rargs = self._get_args(args) + if values is None: + values = self.get_default_values() + + # Store the halves of the argument list as attributes for the + # convenience of callbacks: + # rargs + # the rest of the command-line (the "r" stands for + # "remaining" or "right-hand") + # largs + # the leftover arguments -- ie. what's left after removing + # options and their arguments (the "l" stands for "leftover" + # or "left-hand") + self.rargs = rargs + self.largs = largs = [] + self.values = values + + try: + stop = self._process_args(largs, rargs, values) + except (BadOptionError, OptionValueError), err: + self.error(err.msg) + + args = largs + rargs + return self.check_values(values, args) + + def check_values(self, values, args): + """ + check_values(values : Values, args : [string]) + -> (values : Values, args : [string]) + + Check that the supplied option values and leftover arguments are + valid. Returns the option values and leftover arguments + (possibly adjusted, possibly completely new -- whatever you + like). Default implementation just returns the passed-in + values; subclasses may override as desired. + """ + return (values, args) + + def _process_args(self, largs, rargs, values): + """_process_args(largs : [string], + rargs : [string], + values : Values) + + Process command-line arguments and populate 'values', consuming + options and arguments from 'rargs'. If 'allow_interspersed_args' is + false, stop at the first non-option argument. If true, accumulate any + interspersed non-option arguments in 'largs'. + """ + while rargs: + arg = rargs[0] + # We handle bare "--" explicitly, and bare "-" is handled by the + # standard arg handler since the short arg case ensures that the + # len of the opt string is greater than 1. 
+ if arg == "--": + del rargs[0] + return + elif arg[0:2] == "--": + # process a single long option (possibly with value(s)) + self._process_long_opt(rargs, values) + elif arg[:1] == "-" and len(arg) > 1: + # process a cluster of short options (possibly with + # value(s) for the last one only) + self._process_short_opts(rargs, values) + elif self.allow_interspersed_args: + largs.append(arg) + del rargs[0] + else: + return # stop now, leave this arg in rargs + + # Say this is the original argument list: + # [arg0, arg1, ..., arg(i-1), arg(i), arg(i+1), ..., arg(N-1)] + # ^ + # (we are about to process arg(i)). + # + # Then rargs is [arg(i), ..., arg(N-1)] and largs is a *subset* of + # [arg0, ..., arg(i-1)] (any options and their arguments will have + # been removed from largs). + # + # The while loop will usually consume 1 or more arguments per pass. + # If it consumes 1 (eg. arg is an option that takes no arguments), + # then after _process_arg() is done the situation is: + # + # largs = subset of [arg0, ..., arg(i)] + # rargs = [arg(i+1), ..., arg(N-1)] + # + # If allow_interspersed_args is false, largs will always be + # *empty* -- still a subset of [arg0, ..., arg(i-1)], but + # not a very interesting subset! + + def _match_long_opt(self, opt): + """_match_long_opt(opt : string) -> string + + Determine which long option string 'opt' matches, ie. which one + it is an unambiguous abbreviation for. Raises BadOptionError if + 'opt' doesn't unambiguously match any long option string. + """ + return _match_abbrev(opt, self._long_opt) + + def _process_long_opt(self, rargs, values): + arg = rargs.pop(0) + + # Value explicitly attached to arg? Pretend it's the next + # argument.
+ if "=" in arg: + (opt, next_arg) = arg.split("=", 1) + rargs.insert(0, next_arg) + had_explicit_value = True + else: + opt = arg + had_explicit_value = False + + opt = self._match_long_opt(opt) + option = self._long_opt[opt] + if option.takes_value(): + nargs = option.nargs + if len(rargs) < nargs: + if nargs == 1: + self.error(_("%s option requires an argument") % opt) + else: + self.error(_("%s option requires %d arguments") + % (opt, nargs)) + elif nargs == 1: + value = rargs.pop(0) + else: + value = tuple(rargs[0:nargs]) + del rargs[0:nargs] + + elif had_explicit_value: + self.error(_("%s option does not take a value") % opt) + + else: + value = None + + option.process(opt, value, values, self) + + def _process_short_opts(self, rargs, values): + arg = rargs.pop(0) + stop = False + i = 1 + for ch in arg[1:]: + opt = "-" + ch + option = self._short_opt.get(opt) + i += 1 # we have consumed a character + + if not option: + self.error(_("no such option: %s") % opt) + if option.takes_value(): + # Any characters left in arg? Pretend they're the + # next arg, and stop consuming characters of arg. 
+ if i < len(arg): + rargs.insert(0, arg[i:]) + stop = True + + nargs = option.nargs + if len(rargs) < nargs: + if nargs == 1: + self.error(_("%s option requires an argument") % opt) + else: + self.error(_("%s option requires %d arguments") + % (opt, nargs)) + elif nargs == 1: + value = rargs.pop(0) + else: + value = tuple(rargs[0:nargs]) + del rargs[0:nargs] + + else: # option doesn't take a value + value = None + + option.process(opt, value, values, self) + + if stop: + break + + + # -- Feedback methods ---------------------------------------------- + + def get_prog_name(self): + if self.prog is None: + return os.path.basename(sys.argv[0]) + else: + return self.prog + + def expand_prog_name(self, s): + return s.replace("%prog", self.get_prog_name()) + + def get_description(self): + return self.expand_prog_name(self.description) + + def exit(self, status=0, msg=None): + if msg: + sys.stderr.write(msg) + sys.exit(status) + + def error(self, msg): + """error(msg : string) + + Print a usage message incorporating 'msg' to stderr and exit. + If you override this in a subclass, it should not return -- it + should either exit or raise an exception. + """ + self.print_usage(sys.stderr) + self.exit(2, "%s: error: %s\n" % (self.get_prog_name(), msg)) + + def get_usage(self): + if self.usage: + return self.formatter.format_usage( + self.expand_prog_name(self.usage)) + else: + return "" + + def print_usage(self, file=None): + """print_usage(file : file = stdout) + + Print the usage message for the current program (self.usage) to + 'file' (default stdout). Any occurrence of the string "%prog" in + self.usage is replaced with the name of the current program + (basename of sys.argv[0]). Does nothing if self.usage is empty + or not defined.
+ """ + if self.usage: + print >>file, self.get_usage() + + def get_version(self): + if self.version: + return self.expand_prog_name(self.version) + else: + return "" + + def print_version(self, file=None): + """print_version(file : file = stdout) + + Print the version message for this program (self.version) to + 'file' (default stdout). As with print_usage(), any occurrence + of "%prog" in self.version is replaced by the current program's + name. Does nothing if self.version is empty or undefined. + """ + if self.version: + print >>file, self.get_version() + + def format_option_help(self, formatter=None): + if formatter is None: + formatter = self.formatter + formatter.store_option_strings(self) + result = [] + result.append(formatter.format_heading(_("options"))) + formatter.indent() + if self.option_list: + result.append(OptionContainer.format_option_help(self, formatter)) + result.append("\n") + for group in self.option_groups: + result.append(group.format_help(formatter)) + result.append("\n") + formatter.dedent() + # Drop the last "\n", or the header if no options or option groups: + return "".join(result[:-1]) + + def format_help(self, formatter=None): + if formatter is None: + formatter = self.formatter + result = [] + if self.usage: + result.append(self.get_usage() + "\n") + if self.description: + result.append(self.format_description(formatter) + "\n") + result.append(self.format_option_help(formatter)) + return "".join(result) + + def print_help(self, file=None): + """print_help(file : file = stdout) + + Print an extended help message, listing all options and any + help text provided with them, to 'file' (default stdout). + """ + if file is None: + file = sys.stdout + file.write(self.format_help()) + +# class OptionParser + + +def _match_abbrev(s, wordmap): + """_match_abbrev(s : string, wordmap : {string : Option}) -> string + + Return the string key in 'wordmap' for which 's' is an unambiguous + abbreviation.
If 's' is found to be ambiguous or doesn't match any of + 'words', raise BadOptionError. + """ + # Is there an exact match? + if wordmap.has_key(s): + return s + else: + # Isolate all words with s as a prefix. + possibilities = [word for word in wordmap.keys() + if word.startswith(s)] + # No exact match, so there had better be just one possibility. + if len(possibilities) == 1: + return possibilities[0] + elif not possibilities: + raise BadOptionError(_("no such option: %s") % s) + else: + # More than one possible completion: ambiguous prefix. + raise BadOptionError(_("ambiguous option: %s (%s?)") + % (s, ", ".join(possibilities))) + + +# Some day, there might be many Option classes. As of Optik 1.3, the +# preferred way to instantiate Options is indirectly, via make_option(), +# which will become a factory function when there are many Option +# classes. +make_option = Option diff --git a/python-modules-pre24/textwrap.py b/python-modules-pre24/textwrap.py new file mode 100644 index 0000000..d3bd7c7 --- /dev/null +++ b/python-modules-pre24/textwrap.py @@ -0,0 +1,374 @@ +"""Text wrapping and filling. +""" + +# Copyright (C) 1999-2001 Gregory P. Ward. +# Copyright (C) 2002, 2003 Python Software Foundation. +# Written by Greg Ward + +__revision__ = "$Id: textwrap.py 46863 2006-06-11 19:42:51Z tim.peters $" + +import string, re + +# Do the right thing with boolean values for all known Python versions +# (so this module can be copied to projects that don't depend on Python +# 2.3, e.g. Optik and Docutils). +try: + True, False +except NameError: + (True, False) = (1, 0) + +__all__ = ['TextWrapper', 'wrap', 'fill'] + +# Hardcode the recognized whitespace characters to the US-ASCII +# whitespace characters. The main reason for doing this is that in +# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales +# that character winds up in string.whitespace. 
Respecting +# string.whitespace in those cases would 1) make textwrap treat 0xa0 the +# same as any other whitespace char, which is clearly wrong (it's a +# *non-breaking* space), 2) possibly cause problems with Unicode, +# since 0xa0 is not in range(128). +_whitespace = '\t\n\x0b\x0c\r ' + +class TextWrapper: + """ + Object for wrapping/filling text. The public interface consists of + the wrap() and fill() methods; the other methods are just there for + subclasses to override in order to tweak the default behaviour. + If you want to completely replace the main wrapping algorithm, + you'll probably have to override _wrap_chunks(). + + Several instance attributes control various aspects of wrapping: + width (default: 70) + the maximum width of wrapped lines (unless break_long_words + is false) + initial_indent (default: "") + string that will be prepended to the first line of wrapped + output. Counts towards the line's width. + subsequent_indent (default: "") + string that will be prepended to all lines save the first + of wrapped output; also counts towards each line's width. + expand_tabs (default: true) + Expand tabs in input text to spaces before further processing. + Each tab will become 1 .. 8 spaces, depending on its position in + its line. If false, each tab is treated as a single character. + replace_whitespace (default: true) + Replace all whitespace characters in the input text by spaces + after tab expansion. Note that if expand_tabs is false and + replace_whitespace is true, every tab will be converted to a + single space! + fix_sentence_endings (default: false) + Ensure that sentence-ending punctuation is always followed + by two spaces. Off by default because the algorithm is + (unavoidably) imperfect. + break_long_words (default: true) + Break words longer than 'width'. If false, those words will not + be broken, and some lines might be longer than 'width'. 
+ """ + + whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace)) + + unicode_whitespace_trans = {} + uspace = ord(u' ') + for x in map(ord, _whitespace): + unicode_whitespace_trans[x] = uspace + + # This funky little regex is just the trick for splitting + # text up into word-wrappable chunks. E.g. + # "Hello there -- you goof-ball, use the -b option!" + # splits into + # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! + # (after stripping out empty strings). + wordsep_re = re.compile( + r'(\s+|' # any whitespace + r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words + r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash + + # XXX this is not locale- or charset-aware -- string.lowercase + # is US-ASCII only (and therefore English-only) + sentence_end_re = re.compile(r'[%s]' # lowercase letter + r'[\.\!\?]' # sentence-ending punct. + r'[\"\']?' # optional end-of-quote + % string.lowercase) + + + def __init__(self, + width=70, + initial_indent="", + subsequent_indent="", + expand_tabs=True, + replace_whitespace=True, + fix_sentence_endings=False, + break_long_words=True): + self.width = width + self.initial_indent = initial_indent + self.subsequent_indent = subsequent_indent + self.expand_tabs = expand_tabs + self.replace_whitespace = replace_whitespace + self.fix_sentence_endings = fix_sentence_endings + self.break_long_words = break_long_words + + + # -- Private methods ----------------------------------------------- + # (possibly useful for subclasses to override) + + def _munge_whitespace(self, text): + """_munge_whitespace(text : string) -> string + + Munge whitespace in text: expand tabs and convert all other + whitespace characters to spaces. Eg. " foo\tbar\n\nbaz" + becomes " foo bar baz". 
+ """ + if self.expand_tabs: + text = text.expandtabs() + if self.replace_whitespace: + if isinstance(text, str): + text = text.translate(self.whitespace_trans) + elif isinstance(text, unicode): + text = text.translate(self.unicode_whitespace_trans) + return text + + + def _split(self, text): + """_split(text : string) -> [string] + + Split the text to wrap into indivisible chunks. Chunks are + not quite the same as words; see wrap_chunks() for full + details. As an example, the text + Look, goof-ball -- use the -b option! + breaks into the following chunks: + 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ', + 'use', ' ', 'the', ' ', '-b', ' ', 'option!' + """ + chunks = self.wordsep_re.split(text) + chunks = filter(None, chunks) + return chunks + + def _fix_sentence_endings(self, chunks): + """_fix_sentence_endings(chunks : [string]) + + Correct for sentence endings buried in 'chunks'. Eg. when the + original text contains "... foo.\nBar ...", munge_whitespace() + and split() will convert that to [..., "foo.", " ", "Bar", ...] + which has one too few spaces; this method simply changes the one + space to two. + """ + i = 0 + pat = self.sentence_end_re + while i < len(chunks)-1: + if chunks[i+1] == " " and pat.search(chunks[i]): + chunks[i+1] = " " + i += 2 + else: + i += 1 + + def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): + """_handle_long_word(chunks : [string], + cur_line : [string], + cur_len : int, width : int) + + Handle a chunk of text (most likely a word, not whitespace) that + is too long to fit in any line. + """ + space_left = max(width - cur_len, 1) + + # If we're allowed to break long words, then do so: put as much + # of the next chunk onto the current line as will fit. + if self.break_long_words: + cur_line.append(reversed_chunks[-1][:space_left]) + reversed_chunks[-1] = reversed_chunks[-1][space_left:] + + # Otherwise, we have to preserve the long word intact. 
Only add + # it to the current line if there's nothing already there -- + # that minimizes how much we violate the width constraint. + elif not cur_line: + cur_line.append(reversed_chunks.pop()) + + # If we're not allowed to break long words, and there's already + # text on the current line, do nothing. Next time through the + # main loop of _wrap_chunks(), we'll wind up here again, but + # cur_len will be zero, so the next line will be entirely + # devoted to the long word that we can't handle right now. + + def _wrap_chunks(self, chunks): + """_wrap_chunks(chunks : [string]) -> [string] + + Wrap a sequence of text chunks and return a list of lines of + length 'self.width' or less. (If 'break_long_words' is false, + some lines may be longer than this.) Chunks correspond roughly + to words and the whitespace between them: each chunk is + indivisible (modulo 'break_long_words'), but a line break can + come between any two chunks. Chunks should not have internal + whitespace; ie. a chunk is either all whitespace or a "word". + Whitespace chunks will be removed from the beginning and end of + lines, but apart from that whitespace is preserved. + """ + lines = [] + if self.width <= 0: + raise ValueError("invalid width %r (must be > 0)" % self.width) + + # Arrange in reverse order so items can be efficiently popped + # from a stack of chunks. + chunks.reverse() + + while chunks: + + # Start the list of chunks that will make up the current line. + # cur_len is just the length of all the chunks in cur_line. + cur_line = [] + cur_len = 0 + + # Figure out which static string will prefix this line. + if lines: + indent = self.subsequent_indent + else: + indent = self.initial_indent + + # Maximum width for this line. + width = self.width - len(indent) + + # First chunk on line is whitespace -- drop it, unless this + # is the very beginning of the text (ie. no lines started yet).
+ if chunks[-1].strip() == '' and lines: + del chunks[-1] + + while chunks: + l = len(chunks[-1]) + + # Can at least squeeze this chunk onto the current line. + if cur_len + l <= width: + cur_line.append(chunks.pop()) + cur_len += l + + # Nope, this line is full. + else: + break + + # The current line is full, and the next chunk is too big to + # fit on *any* line (not just this one). + if chunks and len(chunks[-1]) > width: + self._handle_long_word(chunks, cur_line, cur_len, width) + + # If the last chunk on this line is all whitespace, drop it. + if cur_line and cur_line[-1].strip() == '': + del cur_line[-1] + + # Convert current line back to a string and store it in list + # of all lines (return value). + if cur_line: + lines.append(indent + ''.join(cur_line)) + + return lines + + + # -- Public interface ---------------------------------------------- + + def wrap(self, text): + """wrap(text : string) -> [string] + + Reformat the single paragraph in 'text' so it fits in lines of + no more than 'self.width' columns, and return a list of wrapped + lines. Tabs in 'text' are expanded with string.expandtabs(), + and all other whitespace characters (including newline) are + converted to space. + """ + text = self._munge_whitespace(text) + chunks = self._split(text) + if self.fix_sentence_endings: + self._fix_sentence_endings(chunks) + return self._wrap_chunks(chunks) + + def fill(self, text): + """fill(text : string) -> string + + Reformat the single paragraph in 'text' to fit in lines of no + more than 'self.width' columns, and return a new string + containing the entire wrapped paragraph. + """ + return "\n".join(self.wrap(text)) + + +# -- Convenience interface --------------------------------------------- + +def wrap(text, width=70, **kwargs): + """Wrap a single paragraph of text, returning a list of wrapped lines. + + Reformat the single paragraph in 'text' so it fits in lines of no + more than 'width' columns, and return a list of wrapped lines. 
By + default, tabs in 'text' are expanded with string.expandtabs(), and + all other whitespace characters (including newline) are converted to + space. See TextWrapper class for available keyword args to customize + wrapping behaviour. + """ + w = TextWrapper(width=width, **kwargs) + return w.wrap(text) + +def fill(text, width=70, **kwargs): + """Fill a single paragraph of text, returning a new string. + + Reformat the single paragraph in 'text' to fit in lines of no more + than 'width' columns, and return a new string containing the entire + wrapped paragraph. As with wrap(), tabs are expanded and other + whitespace characters converted to space. See TextWrapper class for + available keyword args to customize wrapping behaviour. + """ + w = TextWrapper(width=width, **kwargs) + return w.fill(text) + + +# -- Loosely related functionality ------------------------------------- + +_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) +_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) + +def dedent(text): + """Remove any common leading whitespace from every line in `text`. + + This can be used to make triple-quoted strings line up with the left + edge of the display, while still presenting them in the source code + in indented form. + + Note that tabs and spaces are both treated as whitespace, but they + are not equal: the lines " hello" and "\thello" are + considered to have no common leading whitespace. (This behaviour is + new in Python 2.5; older versions of this module incorrectly + expanded tabs before searching for common leading whitespace.) + """ + # Look for the longest leading string of spaces and tabs common to + # all lines. + margin = None + text = _whitespace_only_re.sub('', text) + indents = _leading_whitespace_re.findall(text) + for indent in indents: + if margin is None: + margin = indent + + # Current line more deeply indented than previous winner: + # no change (previous winner is still on top). 
+ elif indent.startswith(margin): + pass + + # Current line consistent with and no deeper than previous winner: + # it's the new winner. + elif margin.startswith(indent): + margin = indent + + # Current line and previous winner have no common whitespace: + # there is no margin. + else: + margin = "" + break + + # sanity check (testing/debugging only) + if 0 and margin: + for line in text.split("\n"): + assert not line or line.startswith(margin), \ + "line = %r, margin = %r" % (line, margin) + + if margin: + text = re.sub(r'(?m)^' + margin, '', text) + return text + +if __name__ == "__main__": + #print dedent("\tfoo\n\tbar") + #print dedent(" \thello there\n \t how are you?") + print dedent("Hello there.\n This is indented.") diff --git a/release-notes.html b/release-notes.html index 3e6d8f7..c5b5ab5 100644 --- a/release-notes.html +++ b/release-notes.html @@ -18,6 +18,7 @@
  • Collapse adjacent commands of the same type
  • Convert straight curves into line commands
  • Eliminate last segment in a polygon
  • +
  • Rework command-line argument parsing
  • diff --git a/scour.py b/scour.py index c45fc45..1c3b333 100755 --- a/scour.py +++ b/scour.py @@ -59,14 +59,17 @@ import xml.dom.minidom import re import math import base64 -import os.path import urllib from svg_regex import svg_parser -from decimal import * import gzip +import optparse -# set precision to 5 decimal places by default -getcontext().prec = 5 +# Python 2.3- did not have Decimal +try: + from decimal import * +except ImportError: + from fixedpoint import * + Decimal = FixedPoint APP = 'scour' VER = '0.14' @@ -293,7 +296,7 @@ class Unit(object): MM = 8 IN = 9 - @staticmethod +# @staticmethod def get(str): # GZ: shadowing builtins like 'str' is generally bad form # GZ: encoding stuff like this in a dict makes for nicer code @@ -308,6 +311,8 @@ class Unit(object): elif str == 'mm': return Unit.MM elif str == 'in': return Unit.IN return Unit.INVALID + + get = staticmethod(get) class SVGLength(object): def __init__(self, str): @@ -839,7 +844,7 @@ def repairStyle(node, options): # now if any of the properties match known SVG attributes we prefer attributes # over style so emit them and remove them from the style map - if not '--disable-style-to-xml' in options: + if options.style_to_xml: for propName in styleMap.keys() : if propName in svgAttributes : node.setAttribute(propName, styleMap[propName]) @@ -1307,9 +1312,9 @@ def cleanPath(element) : def parseListOfPoints(s): """ - Parse string into a list of points. + Parse string into a list of points. - Returns a list of (x,y) tuples where x and y are strings + Returns a list of (x,y) tuples where x and y are strings """ # (wsp)? comma-or-wsp-separated coordinate pairs (wsp)? 
@@ -1329,7 +1334,7 @@ def parseListOfPoints(s): def cleanPolygon(elem): """ - Remove unnecessary closing point of polygon points attribute + Remove unnecessary closing point of polygon points attribute """ global numPointsRemovedFromPolygon @@ -1347,11 +1352,11 @@ def cleanPolygon(elem): def serializePath(pathObj): """ - Reserializes the path data with some cleanups: - - removes scientific notation (exponents) - - removes all trailing zeros after the decimal - - removes extraneous whitespace - - adds commas between values in a subcommand if required + Reserializes the path data with some cleanups: + - removes scientific notation (exponents) + - removes all trailing zeros after the decimal + - removes extraneous whitespace + - adds commas between values in a subcommand if required """ pathStr = "" for (cmd,data) in pathObj: @@ -1371,8 +1376,8 @@ def serializePath(pathObj): def embedRasters(element) : """ - Converts raster references to inline images. - NOTE: there are size limits to base64-encoding handling in browsers + Converts raster references to inline images. 
+ NOTE: there are size limits to base64-encoding handling in browsers """ global numRastersEmbedded @@ -1463,7 +1468,10 @@ def properlySizeDoc(docElement): # this is the main method # input is a string representation of the input XML # returns a string representation of the output XML -def scourString(in_string, options=[]): +def scourString(in_string, options=None): + if options is None: + options = _options_parser.get_default_values() + getcontext().prec = options.digits global numAttrsRemoved global numStylePropsFixed global numElemsRemoved @@ -1493,7 +1501,7 @@ def scourString(in_string, options=[]): numStylePropsFixed = repairStyle(doc.documentElement, options) # convert colors to #RRGGBB format - if not '--disable-simplify-colors' in options: + if options.simple_colors: numBytesSavedInColors = convertColors(doc.documentElement) # remove empty defs, metadata, g @@ -1516,14 +1524,14 @@ def scourString(in_string, options=[]): while removeUnreferencedElements(doc) > 0: pass - if '--enable-id-stripping' in options: + if options.strip_ids: bContinueLooping = True while bContinueLooping: identifiedElements = findElementsWithId(doc.documentElement) referencedIDs = findReferencedElements(doc.documentElement) bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0) - if not '--disable-group-collapsing' in options: + if options.group_collapse: while removeNestedGroups(doc.documentElement) > 0: pass @@ -1571,135 +1579,121 @@ def scourString(in_string, options=[]): # used mostly by unit tests # input is a filename # returns the minidom doc representation of the SVG -def scourXmlFile(filename, options=[]): +def scourXmlFile(filename, options=None): in_string = open(filename).read() -# print 'IN=',in_string out_string = scourString(in_string, options) -# print 'OUT=',out_string return xml.dom.minidom.parseString(out_string.encode('utf-8')) -def printHeader(): - print APP , VER - print COPYRIGHT +# GZ: Seems most other commandline tools don't do 
this, is it really wanted? +class HeaderedFormatter(optparse.IndentedHelpFormatter): + """ + Show application name, version number, and copyright statement + above usage information. + """ + def format_usage(self, usage): + return "%s %s\n%s\n%s" % (APP, VER, COPYRIGHT, + optparse.IndentedHelpFormatter.format_usage(self, usage)) -def printSyntaxAndQuit(): - printHeader() - print 'usage: scour.py [-i input.svg] [-o output.svg] [OPTIONS]\n' - print 'If the input/output files are specified with a svgz extension, then compressed SVG is assumed.\n' - print 'If the input file is not specified, stdin is used.' - print 'If the output file is not specified, stdout is used.' - print 'If an option is not available below that means it occurs automatically' - print 'when scour is invoked. Available OPTIONS:\n' - print ' --disable-simplify-colors : Scour will not convert all colors to #RRGGBB format' - print ' --disable-style-to-xml : Scour will not convert style properties into XML attributes' - print ' --disable-group-collapsing : Scour will not collapse elements' - print ' --enable-id-stripping : Scour will remove all un-referenced ID attributes' - print ' --set-precision N : Scour will set the number of significant digits (default: 6)' - print '' - quit() +# GZ: would prefer this to be in a function or class scope, but tests etc need +# access to the defaults anyway +_options_parser = optparse.OptionParser( + usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]", + description=("If the input/output files are specified with a svgz" + " extension, then compressed SVG is assumed. If the input file is not" + " specified, stdin is used. 
If the output file is not specified, " + " stdout is used."), + formatter=HeaderedFormatter(max_help_position=30), + version=VER) -# returns a tuple with: -# input stream, output stream, a list of options specified on the command-line, -# input filename, and output filename -def parseCLA(): - args = sys.argv[1:] +_options_parser.add_option("--disable-simplify-colors", + action="store_false", dest="simple_colors", default=True, + help="won't convert all colors to #RRGGBB format") +_options_parser.add_option("--disable-style-to-xml", + action="store_false", dest="style_to_xml", default=True, + help="won't convert styles into XML attributes") +_options_parser.add_option("--disable-group-collapsing", + action="store_false", dest="group_collapse", default=True, + help="won't collapse elements") +_options_parser.add_option("--enable-id-stripping", + action="store_true", dest="strip_ids", default=False, + help="remove all un-referenced ID attributes") +# GZ: this is confusing, most people will be thinking in terms of +# decimal places, which is not what decimal precision is doing +_options_parser.add_option("-p", "--set-precision", + action="store", type=int, dest="digits", default=5, + help="set number of significant digits (default: %default)") +_options_parser.add_option("-i", + action="store", dest="infilename", help=optparse.SUPPRESS_HELP) +_options_parser.add_option("-o", + action="store", dest="outfilename", help=optparse.SUPPRESS_HELP) - # by default the input and output are the standard streams - inputfilename = '' - outputfilename = '' - input = sys.stdin - output = sys.stdout - options = [] - validOptions = [ - '--disable-simplify-colors', - '--disable-style-to-xml', - '--disable-group-collapsing', - '--enable-id-stripping', - '--set-precision', - ] - - i = 0 - while i < len(args): - arg = args[i] - i += 1 - if arg == '-i' : - if i < len(args) : - inputfilename = args[i] - if args[i][-5:] == '.svgz': - input = gzip.open(args[i], 'rb') - else: - input = 
open(args[i], 'r') - i += 1 - continue - else: - printSyntaxAndQuit() - elif arg == '-o' : - if i < len(args) : - outputfilename = args[i] - if args[i][-5:] == '.svgz': - output = gzip.open(args[i], 'wb') - else: - output = open(args[i], 'w') - i += 1 - continue - else: - printSyntaxAndQuit() - elif arg == '--set-precision': - if i < len(args): - getcontext().prec = int(args[i]) - i += 1 - continue - else: - printSyntaxAndQuit() - elif arg in validOptions : - options.append(arg) - else : - print 'Error! Invalid argument:', arg - printSyntaxAndQuit() - - return (input, output, options, inputfilename, outputfilename) +def maybe_gziped_file(filename, mode="r"): + if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"): + return gzip.GzipFile(filename, mode) + return file(filename, mode) + +def parse_args(args=None): + options, rargs = _options_parser.parse_args(args) + + if rargs: + _options_parser.error("Additional arguments not handled: %r, see --help" % rargs) + if options.digits < 0: + _options_parser.error("Can't have negative significant digits, see --help") + if options.infilename: + infile = maybe_gziped_file(options.infilename) + # GZ: could catch a raised IOError here and report + else: + # GZ: could sniff for gzip compression here + infile = sys.stdin + if options.outfilename: + outfile = maybe_gziped_file(options.outfilename, "w") + else: + outfile = sys.stdout + + return options, [infile, outfile] if __name__ == '__main__': + if sys.platform == "win32": + from time import clock as get_tick + else: + # GZ: is this different from time.time() in any way? 
+ def get_tick(): + return os.times()[0] - startTimes = os.times() + start = get_tick() - (input, output, options, inputfilename, outputfilename) = parseCLA() + options, (input, output) = parse_args() - # if we are not sending to stdout, then print out app information - bOutputReport = False - if output != sys.stdout : - bOutputReport = True - printHeader() + print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT) # do the work in_string = input.read() - out_string = scourString(in_string, options) - output.write(out_string.encode("utf-8")) + out_string = scourString(in_string, options).encode("UTF-8") + output.write(out_string) # Close input and output files input.close() output.close() - endTimes = os.times() + end = get_tick() - # output some statistics if we are not using stdout - if bOutputReport : - if inputfilename != '': - print ' File:', inputfilename - print ' Time taken:', str(endTimes[0]-startTimes[0]) + 's' - print ' Number of elements removed:', numElemsRemoved - print ' Number of attributes removed:', numAttrsRemoved - print ' Number of unreferenced id attributes removed:', numIDsRemoved - print ' Number of style properties fixed:', numStylePropsFixed - print ' Number of raster images embedded inline:', numRastersEmbedded - print ' Number of path segments reduced/removed:', numPathSegmentsReduced - print ' Number of curves straightened:', numCurvesStraightened - print ' Number of bytes saved in path data:', numBytesSavedInPathData - print ' Number of bytes saved in colors:', numBytesSavedInColors - print ' Number of points removed from polygons:',numPointsRemovedFromPolygon - oldsize = os.path.getsize(inputfilename) - newsize = os.path.getsize(outputfilename) - sizediff = (newsize / oldsize) * 100; - print ' Original file size:', oldsize, 'bytes; new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)' + # GZ: unless silenced by -q or something? 
+ # GZ: not using globals would be good too + print >>sys.stderr, ' File:', input.name, \ + '\n Time taken:', str(end-start) + 's', \ + '\n Number of elements removed:', numElemsRemoved, \ + '\n Number of attributes removed:', numAttrsRemoved, \ + '\n Number of unreferenced id attributes removed:', numIDsRemoved, \ + '\n Number of style properties fixed:', numStylePropsFixed, \ + '\n Number of raster images embedded inline:', numRastersEmbedded, \ + '\n Number of path segments reduced/removed:', numPathSegmentsReduced, \ + '\n Number of bytes saved in path data:', numBytesSavedInPathData, \ + '\n Number of bytes saved in colors:', numBytesSavedInColors, \ + '\n Number of points removed from polygons:',numPointsRemovedFromPolygon + oldsize = len(in_string) + newsize = len(out_string) + sizediff = (newsize / oldsize) * 100 + print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \ + 'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)' diff --git a/svg_regex.py b/svg_regex.py index 34899e3..10fd9c3 100644 --- a/svg_regex.py +++ b/svg_regex.py @@ -138,7 +138,8 @@ class SVGPathParser(object): 'a': self.rule_elliptical_arc, } - self.number_tokens = set(['int', 'float']) +# self.number_tokens = set(['int', 'float']) + self.number_tokens = list(['int', 'float']) def parse(self, text): """ Parse a string of SVG data. 
diff --git a/testscour.py b/testscour.py index 7ca09c4..60339a3 100755 --- a/testscour.py +++ b/testscour.py @@ -156,7 +156,8 @@ class KeepUnreferencedIDsWhenEnabled(unittest.TestCase): class RemoveUnreferencedIDsWhenEnabled(unittest.TestCase): def runTest(self): - doc = scour.scourXmlFile('unittests/ids-to-strip.svg', ['--enable-id-stripping']) + doc = scour.scourXmlFile('unittests/ids-to-strip.svg', + scour.parse_args(['--enable-id-stripping'])[0]) self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'svg')[0].getAttribute('id'), '', ' ID not stripped' ) @@ -168,7 +169,8 @@ class RemoveUselessNestedGroups(unittest.TestCase): class DoNotRemoveUselessNestedGroups(unittest.TestCase): def runTest(self): - doc = scour.scourXmlFile('unittests/nested-useless-groups.svg', ['--disable-group-collapsing']) + doc = scour.scourXmlFile('unittests/nested-useless-groups.svg', + scour.parse_args(['--disable-group-collapsing'])[0]) self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'g')), 2, 'Useless nested groups were removed despite --disable-group-collapsing' ) @@ -388,7 +390,8 @@ class RemoveFillOpacityWhenFillNone(unittest.TestCase): class ConvertFillPropertyToAttr(unittest.TestCase): def runTest(self): - doc = scour.scourXmlFile('unittests/fill-none.svg', '--disable-simplify-colors') + doc = scour.scourXmlFile('unittests/fill-none.svg', + scour.parse_args(['--disable-simplify-colors'])[0]) self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'path')[1].getAttribute('fill'), 'black', 'fill property not converted to XML attribute' ) diff --git a/web.py b/web.py new file mode 100644 index 0000000..3b61ff7 --- /dev/null +++ b/web.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Scour Web +# +# Copyright 2009 Jeff Schiller +# +# This file is part of Scour, http://www.codedread.com/scour/ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from mod_python import apache +from scour import scourString + +def form(req): + return """ + + + Scour it! + + +
    +

    Paste the SVG file here

    + +

    Click "Go!" to Scour

    + + + Scour it! + + + +

    Paste the SVG file here

    + +

    Click "Go!" to Scour