490 lines
16 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
"""
babel.numbers
~~~~~~~~~~~~~
CLDR Plural support. See UTS #35.
:copyright: (c) 2013 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
import re
_plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
_fallback_tag = 'other'
class PluralRule(object):
"""Represents a set of language pluralization rules. The constructor
accepts a list of (tag, expr) tuples or a dict of CLDR rules. The
resulting object is callable and accepts one parameter with a positive or
negative number (both integer and float) for the number that indicates the
plural form for a string and returns the tag for the format:
>>> rule = PluralRule({'one': 'n is 1'})
>>> rule(1)
'one'
>>> rule(2)
'other'
Currently the CLDR defines these tags: zero, one, two, few, many and
other where other is an implicit default. Rules should be mutually
exclusive; for a given numeric value, only one rule should apply (i.e.
the condition should only be true for one of the plural rule elements.
"""
__slots__ = ('abstract', '_func')
def __init__(self, rules):
"""Initialize the rule instance.
:param rules: a list of ``(tag, expr)``) tuples with the rules
conforming to UTS #35 or a dict with the tags as keys
and expressions as values.
:raise RuleError: if the expression is malformed
"""
if isinstance(rules, dict):
rules = rules.items()
found = set()
self.abstract = []
for key, expr in sorted(list(rules)):
if key not in _plural_tags:
raise ValueError('unknown tag %r' % key)
elif key in found:
raise ValueError('tag %r defined twice' % key)
found.add(key)
self.abstract.append((key, _Parser(expr).ast))
def __repr__(self):
rules = self.rules
return '<%s %r>' % (
type(self).__name__,
', '.join(['%s: %s' % (tag, rules[tag]) for tag in _plural_tags
if tag in rules])
)
@classmethod
def parse(cls, rules):
"""Create a `PluralRule` instance for the given rules. If the rules
are a `PluralRule` object, that object is returned.
:param rules: the rules as list or dict, or a `PluralRule` object
:raise RuleError: if the expression is malformed
"""
if isinstance(rules, cls):
return rules
return cls(rules)
@property
def rules(self):
"""The `PluralRule` as a dict of unicode plural rules.
>>> rule = PluralRule({'one': 'n is 1'})
>>> rule.rules
{'one': 'n is 1'}
"""
_compile = _UnicodeCompiler().compile
return dict([(tag, _compile(ast)) for tag, ast in self.abstract])
tags = property(lambda x: frozenset([i[0] for i in x.abstract]), doc="""
A set of explicitly defined tags in this rule. The implicit default
``'other'`` rules is not part of this set unless there is an explicit
rule for it.""")
def __getstate__(self):
return self.abstract
def __setstate__(self, abstract):
self.abstract = abstract
def __call__(self, n):
if not hasattr(self, '_func'):
self._func = to_python(self)
return self._func(n)
def to_javascript(rule):
"""Convert a list/dict of rules or a `PluralRule` object into a JavaScript
function. This function depends on no external library:
>>> to_javascript({'one': 'n is 1'})
"(function(n) { return (n == 1) ? 'one' : 'other'; })"
Implementation detail: The function generated will probably evaluate
expressions involved into range operations multiple times. This has the
advantage that external helper functions are not required and is not a
big performance hit for these simple calculations.
:param rule: the rules as list or dict, or a `PluralRule` object
:raise RuleError: if the expression is malformed
"""
to_js = _JavaScriptCompiler().compile
result = ['(function(n) { return ']
for tag, ast in PluralRule.parse(rule).abstract:
result.append('%s ? %r : ' % (to_js(ast), tag))
result.append('%r; })' % _fallback_tag)
return ''.join(result)
def to_python(rule):
"""Convert a list/dict of rules or a `PluralRule` object into a regular
Python function. This is useful in situations where you need a real
function and don't are about the actual rule object:
>>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
>>> func(1)
'one'
>>> func(3)
'few'
>>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
>>> func(11)
'one'
>>> func(15)
'few'
:param rule: the rules as list or dict, or a `PluralRule` object
:raise RuleError: if the expression is malformed
"""
namespace = {
'IN': in_range_list,
'WITHIN': within_range_list,
'MOD': cldr_modulo
}
to_python = _PythonCompiler().compile
result = ['def evaluate(n):']
for tag, ast in PluralRule.parse(rule).abstract:
# the str() call is to coerce the tag to the native string. It's
# a limited ascii restricted set of tags anyways so that is fine.
result.append(' if (%s): return %r' % (to_python(ast), str(tag)))
result.append(' return %r' % _fallback_tag)
code = compile('\n'.join(result), '<rule>', 'exec')
eval(code, namespace)
return namespace['evaluate']
def to_gettext(rule):
"""The plural rule as gettext expression. The gettext expression is
technically limited to integers and returns indices rather than tags.
>>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2)'
:param rule: the rules as list or dict, or a `PluralRule` object
:raise RuleError: if the expression is malformed
"""
rule = PluralRule.parse(rule)
used_tags = rule.tags | set([_fallback_tag])
_compile = _GettextCompiler().compile
_get_index = [tag for tag in _plural_tags if tag in used_tags].index
result = ['nplurals=%d; plural=(' % len(used_tags)]
for tag, ast in rule.abstract:
result.append('%s ? %d : ' % (_compile(ast), _get_index(tag)))
result.append('%d)' % _get_index(_fallback_tag))
return ''.join(result)
def in_range_list(num, range_list):
"""Integer range list test. This is the callback for the "in" operator
of the UTS #35 pluralization rule language:
>>> in_range_list(1, [(1, 3)])
True
>>> in_range_list(3, [(1, 3)])
True
>>> in_range_list(3, [(1, 3), (5, 8)])
True
>>> in_range_list(1.2, [(1, 4)])
False
>>> in_range_list(10, [(1, 4)])
False
>>> in_range_list(10, [(1, 4), (6, 8)])
False
"""
return num == int(num) and within_range_list(num, range_list)
def within_range_list(num, range_list):
"""Float range test. This is the callback for the "within" operator
of the UTS #35 pluralization rule language:
>>> within_range_list(1, [(1, 3)])
True
>>> within_range_list(1.0, [(1, 3)])
True
>>> within_range_list(1.2, [(1, 4)])
True
>>> within_range_list(8.8, [(1, 4), (7, 15)])
True
>>> within_range_list(10, [(1, 4)])
False
>>> within_range_list(10.5, [(1, 4), (20, 30)])
False
"""
return any(num >= min_ and num <= max_ for min_, max_ in range_list)
def cldr_modulo(a, b):
"""Javaish modulo. This modulo operator returns the value with the sign
of the dividend rather than the divisor like Python does:
>>> cldr_modulo(-3, 5)
-3
>>> cldr_modulo(-3, -5)
-3
>>> cldr_modulo(3, 5)
3
"""
reverse = 0
if a < 0:
a *= -1
reverse = 1
if b < 0:
b *= -1
rv = a % b
if reverse:
rv *= -1
return rv
class RuleError(Exception):
"""Raised if a rule is malformed."""
class _Parser(object):
"""Internal parser. This class can translate a single rule into an abstract
tree of tuples. It implements the following grammar::
condition = and_condition ('or' and_condition)*
and_condition = relation ('and' relation)*
relation = is_relation | in_relation | within_relation | 'n' <EOL>
is_relation = expr 'is' ('not')? value
in_relation = expr ('not')? 'in' range_list
within_relation = expr ('not')? 'within' range_list
expr = 'n' ('mod' value)?
range_list = (range | value) (',' range_list)*
value = digit+
digit = 0|1|2|3|4|5|6|7|8|9
range = value'..'value
- Whitespace can occur between or around any of the above tokens.
- Rules should be mutually exclusive; for a given numeric value, only one
rule should apply (i.e. the condition should only be true for one of
the plural rule elements).
- The in and within relations can take comma-separated lists, such as:
'n in 3,5,7..15'.
The translator parses the expression on instanciation into an attribute
called `ast`.
"""
_rules = [
(None, re.compile(r'\s+(?u)')),
('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|n)\b')),
('value', re.compile(r'\d+')),
('comma', re.compile(r',')),
('ellipsis', re.compile(r'\.\.'))
]
def __init__(self, string):
string = string.lower()
result = []
pos = 0
end = len(string)
while pos < end:
for tok, rule in self._rules:
match = rule.match(string, pos)
if match is not None:
pos = match.end()
if tok:
result.append((tok, match.group()))
break
else:
raise RuleError('malformed CLDR pluralization rule. '
'Got unexpected %r' % string[pos])
self.tokens = result[::-1]
self.ast = self.condition()
if self.tokens:
raise RuleError('Expected end of rule, got %r' %
self.tokens[-1][1])
def test(self, type, value=None):
return self.tokens and self.tokens[-1][0] == type and \
(value is None or self.tokens[-1][1] == value)
def skip(self, type, value=None):
if self.test(type, value):
return self.tokens.pop()
def expect(self, type, value=None, term=None):
token = self.skip(type, value)
if token is not None:
return token
if term is None:
term = repr(value is None and type or value)
if not self.tokens:
raise RuleError('expected %s but end of rule reached' % term)
raise RuleError('expected %s but got %r' % (term, self.tokens[-1][1]))
def condition(self):
op = self.and_condition()
while self.skip('word', 'or'):
op = 'or', (op, self.and_condition())
return op
def and_condition(self):
op = self.relation()
while self.skip('word', 'and'):
op = 'and', (op, self.relation())
return op
def relation(self):
left = self.expr()
if self.skip('word', 'is'):
return self.skip('word', 'not') and 'isnot' or 'is', \
(left, self.value())
negated = self.skip('word', 'not')
method = 'in'
if self.skip('word', 'within'):
method = 'within'
else:
self.expect('word', 'in', term="'within' or 'in'")
rv = 'relation', (method, left, self.range_list())
if negated:
rv = 'not', (rv,)
return rv
def range_or_value(self):
left = self.value()
if self.skip('ellipsis'):
return((left, self.value()))
else:
return((left, left))
def range_list(self):
range_list = [self.range_or_value()]
while self.skip('comma'):
range_list.append(self.range_or_value())
return 'range_list', range_list
def expr(self):
self.expect('word', 'n')
if self.skip('word', 'mod'):
return 'mod', (('n', ()), self.value())
return 'n', ()
def value(self):
return 'value', (int(self.expect('value')[1]),)
def _binary_compiler(tmpl):
"""Compiler factory for the `_Compiler`."""
return lambda self, l, r: tmpl % (self.compile(l), self.compile(r))
def _unary_compiler(tmpl):
"""Compiler factory for the `_Compiler`."""
return lambda self, x: tmpl % self.compile(x)
class _Compiler(object):
"""The compilers are able to transform the expressions into multiple
output formats.
"""
def compile(self, arg):
op, args = arg
return getattr(self, 'compile_' + op)(*args)
compile_n = lambda x: 'n'
compile_value = lambda x, v: str(v)
compile_and = _binary_compiler('(%s && %s)')
compile_or = _binary_compiler('(%s || %s)')
compile_not = _unary_compiler('(!%s)')
compile_mod = _binary_compiler('(%s %% %s)')
compile_is = _binary_compiler('(%s == %s)')
compile_isnot = _binary_compiler('(%s != %s)')
def compile_relation(self, method, expr, range_list):
raise NotImplementedError()
class _PythonCompiler(_Compiler):
"""Compiles an expression to Python."""
compile_and = _binary_compiler('(%s and %s)')
compile_or = _binary_compiler('(%s or %s)')
compile_not = _unary_compiler('(not %s)')
compile_mod = _binary_compiler('MOD(%s, %s)')
def compile_relation(self, method, expr, range_list):
compile_range_list = '[%s]' % ','.join(
['(%s, %s)' % tuple(map(self.compile, range_))
for range_ in range_list[1]])
return '%s(%s, %s)' % (method.upper(), self.compile(expr),
compile_range_list)
class _GettextCompiler(_Compiler):
"""Compile into a gettext plural expression."""
def compile_relation(self, method, expr, range_list):
rv = []
expr = self.compile(expr)
for item in range_list[1]:
if item[0] == item[1]:
rv.append('(%s == %s)' % (
expr,
self.compile(item[0])
))
else:
min, max = map(self.compile, item)
rv.append('(%s >= %s && %s <= %s)' % (
expr,
min,
expr,
max
))
return '(%s)' % ' || '.join(rv)
class _JavaScriptCompiler(_GettextCompiler):
"""Compiles the expression to plain of JavaScript."""
def compile_relation(self, method, expr, range_list):
code = _GettextCompiler.compile_relation(
self, method, expr, range_list)
if method == 'in':
expr = self.compile(expr)
code = '(parseInt(%s) == %s && %s)' % (expr, expr, code)
return code
class _UnicodeCompiler(_Compiler):
"""Returns a unicode pluralization rule again."""
compile_is = _binary_compiler('%s is %s')
compile_isnot = _binary_compiler('%s is not %s')
compile_and = _binary_compiler('%s and %s')
compile_or = _binary_compiler('%s or %s')
compile_mod = _binary_compiler('%s mod %s')
def compile_not(self, relation):
return self.compile_relation(negated=True, *relation[1])
def compile_relation(self, method, expr, range_list, negated=False):
ranges = []
for item in range_list[1]:
if item[0] == item[1]:
ranges.append(self.compile(item[0]))
else:
ranges.append('%s..%s' % tuple(map(self.compile, item)))
return '%s%s %s %s' % (
self.compile(expr), negated and ' not' or '',
method, ','.join(ranges)
)