Skip to content

Commit

Permalink
Patch #633547: Support plural forms. Do TODOs in test suite.
Browse files Browse the repository at this point in the history
  • Loading branch information
loewis committed Nov 21, 2002
1 parent 21b6014 commit d899605
Show file tree
Hide file tree
Showing 6 changed files with 431 additions and 163 deletions.
45 changes: 45 additions & 0 deletions Doc/lib/libgettext.tex
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,32 @@ \subsection{GNU \program{gettext} API}
\var{domain}.
\end{funcdesc}

\begin{funcdesc}{ngettext}{singular, plural, n}

Like \function{gettext()}, but consider plural forms. If a translation
is found, apply the plural formula to \var{n}, and return the
resulting message (some languages have more than two plural forms).
If no translation is found, return \var{singular} if \var{n} is 1;
return \var{plural} otherwise.

The plural formula is taken from the catalog header. It is a C or
Python expression that has a free variable \var{n}; the expression evaluates
to the index of the plural in the catalog. See the GNU \program{gettext}
documentation for the precise syntax to be used in \file{.po} files, and the
formulas for a variety of languages.

\versionadded{2.3}

\end{funcdesc}

\begin{funcdesc}{dngettext}{domain, singular, plural, n}
Like \function{ngettext()}, but look the message up in the specified
\var{domain}.

\versionadded{2.3}
\end{funcdesc}


Note that GNU \program{gettext} also defines a \function{dcgettext()}
method, but this was deemed not useful and so it is currently
unimplemented.
Expand Down Expand Up @@ -207,6 +233,21 @@ \subsubsection{The \class{NullTranslations} class}
Overridden in derived classes.
\end{methoddesc}

\begin{methoddesc}[NullTranslations]{ngettext}{singular, plural, n}
If a fallback has been set, forward \method{ngettext} to the fallback.
Otherwise, return \var{singular} if \var{n} is 1; return \var{plural}
otherwise. Overridden in derived classes.

\versionadded{2.3}
\end{methoddesc}

\begin{methoddesc}[NullTranslations]{ungettext}{singular, plural, n}
If a fallback has been set, forward \method{ungettext} to the fallback.
Otherwise, return \var{singular} if \var{n} is 1 and \var{plural}
otherwise, as a Unicode string.
Overridden in derived classes.

\versionadded{2.3}
\end{methoddesc}

\begin{methoddesc}[NullTranslations]{info}{}
Return the ``protected'' \member{_info} variable.
\end{methoddesc}
Expand Down Expand Up @@ -263,6 +304,9 @@ \subsubsection{The \class{GNUTranslations} class}
and the value of the ``protected'' \member{_charset} variable to the
builtin \function{unicode()} function.

To facilitate plural forms, the methods \method{ngettext} and
\method{ungettext} are overridden as well.

\subsubsection{Solaris message catalog support}

The Solaris operating system defines its own binary
Expand Down Expand Up @@ -534,6 +578,7 @@ \subsection{Acknowledgements}
\begin{itemize}
\item Peter Funk
\item James Henstridge
\item Juan David Ib\'a\~nez Palomar
\item Marc-Andr\'e Lemburg
\item Martin von L\"owis
\item Fran\c cois Pinard
Expand Down
140 changes: 135 additions & 5 deletions Lib/gettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
# module.
#
# J. David Ibanez implemented plural forms.
#
# TODO:
# - Lazy loading of .mo files. Currently the entire catalog is loaded into
# memory, but that's probably bad for large translated programs. Instead,
Expand All @@ -43,18 +45,76 @@
# - Support Solaris .mo file formats. Unfortunately, we've been unable to
# find this format documented anywhere.

import os
import sys
import struct
import copy

import copy, os, re, struct, sys
from errno import ENOENT


# Public names exported by "from gettext import *".  The plural-aware
# entry points ngettext() and dngettext() are part of the documented
# module API and are therefore exported alongside gettext()/dgettext().
__all__ = ["bindtextdomain","textdomain","gettext","dgettext",
           "ngettext","dngettext",
           "find","translation","install","Catalog"]

_default_localedir = os.path.join(sys.prefix, 'share', 'locale')


def test(condition, true, false):
    """
    Emulate the C conditional operator:
        condition ? true : false
    Plural-form expressions are rewritten into calls to this helper.
    Both branch values are ordinary arguments, so they are already
    evaluated by the time the choice is made.
    """
    if not condition:
        return false
    return true


def c2py(plural):
    """
    Gets a C expression as used in PO files for plural forms and
    returns a Python lambda function that implements an equivalent
    expression.

    The returned function takes one argument, n, and returns the value
    of the translated expression converted with int().

    Raises ValueError if the expression contains any identifier other
    than "n", or if it contains a ")" without a matching "(".
    """
    # StringIO lives in a different module depending on the Python version.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    import token
    import tokenize

    # Security check, allow only the "n" identifier
    tokens = tokenize.generate_tokens(StringIO(plural).readline)
    danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
    if danger:
        raise ValueError('dangerous expression')

    # Replace some C operators by their Python equivalents
    plural = plural.replace('&&', ' and ')
    plural = plural.replace('||', ' or ')

    # Logical negation: a "!" that is not the start of "!=".  The lookahead
    # leaves the character after "!" in place, so the operand of the
    # negation ("!n", "!(...)") is preserved.
    plural = re.sub(r'!(?!=)', ' not ', plural)

    # Regular expression and replacement function used to transform
    # "a?b:c" to "test(a,b,c)".  The "else" part is processed recursively
    # so that chained conditionals (a ? b : c ? d : e) nest correctly.
    expr = re.compile(r'(.*?)\?(.*?):(.*)')
    def repl(x):
        return "test(%s, %s, %s)" % (x.group(1), x.group(2),
                                     expr.sub(repl, x.group(3)))

    # Code to transform the plural expression, taking care of parentheses.
    # stack[0] collects the top-level text; every "(" pushes a new buffer
    # and every ")" folds the innermost buffer back into its parent.
    stack = ['']
    for c in plural:
        if c == '(':
            stack.append('')
        elif c == ')':
            # Only the top-level buffer is left: this ")" has no partner.
            if len(stack) == 1:
                raise ValueError('unbalanced parenthesis in plural form')
            s = expr.sub(repl, stack.pop())
            stack[-1] += '(%s)' % s
        else:
            stack[-1] += c
    plural = expr.sub(repl, stack.pop())

    # NOTE(security): the expression comes from a message catalog and is
    # passed to eval().  The identifier check above is the only guard; it
    # does not limit the cost of evaluating the expression.  The generated
    # code refers to the module-level test() helper for "?:".
    return eval('lambda n: int(%s)' % plural)



def _expand_lang(locale):
from locale import normalize
Expand Down Expand Up @@ -121,11 +181,27 @@ def gettext(self, message):
return self._fallback.gettext(message)
return message

def ngettext(self, msgid1, msgid2, n):
    """Return msgid1 when n is 1 and msgid2 otherwise.

    If a fallback object is set, the call is delegated to its
    ngettext() method instead.
    """
    fallback = self._fallback
    if fallback:
        return fallback.ngettext(msgid1, msgid2, n)
    if n == 1:
        return msgid1
    return msgid2

def ugettext(self, message):
    """Return message converted with unicode().

    If a fallback object is set, the call is delegated to its
    ugettext() method instead.
    """
    fallback = self._fallback
    if not fallback:
        return unicode(message)
    return fallback.ugettext(message)

def ungettext(self, msgid1, msgid2, n):
    """Return msgid1 (n is 1) or msgid2 (otherwise) converted with unicode().

    If a fallback object is set, the call is delegated to its
    ungettext() method instead.
    """
    fallback = self._fallback
    if fallback:
        return fallback.ungettext(msgid1, msgid2, n)
    chosen = msgid2
    if n == 1:
        chosen = msgid1
    return unicode(chosen)

def info(self):
    """Return the object stored in the ``_info`` attribute."""
    metadata = self._info
    return metadata

Expand Down Expand Up @@ -169,8 +245,16 @@ def _parse(self, fp):
tlen, toff = unpack(ii, buf[transidx:transidx+8])
tend = toff + tlen
if mend < buflen and tend < buflen:
msg = buf[moff:mend]
tmsg = buf[toff:tend]
catalog[buf[moff:mend]] = tmsg
if msg.find('\x00') >= 0:
# Plural forms
msgid1, msgid2 = msg.split('\x00')
tmsg = tmsg.split('\x00')
for i in range(len(tmsg)):
catalog[(msgid1, i)] = tmsg[i]
else:
catalog[msg] = tmsg
else:
raise IOError(0, 'File is corrupt', filename)
# See if we're looking at GNU .mo conventions for metadata
Expand All @@ -186,6 +270,12 @@ def _parse(self, fp):
self._info[k] = v
if k == 'content-type':
self._charset = v.split('charset=')[1]
elif k == 'plural-forms':
v = v.split(';')
## nplurals = v[0].split('nplurals=')[1]
## nplurals = int(nplurals.strip())
plural = v[1].split('plural=')[1]
self.plural = c2py(plural)
# advance to next entry in the seek tables
masteridx += 8
transidx += 8
Expand All @@ -198,6 +288,19 @@ def gettext(self, message):
return self._fallback.gettext(message)
return message


def ngettext(self, msgid1, msgid2, n):
    """Return the plural-aware translation of msgid1 for the count n.

    The catalog is keyed by (msgid1, index), where index is the value of
    self.plural(n).  When no such entry exists, the call is delegated to
    the fallback if one is set; otherwise msgid1 is returned when n is 1
    and msgid2 in every other case.
    """
    try:
        return self._catalog[(msgid1, self.plural(n))]
    except KeyError:
        fallback = self._fallback
        if fallback:
            return fallback.ngettext(msgid1, msgid2, n)
        if n == 1:
            return msgid1
        return msgid2


def ugettext(self, message):
try:
tmsg = self._catalog[message]
Expand All @@ -208,6 +311,18 @@ def ugettext(self, message):
return unicode(tmsg, self._charset)


def ungettext(self, msgid1, msgid2, n):
    """Return the plural-aware translation of msgid1 as a Unicode string.

    The catalog is keyed by (msgid1, index), where index is the value of
    self.plural(n).  When no such entry exists, the call is delegated to
    the fallback if one is set; otherwise msgid1 (n is 1) or msgid2 is
    used.  The selected string is decoded with unicode() using
    self._charset.
    """
    try:
        raw = self._catalog[(msgid1, self.plural(n))]
    except KeyError:
        fallback = self._fallback
        if fallback:
            return fallback.ungettext(msgid1, msgid2, n)
        raw = msgid2
        if n == 1:
            raw = msgid1
    return unicode(raw, self._charset)


# Locate a .mo file using the gettext strategy
def find(domain, localedir=None, languages=None, all=0):
Expand Down Expand Up @@ -311,10 +426,25 @@ def dgettext(domain, message):
return t.gettext(message)


def dngettext(domain, msgid1, msgid2, n):
    """Plural-aware lookup of msgid1/msgid2 in the given domain.

    The translation object for the domain is obtained via translation(),
    using the locale directory registered for that domain in _localedirs
    (None when there is none).  If that raises IOError, msgid1 is returned
    when n is 1 and msgid2 otherwise.
    """
    try:
        catalog = translation(domain, _localedirs.get(domain, None))
    except IOError:
        if n == 1:
            return msgid1
        return msgid2
    else:
        return catalog.ngettext(msgid1, msgid2, n)


def gettext(message):
    """Look up message via dgettext() using the module's current domain."""
    domain = _current_domain
    return dgettext(domain, message)


def ngettext(msgid1, msgid2, n):
    """Plural-aware lookup via dngettext() using the module's current domain."""
    domain = _current_domain
    return dngettext(domain, msgid1, msgid2, n)


# dcgettext() has been deemed unnecessary and is not implemented.

# James Henstridge's Catalog constructor from GNOME gettext. Documented usage
Expand Down
46 changes: 0 additions & 46 deletions Lib/test/output/test_gettext

This file was deleted.

Loading

0 comments on commit d899605

Please sign in to comment.