Skip to content

Commit

Permalink
Issue python#13706: Fix format(int, "n") for locale with non-ASCII th…
Browse files Browse the repository at this point in the history
…ousands separator

 * Decode thousands separator and decimal point using PyUnicode_DecodeLocale()
   (from the locale encoding), instead of decoding them implicitly from latin1
 * Remove _PyUnicode_InsertThousandsGroupingLocale(), which was not used
 * Change _PyUnicode_InsertThousandsGrouping() API to return the maximum
   character if unicode is NULL
 * Replace MIN/MAX macros with Py_MIN/Py_MAX
 * stringlib/undef.h undefines STRINGLIB_IS_UNICODE
 * stringlib/localeutil.h only supports Unicode
  • Loading branch information
Victor Stinner committed Feb 23, 2012
1 parent dcb30cf commit 41a863c
Show file tree
Hide file tree
Showing 12 changed files with 188 additions and 151 deletions.
18 changes: 3 additions & 15 deletions Include/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -1936,32 +1936,20 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
);
#endif

/* Using the current locale, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */

#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
Py_ssize_t n_buffer,
Py_UNICODE *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width);
#endif

/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
PyObject *unicode,
int kind,
void *buffer,
Py_ssize_t index,
Py_ssize_t n_buffer,
void *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
const char *thousands_sep);
PyObject *thousands_sep,
Py_UCS4 *maxchar);
#endif
/* === Characters Type APIs =============================================== */

Expand Down
15 changes: 15 additions & 0 deletions Lib/test/test_format.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from test.support import verbose, TestFailed
import locale
import sys
import test.support as support
import unittest
Expand Down Expand Up @@ -282,6 +283,20 @@ def test_non_ascii(self):
self.assertEqual(format(1+2j, "\u2007^8"), "\u2007(1+2j)\u2007")
self.assertEqual(format(0j, "\u2007^4"), "\u20070j\u2007")

def test_locale(self):
try:
oldloc = locale.setlocale(locale.LC_ALL, '')
except locale.Error as err:
self.skipTest("Cannot set locale: {}".format(err))
try:
sep = locale.localeconv()['thousands_sep']
text = format(123456789, "n")
self.assertIn(sep, text)
self.assertEqual(text.replace(sep, ''), '123456789')
finally:
locale.setlocale(locale.LC_ALL, oldloc)



def test_main():
support.run_unittest(FormatTest)
Expand Down
3 changes: 0 additions & 3 deletions Objects/stringlib/asciilib.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@
#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale

#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII

#define _Py_InsertThousandsGrouping _PyUnicode_ascii_InsertThousandsGrouping
#define _Py_InsertThousandsGroupingLocale _PyUnicode_ascii_InsertThousandsGroupingLocale

71 changes: 22 additions & 49 deletions Objects/stringlib/localeutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

#include <locale.h>

#define MAX(x, y) ((x) < (y) ? (y) : (x))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#ifndef STRINGLIB_IS_UNICODE
# error "localeutil is specific to Unicode"
#endif

typedef struct {
const char *grouping;
Expand Down Expand Up @@ -46,7 +47,7 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
are optional, depending on when we're called. */
static void
STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep,
Py_ssize_t n_chars, Py_ssize_t n_zeros, STRINGLIB_CHAR* thousands_sep,
Py_ssize_t thousands_sep_len)
{
Py_ssize_t i;
Expand All @@ -55,15 +56,8 @@ STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
*buffer_end -= thousands_sep_len;

/* Copy the thousands_sep chars into the buffer. */
#if STRINGLIB_IS_UNICODE
/* Convert from the char's of the thousands_sep from
the locale into unicode. */
for (i = 0; i < thousands_sep_len; ++i)
(*buffer_end)[i] = thousands_sep[i];
#else
/* No conversion, just memcpy the thousands_sep. */
memcpy(*buffer_end, thousands_sep, thousands_sep_len);
#endif
memcpy(*buffer_end, thousands_sep,
thousands_sep_len * STRINGLIB_SIZEOF_CHAR);
}

*buffer_end -= n_chars;
Expand All @@ -76,7 +70,7 @@ STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
}

/**
* _Py_InsertThousandsGrouping:
* InsertThousandsGrouping:
* @buffer: A pointer to the start of a string.
* @n_buffer: Number of characters in @buffer.
* @digits: A pointer to the digits we're reading from. If count
Expand Down Expand Up @@ -106,13 +100,15 @@ STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
_insert_thousands_sep().
**/
Py_ssize_t
_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
const char *thousands_sep)
STRINGLIB(InsertThousandsGrouping)(
STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
STRINGLIB_CHAR *thousands_sep,
Py_ssize_t thousands_sep_len)
{
Py_ssize_t count = 0;
Py_ssize_t n_zeros;
Expand All @@ -124,7 +120,6 @@ _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
STRINGLIB_CHAR *digits_end = NULL;
Py_ssize_t l;
Py_ssize_t n_chars;
Py_ssize_t thousands_sep_len = strlen(thousands_sep);
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
be looked at */
/* A generator that returns all of the grouping widths, until it
Expand All @@ -138,9 +133,9 @@ _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
}

while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) {
l = MIN(l, MAX(MAX(remaining, min_width), 1));
n_zeros = MAX(0, l - remaining);
n_chars = MAX(0, MIN(remaining, l));
l = Py_MIN(l, Py_MAX(Py_MAX(remaining, min_width), 1));
n_zeros = Py_MAX(0, l - remaining);
n_chars = Py_MAX(0, Py_MIN(remaining, l));

/* Use n_zeros zeros and n_chars chars */

Expand Down Expand Up @@ -168,9 +163,9 @@ _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
if (!loop_broken) {
/* We left the loop without using a break statement. */

l = MAX(MAX(remaining, min_width), 1);
n_zeros = MAX(0, l - remaining);
n_chars = MAX(0, MIN(remaining, l));
l = Py_MAX(Py_MAX(remaining, min_width), 1);
n_zeros = Py_MAX(0, l - remaining);
n_chars = Py_MAX(0, Py_MIN(remaining, l));

/* Use n_zeros zeros and n_chars chars */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
Expand All @@ -183,25 +178,3 @@ _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
return count;
}

/**
* _Py_InsertThousandsGroupingLocale:
* @buffer: A pointer to the start of a string.
* @n_digits: The number of digits in the string, in which we want
* to put the grouping chars.
*
* Reads the current locale and calls _Py_InsertThousandsGrouping().
**/
Py_ssize_t
_Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width)
{
struct lconv *locale_data = localeconv();
const char *grouping = locale_data->grouping;
const char *thousands_sep = locale_data->thousands_sep;

return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits,
min_width, grouping, thousands_sep);
}
2 changes: 0 additions & 2 deletions Objects/stringlib/stringdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,5 @@
#define STRINGLIB_CHECK PyBytes_Check
#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_GROUPING _PyBytes_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyBytes_InsertThousandsGroupingLocale
#define STRINGLIB_TOASCII PyObject_Repr
#endif /* !STRINGLIB_STRINGDEFS_H */
3 changes: 0 additions & 3 deletions Objects/stringlib/ucs1lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,10 @@
#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale

#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII

#define _Py_InsertThousandsGrouping _PyUnicode_ucs1_InsertThousandsGrouping
#define _Py_InsertThousandsGroupingLocale _PyUnicode_ucs1_InsertThousandsGroupingLocale


3 changes: 0 additions & 3 deletions Objects/stringlib/ucs2lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@
#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale

#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII

#define _Py_InsertThousandsGrouping _PyUnicode_ucs2_InsertThousandsGrouping
#define _Py_InsertThousandsGroupingLocale _PyUnicode_ucs2_InsertThousandsGroupingLocale

3 changes: 0 additions & 3 deletions Objects/stringlib/ucs4lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@
#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale

#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII

#define _Py_InsertThousandsGrouping _PyUnicode_ucs4_InsertThousandsGrouping
#define _Py_InsertThousandsGroupingLocale _PyUnicode_ucs4_InsertThousandsGroupingLocale

2 changes: 1 addition & 1 deletion Objects/stringlib/undef.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
#undef STRINGLIB_NEW
#undef STRINGLIB_RESIZE
#undef _Py_InsertThousandsGrouping
#undef _Py_InsertThousandsGroupingLocale
#undef STRINGLIB_IS_UNICODE

2 changes: 0 additions & 2 deletions Objects/stringlib/unicodedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
#define STRINGLIB_RESIZE PyUnicode_Resize
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale

#if PY_VERSION_HEX < 0x03000000
#define STRINGLIB_TOSTR PyObject_Unicode
Expand Down
75 changes: 58 additions & 17 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -9151,34 +9151,75 @@ any_find_slice(int direction, PyObject* s1, PyObject* s2,
}

Py_ssize_t
_PyUnicode_InsertThousandsGrouping(PyObject *unicode, int kind, void *data,
Py_ssize_t n_buffer,
void *digits, Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
const char *thousands_sep)
{
_PyUnicode_InsertThousandsGrouping(
PyObject *unicode, Py_ssize_t index,
Py_ssize_t n_buffer,
void *digits, Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping, PyObject *thousands_sep,
Py_UCS4 *maxchar)
{
unsigned int kind, thousands_sep_kind;
void *data, *thousands_sep_data;
Py_ssize_t thousands_sep_len;
Py_ssize_t len;

if (unicode != NULL) {
kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode) + index * kind;
}
else {
kind = PyUnicode_1BYTE_KIND;
data = NULL;
}
thousands_sep_kind = PyUnicode_KIND(thousands_sep);
thousands_sep_data = PyUnicode_DATA(thousands_sep);
thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep);
if (unicode != NULL && thousands_sep_kind != kind) {
thousands_sep_data = _PyUnicode_AsKind(thousands_sep, kind);
if (!thousands_sep_data)
return -1;
}

switch (kind) {
case PyUnicode_1BYTE_KIND:
if (unicode != NULL && PyUnicode_IS_ASCII(unicode))
return _PyUnicode_ascii_InsertThousandsGrouping(
len = asciilib_InsertThousandsGrouping(
(Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits,
min_width, grouping, thousands_sep);
min_width, grouping,
thousands_sep_data, thousands_sep_len);
else
return _PyUnicode_ucs1_InsertThousandsGrouping(
len = ucs1lib_InsertThousandsGrouping(
(Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits,
min_width, grouping, thousands_sep);
min_width, grouping,
thousands_sep_data, thousands_sep_len);
break;
case PyUnicode_2BYTE_KIND:
return _PyUnicode_ucs2_InsertThousandsGrouping(
len = ucs2lib_InsertThousandsGrouping(
(Py_UCS2*)data, n_buffer, (Py_UCS2*)digits, n_digits,
min_width, grouping, thousands_sep);
min_width, grouping,
thousands_sep_data, thousands_sep_len);
break;
case PyUnicode_4BYTE_KIND:
return _PyUnicode_ucs4_InsertThousandsGrouping(
len = ucs4lib_InsertThousandsGrouping(
(Py_UCS4*)data, n_buffer, (Py_UCS4*)digits, n_digits,
min_width, grouping, thousands_sep);
min_width, grouping,
thousands_sep_data, thousands_sep_len);
break;
default:
assert(0);
return -1;
}
assert(0);
return -1;
if (unicode != NULL && thousands_sep_kind != kind)
PyMem_Free(thousands_sep_data);
if (unicode == NULL) {
*maxchar = 127;
if (len != n_digits) {
*maxchar = Py_MAX(*maxchar,
PyUnicode_MAX_CHAR_VALUE(thousands_sep));
}
}
return len;
}


Expand Down
Loading

0 comments on commit 41a863c

Please sign in to comment.