@@ -174,25 +174,24 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
174
174
/* u_getitem: build a Python Unicode object from element i of the array's
 * item buffer (ap->ob_item).
 *
 * This span is a diff hunk and carries both versions of the body:
 *   '-' (removed): the buffer is read as Py_UCS4 and the code point is
 *       passed to PyUnicode_FromOrdinal().
 *   '+' (added):   the buffer is read as Py_UNICODE and the address of
 *       element i is passed to PyUnicode_FromUnicode() with length 1.
 *
 * Neither version checks i against the array bounds here.
 * NOTE(review): presumably the caller validates the index -- confirm.
 */
static PyObject *
175
175
u_getitem(arrayobject *ap, Py_ssize_t i)
176
176
{
177
-
return PyUnicode_FromOrdinal(((Py_UCS4 *) ap->ob_item)[i]);
177
+
return PyUnicode_FromUnicode(&((Py_UNICODE *) ap->ob_item)[i], 1);
178
178
}
179
179
180
180
/* u_setitem: validate that v is a Unicode string of length exactly 1 and,
 * when i >= 0, store that character into slot i of ap->ob_item.
 *
 * Returns 0 on success. Returns -1 when PyArg_Parse() fails, or (after
 * setting TypeError "array item must be unicode character") when the
 * length is not 1. With a negative i the value is only validated; nothing
 * is written.
 *
 * This span is a diff hunk and carries both versions of the body:
 *   '-' (removed): parses v with "U" into a PyObject *, calls
 *       PyUnicode_READY(), checks PyUnicode_GET_LENGTH(), and stores
 *       PyUnicode_READ_CHAR(p, 0) into a Py_UCS4 slot.
 *   '+' (added):   parses v with "u#" into a Py_UNICODE pointer plus a
 *       length, checks the length, and stores p[0] into a Py_UNICODE slot.
 *
 * NOTE(review): with "u#" the C type written through &len depends on
 * whether PY_SSIZE_T_CLEAN is defined before Python.h is included; that
 * define is not visible in this excerpt -- confirm it matches the
 * Py_ssize_t declaration of len.
 */
static int
181
181
u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
182
182
{
183
-
PyObject *p;
183
+
Py_UNICODE *p;
184
+
Py_ssize_t len;
184
185
185
-
if (!PyArg_Parse(v, "U;array item must be unicode character", &p))
186
-
return -1;
187
-
if (PyUnicode_READY(p))
186
+
if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len))
188
187
return -1;
189
-
if (PyUnicode_GET_LENGTH(p) != 1) {
188
+
if (len != 1) {
190
189
PyErr_SetString(PyExc_TypeError,
191
190
"array item must be unicode character");
192
191
return -1;
193
192
}
194
193
if (i >= 0)
195
-
((Py_UCS4 *)ap->ob_item)[i] = PyUnicode_READ_CHAR(p, 0);
194
+
((Py_UNICODE *)ap->ob_item)[i] = p[0];
196
195
return 0;
197
196
}
198
197
@@ -444,13 +443,6 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
444
443
return 0;
445
444
}
446
445
447
-
#if SIZEOF_INT == 4
448
-
# define STRUCT_LONG_FORMAT "I"
449
-
#elif SIZEOF_LONG == 4
450
-
# define STRUCT_LONG_FORMAT "L"
451
-
#else
452
-
# error "Unable to get struct format for Py_UCS4"
453
-
#endif
454
446
455
447
/* Description of types.
456
448
*
@@ -460,7 +452,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
460
452
static struct arraydescr descriptors[] = {
461
453
{'b', 1, b_getitem, b_setitem, "b", 1, 1},
462
454
{'B', 1, BB_getitem, BB_setitem, "B", 1, 0},
463
-
{'u', sizeof(Py_UCS4), u_getitem, u_setitem, STRUCT_LONG_FORMAT, 0, 0},
455
+
{'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u", 0, 0},
464
456
{'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1},
465
457
{'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0},
466
458
{'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1},
@@ -1519,26 +1511,25 @@ This method is deprecated. Use tobytes instead.");
1519
1511
static PyObject *
1520
1512
array_fromunicode(arrayobject *self, PyObject *args)
1521
1513
{
1522
-
PyObject *ustr;
1514
+
Py_UNICODE *ustr;
1523
1515
Py_ssize_t n;
1516
+
char typecode;
1524
1517
1525
-
if (!PyArg_ParseTuple(args, "U:fromunicode", &ustr))
1518
+
if (!PyArg_ParseTuple(args, "u#:fromunicode", &ustr, &n))
1526
1519
return NULL;
1527
-
if (self->ob_descr->typecode != 'u') {
1520
+
typecode = self->ob_descr->typecode;
1521
+
if ((typecode != 'u')) {
1528
1522
PyErr_SetString(PyExc_ValueError,
1529
1523
"fromunicode() may only be called on "
1530
1524
"unicode type arrays");
1531
1525
return NULL;
1532
1526
}
1533
-
if (PyUnicode_READY(ustr))
1534
-
return NULL;
1535
-
n = PyUnicode_GET_LENGTH(ustr);
1536
1527
if (n > 0) {
1537
1528
Py_ssize_t old_size = Py_SIZE(self);
1538
1529
if (array_resize(self, old_size + n) == -1)
1539
1530
return NULL;
1540
-
if (!PyUnicode_AsUCS4(ustr, (Py_UCS4 *)self->ob_item + old_size, n, 0))
1541
-
return NULL;
1531
+
memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),
1532
+
ustr, n * sizeof(Py_UNICODE));
1542
1533
}
1543
1534
1544
1535
Py_INCREF(Py_None);
@@ -1557,14 +1548,14 @@ append Unicode data to an array of some other type.");
1557
1548
/* array_tounicode: convert the whole item buffer of a 'u' array into a
 * Python Unicode object.
 *
 * If self->ob_descr->typecode is not 'u', sets ValueError
 * ("tounicode() may only be called on unicode type arrays") and returns
 * NULL. The second parameter is not used by the body.
 *
 * This span is a diff hunk and carries both versions of the body:
 *   '-' (removed): tests the typecode directly and builds the result with
 *       PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, ...), reading the
 *       buffer as Py_UCS4.
 *   '+' (added):   copies the typecode into a local first and builds the
 *       result with PyUnicode_FromUnicode(), reading the buffer as
 *       Py_UNICODE.
 * Both versions pass Py_SIZE(self) as the element count.
 */
static PyObject *
1558
1549
array_tounicode(arrayobject *self, PyObject *unused)
1559
1550
{
1560
-
if (self->ob_descr->typecode != 'u') {
1551
+
char typecode;
1552
+
typecode = self->ob_descr->typecode;
1553
+
if ((typecode != 'u')) {
1561
1554
PyErr_SetString(PyExc_ValueError,
1562
1555
"tounicode() may only be called on unicode type arrays");
1563
1556
return NULL;
1564
1557
}
1565
-
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1566
-
(Py_UCS4 *) self->ob_item,
1567
-
Py_SIZE(self));
1558
+
return PyUnicode_FromUnicode((Py_UNICODE *) self->ob_item, Py_SIZE(self));
1568
1559
}
1569
1560
1570
1561
PyDoc_STRVAR(tounicode_doc,
@@ -1671,7 +1662,13 @@ typecode_to_mformat_code(char typecode)
1671
1662
return UNSIGNED_INT8;
1672
1663
1673
1664
case 'u':
1674
-
return UTF32_LE + is_big_endian;
1665
+
if (sizeof(Py_UNICODE) == 2) {
1666
+
return UTF16_LE + is_big_endian;
1667
+
}
1668
+
if (sizeof(Py_UNICODE) == 4) {
1669
+
return UTF32_LE + is_big_endian;
1670
+
}
1671
+
return UNKNOWN_FORMAT;
1675
1672
1676
1673
case 'f':
1677
1674
if (sizeof(float) == 4) {
@@ -2419,8 +2416,14 @@ array_buffer_getbuf(arrayobject *self, Py_buffer *view, int flags)
2419
2416
view->strides = &(view->itemsize);
2420
2417
view->format = NULL;
2421
2418
view->internal = NULL;
2422
-
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
2419
+
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
2423
2420
view->format = self->ob_descr->formats;
2421
+
#ifdef Py_UNICODE_WIDE
2422
+
if (self->ob_descr->typecode == 'u') {
2423
+
view->format = "w";
2424
+
}
2425
+
#endif
2426
+
}
2424
2427
2425
2428
finish:
2426
2429
self->ob_exports++;
@@ -2534,25 +2537,29 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2534
2537
Py_DECREF(v);
2535
2538
}
2536
2539
else if (initial != NULL && PyUnicode_Check(initial)) {
2540
+
Py_UNICODE *ustr;
2537
2541
Py_ssize_t n;
2538
-
if (PyUnicode_READY(initial)) {
2542
+
2543
+
ustr = PyUnicode_AsUnicode(initial);
2544
+
if (ustr == NULL) {
2545
+
PyErr_NoMemory();
2539
2546
Py_DECREF(a);
2540
2547
return NULL;
2541
2548
}
2542
-
n = PyUnicode_GET_LENGTH(initial);
2549
+
2550
+
n = PyUnicode_GET_DATA_SIZE(initial);
2543
2551
if (n > 0) {
2544
2552
arrayobject *self = (arrayobject *)a;
2545
-
Py_UCS4 *item = (Py_UCS4 *)self->ob_item;
2546
-
item = (Py_UCS4 *)PyMem_Realloc(item, n * sizeof(Py_UCS4));
2553
+
char *item = self->ob_item;
2554
+
item = (char *)PyMem_Realloc(item, n);
2547
2555
if (item == NULL) {
2548
2556
PyErr_NoMemory();
2549
2557
Py_DECREF(a);
2550
2558
return NULL;
2551
2559
}
2552
-
self->ob_item = (char*)item;
2553
-
Py_SIZE(self) = n;
2554
-
if (!PyUnicode_AsUCS4(initial, item, n, 0))
2555
-
return NULL;
2560
+
self->ob_item = item;
2561
+
Py_SIZE(self) = n / sizeof(Py_UNICODE);
2562
+
memcpy(item, ustr, n);
2556
2563
self->allocated = Py_SIZE(self);
2557
2564
}
2558
2565
}
@@ -2593,7 +2600,7 @@ is a single character. The following type codes are defined:\n\
2593
2600
Type code C Type Minimum size in bytes \n\
2594
2601
'b' signed integer 1 \n\
2595
2602
'B' unsigned integer 1 \n\
2596
-
'u' Unicode character 4 \n\
2603
+
'u' Unicode character 2 (see note) \n\
2597
2604
'h' signed integer 2 \n\
2598
2605
'H' unsigned integer 2 \n\
2599
2606
'i' signed integer 2 \n\
@@ -2605,6 +2612,9 @@ is a single character. The following type codes are defined:\n\
2605
2612
'f' floating point 4 \n\
2606
2613
'd' floating point 8 \n\
2607
2614
\n\
2615
+
NOTE: The 'u' typecode corresponds to Python's unicode character. On \n\
2616
+
narrow builds this is 2-bytes on wide builds this is 4-bytes.\n\
2617
+
\n\
2608
2618
NOTE: The 'q' and 'Q' type codes are only available if the platform \n\
2609
2619
C compiler used to build Python supports 'long long', or, on Windows, \n\
2610
2620
'__int64'.\n\
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4