1
-
/* auto-generated on 2024-12-17 14:54:59 -0500. Do not edit! */
1
+
/* auto-generated on 2024-12-26 12:42:33 -0500. Do not edit! */
2
2
/* begin file src/simdutf.cpp */
3
3
#include "simdutf.h"
4
4
// We include base64_tables once.
@@ -697,6 +697,15 @@ static_assert(to_base64_url_value[uint8_t('_')] == 63,
697
697
#include <climits>
698
698
#include <type_traits>
699
699
700
+
// Compile-time checks on the platform's type sizes: uint8_t must have the
// size of char, uint16_t the size of char16_t, and uint32_t the size of
// char32_t; finally CHAR_BIT must be 8. Compilation stops with the quoted
// message if any of these does not hold.
static_assert(sizeof(uint8_t) == sizeof(char),
701
+
"simdutf requires that uint8_t be a char");
702
+
static_assert(sizeof(uint16_t) == sizeof(char16_t),
703
+
"simdutf requires that char16_t be 16 bits");
704
+
static_assert(sizeof(uint32_t) == sizeof(char32_t),
705
+
"simdutf requires that char32_t be 32 bits");
706
+
// next line is redundant, but it is kept to catch defective systems.
707
+
static_assert(CHAR_BIT == 8, "simdutf requires 8-bit bytes");
708
+
700
709
// Useful for debugging purposes
701
710
namespace simdutf {
702
711
namespace {
@@ -9746,24 +9755,23 @@ inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) {
9746
9755
}
9747
9756
9748
9757
template <endianness big_endian>
9749
-
inline simdutf_warn_unused bool validate(const char16_t *buf,
9758
+
inline simdutf_warn_unused bool validate(const char16_t *data,
9750
9759
size_t len) noexcept {
9751
-
const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
9752
9760
uint64_t pos = 0;
9753
9761
while (pos < len) {
9754
-
uint16_t word =
9762
+
char16_t word =
9755
9763
!match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
9756
9764
if ((word & 0xF800) == 0xD800) {
9757
9765
if (pos + 1 >= len) {
9758
9766
return false;
9759
9767
}
9760
-
uint16_t diff = uint16_t(word - 0xD800);
9768
+
char16_t diff = char16_t(word - 0xD800);
9761
9769
if (diff > 0x3FF) {
9762
9770
return false;
9763
9771
}
9764
-
uint16_t next_word =
9772
+
char16_t next_word =
9765
9773
!match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
9766
-
uint16_t diff2 = uint16_t(next_word - 0xDC00);
9774
+
char16_t diff2 = char16_t(next_word - 0xDC00);
9767
9775
if (diff2 > 0x3FF) {
9768
9776
return false;
9769
9777
}
@@ -9776,24 +9784,23 @@ inline simdutf_warn_unused bool validate(const char16_t *buf,
9776
9784
}
9777
9785
9778
9786
template <endianness big_endian>
9779
-
inline simdutf_warn_unused result validate_with_errors(const char16_t *buf,
9787
+
inline simdutf_warn_unused result validate_with_errors(const char16_t *data,
9780
9788
size_t len) noexcept {
9781
-
const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
9782
9789
size_t pos = 0;
9783
9790
while (pos < len) {
9784
-
uint16_t word =
9791
+
char16_t word =
9785
9792
!match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
9786
9793
if ((word & 0xF800) == 0xD800) {
9787
9794
if (pos + 1 >= len) {
9788
9795
return result(error_code::SURROGATE, pos);
9789
9796
}
9790
-
uint16_t diff = uint16_t(word - 0xD800);
9797
+
char16_t diff = char16_t(word - 0xD800);
9791
9798
if (diff > 0x3FF) {
9792
9799
return result(error_code::SURROGATE, pos);
9793
9800
}
9794
-
uint16_t next_word =
9801
+
char16_t next_word =
9795
9802
!match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
9796
-
uint16_t diff2 = uint16_t(next_word - 0xDC00);
9803
+
char16_t diff2 = uint16_t(next_word - 0xDC00);
9797
9804
if (diff2 > 0x3FF) {
9798
9805
return result(error_code::SURROGATE, pos);
9799
9806
}
@@ -9806,24 +9813,22 @@ inline simdutf_warn_unused result validate_with_errors(const char16_t *buf,
9806
9813
}
9807
9814
9808
9815
// count_code_points: walks `len` 16-bit units and returns how many of them
// satisfy (word & 0xFC00) != 0xDC00, i.e. how many lie outside the range
// 0xDC00-0xDFFF. Each unit is passed through swap_bytes() first when
// !match_system(big_endian).
// In this listing a line following '-' is the removed version (the buffer is
// reinterpret_cast to const uint16_t *), and a line following '+' is the
// added version (the char16_t pointer is used directly).
template <endianness big_endian>
9809
-
inline size_t count_code_points(const char16_t *buf, size_t len) {
9816
+
inline size_t count_code_points(const char16_t *p, size_t len) {
9810
9817
// We are not BOM aware.
9811
-
const uint16_t *p = reinterpret_cast<const uint16_t *>(buf);
9812
9818
size_t counter{0};
9813
9819
for (size_t i = 0; i < len; i++) {
9814
-
uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
9820
+
char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
9815
9821
// Adds 1 for every unit outside 0xDC00-0xDFFF, 0 otherwise.
counter += ((word & 0xFC00) != 0xDC00);
9816
9822
}
9817
9823
return counter;
9818
9824
}
9819
9825
9820
9826
template <endianness big_endian>
9821
-
inline size_t utf8_length_from_utf16(const char16_t *buf, size_t len) {
9827
+
inline size_t utf8_length_from_utf16(const char16_t *p, size_t len) {
9822
9828
// We are not BOM aware.
9823
-
const uint16_t *p = reinterpret_cast<const uint16_t *>(buf);
9824
9829
size_t counter{0};
9825
9830
for (size_t i = 0; i < len; i++) {
9826
-
uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
9831
+
char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
9827
9832
counter++; // ASCII
9828
9833
counter += static_cast<size_t>(
9829
9834
word >
@@ -9835,25 +9840,22 @@ inline size_t utf8_length_from_utf16(const char16_t *buf, size_t len) {
9835
9840
}
9836
9841
9837
9842
// utf32_length_from_utf16: returns the number of 16-bit units among the
// first `len` that satisfy (word & 0xFC00) != 0xDC00, i.e. that lie outside
// 0xDC00-0xDFFF. Each unit is passed through swap_bytes() first when
// !match_system(big_endian). The loop body is the same as the one in
// count_code_points shown earlier in this listing.
// Lines following '-' are the removed version (reinterpret_cast to
// const uint16_t *); lines following '+' are the added version, which reads
// the char16_t pointer directly.
template <endianness big_endian>
9838
-
inline size_t utf32_length_from_utf16(const char16_t *buf, size_t len) {
9843
+
inline size_t utf32_length_from_utf16(const char16_t *p, size_t len) {
9839
9844
// We are not BOM aware.
9840
-
const uint16_t *p = reinterpret_cast<const uint16_t *>(buf);
9841
9845
size_t counter{0};
9842
9846
for (size_t i = 0; i < len; i++) {
9843
-
uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
9847
+
char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
9844
9848
// Adds 1 for every unit outside 0xDC00-0xDFFF, 0 otherwise.
counter += ((word & 0xFC00) != 0xDC00);
9845
9849
}
9846
9850
return counter;
9847
9851
}
9848
9852
9849
9853
// Latin-1 length reported for a UTF-16 input of `len` units: the result is
// `len` itself, returned unchanged.
inline size_t latin1_length_from_utf16(size_t len) {
  const size_t latin1_units = len;
  return latin1_units;
}
9850
9854
9851
-
// change_endianness_utf16: for each of the `size` input units, stores into
// the output the value (x >> 8 | x << 8) cast back to a 16-bit type, i.e.
// the unit with its two bytes exchanged. The output pointer is advanced by
// one unit per iteration.
// Lines following '-' are the removed version (both pointers are
// reinterpret_cast to uint16_t pointers and the result is cast to uint16_t);
// lines following '+' are the added version, which works on the char16_t
// pointers directly and casts the result to char16_t.
simdutf_really_inline void change_endianness_utf16(const char16_t *in,
9852
-
size_t size, char16_t *out) {
9853
-
const uint16_t *input = reinterpret_cast<const uint16_t *>(in);
9854
-
uint16_t *output = reinterpret_cast<uint16_t *>(out);
9855
+
simdutf_really_inline void
9856
+
change_endianness_utf16(const char16_t *input, size_t size, char16_t *output) {
9855
9857
for (size_t i = 0; i < size; i++) {
9856
-
*output++ = uint16_t(input[i] >> 8 | input[i] << 8);
9858
+
// The cast drops the bits that the left shift pushed above bit 15.
*output++ = char16_t(input[i] >> 8 | input[i] << 8);
9857
9859
}
9858
9860
}
9859
9861
@@ -21042,6 +21044,9 @@ struct validating_transcoder {
21042
21044
uint64_t utf8_continuation_mask =
21043
21045
input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in
21044
21046
// this case, we also have ASCII to account for.
21047
+
if (utf8_continuation_mask & 1) {
21048
+
return 0; // error
21049
+
}
21045
21050
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
21046
21051
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
21047
21052
// We process in blocks of up to 12 bytes except possibly
@@ -26717,6 +26722,14 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen,
26717
26722
}
26718
26723
26719
26724
if (!ignore_garbage && equalsigns > 0) {
26725
+
if (last_chunk_options == last_chunk_handling_options::strict) {
26726
+
return {BASE64_INPUT_REMAINDER, size_t(src - srcinit),
26727
+
size_t(dst - dstinit)};
26728
+
}
26729
+
if (last_chunk_options ==
26730
+
last_chunk_handling_options::stop_before_partial) {
26731
+
return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)};
26732
+
}
26720
26733
if ((size_t(dst - dstinit) % 3 == 0) ||
26721
26734
((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) {
26722
26735
return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)};
@@ -33161,6 +33174,9 @@ struct validating_transcoder {
33161
33174
uint64_t utf8_continuation_mask =
33162
33175
input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in
33163
33176
// this case, we also have ASCII to account for.
33177
+
if (utf8_continuation_mask & 1) {
33178
+
return 0; // error
33179
+
}
33164
33180
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
33165
33181
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
33166
33182
// We process in blocks of up to 12 bytes except possibly
@@ -43013,6 +43029,9 @@ struct validating_transcoder {
43013
43029
uint64_t utf8_continuation_mask =
43014
43030
input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in
43015
43031
// this case, we also have ASCII to account for.
43032
+
if (utf8_continuation_mask & 1) {
43033
+
return 0; // error
43034
+
}
43016
43035
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
43017
43036
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
43018
43037
// We process in blocks of up to 12 bytes except possibly
@@ -48110,6 +48129,9 @@ struct validating_transcoder {
48110
48129
uint64_t utf8_continuation_mask =
48111
48130
input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in
48112
48131
// this case, we also have ASCII to account for.
48132
+
if (utf8_continuation_mask & 1) {
48133
+
return 0; // error
48134
+
}
48113
48135
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
48114
48136
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
48115
48137
// We process in blocks of up to 12 bytes except possibly
@@ -54454,6 +54476,9 @@ struct validating_transcoder {
54454
54476
uint64_t utf8_continuation_mask =
54455
54477
input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in
54456
54478
// this case, we also have ASCII to account for.
54479
+
if (utf8_continuation_mask & 1) {
54480
+
return 0; // error
54481
+
}
54457
54482
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
54458
54483
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
54459
54484
// We process in blocks of up to 12 bytes except possibly
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4