|
|
|
@ -75,6 +75,11 @@ int pf_bsearch_r(void *userdata, size_t num_data, pf_bsearch_cb_comp_t cb_comp,
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Returns true if passed byte is first byte of UTF-8 char sequence */
|
|
|
|
|
static inline bool utf8_is_start_byte_of_char(const uint8_t b) {
|
|
|
|
|
return 0x80 != (b & 0xC0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* This function gets the character at the pstart position, interpreting UTF8 multibyte sequences
|
|
|
|
|
and returns the pointer to the next character */
|
|
|
|
|
uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t *pval) {
|
|
|
|
@ -131,8 +136,8 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t
|
|
|
|
|
p++;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
else if (0x80 == (0xC0 & valcur))
|
|
|
|
|
for (; 0x80 == (0xC0 & valcur); ) { p++; valcur = cb_read_byte(p); }
|
|
|
|
|
else if (!utf8_is_start_byte_of_char(valcur))
|
|
|
|
|
for (; !utf8_is_start_byte_of_char(valcur); ) { p++; valcur = cb_read_byte(p); }
|
|
|
|
|
else
|
|
|
|
|
for (; 0xFC < (0xFE & valcur); ) { p++; valcur = cb_read_byte(p); }
|
|
|
|
|
|
|
|
|
@ -143,12 +148,12 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t
|
|
|
|
|
|
|
|
|
|
static inline uint8_t utf8_strlen_cb(const char *pstart, read_byte_cb_t cb_read_byte) {
|
|
|
|
|
uint8_t cnt = 0;
|
|
|
|
|
uint8_t *pnext = (uint8_t *)pstart;
|
|
|
|
|
uint8_t *p = (uint8_t *)pstart;
|
|
|
|
|
for (;;) {
|
|
|
|
|
wchar_t ch;
|
|
|
|
|
pnext = get_utf8_value_cb(pnext, cb_read_byte, &ch);
|
|
|
|
|
if (!ch) break;
|
|
|
|
|
cnt++;
|
|
|
|
|
const uint8_t b = cb_read_byte(p);
|
|
|
|
|
if (!b) break;
|
|
|
|
|
if (utf8_is_start_byte_of_char(b)) cnt++;
|
|
|
|
|
p++;
|
|
|
|
|
}
|
|
|
|
|
return cnt;
|
|
|
|
|
}
|
|
|
|
@ -160,3 +165,26 @@ uint8_t utf8_strlen(const char *pstart) {
|
|
|
|
|
uint8_t utf8_strlen_P(PGM_P pstart) {
|
|
|
|
|
return utf8_strlen_cb(pstart, read_byte_rom);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline uint8_t utf8_byte_pos_by_char_num_cb(const char *pstart, read_byte_cb_t cb_read_byte, const uint8_t charnum) {
|
|
|
|
|
uint8_t *p = (uint8_t *)pstart;
|
|
|
|
|
uint8_t char_idx = 0;
|
|
|
|
|
uint8_t byte_idx = 0;
|
|
|
|
|
for (;;) {
|
|
|
|
|
const uint8_t b = cb_read_byte(p + byte_idx);
|
|
|
|
|
if (!b) return byte_idx; // Termination byte of string
|
|
|
|
|
if (utf8_is_start_byte_of_char(b)) {
|
|
|
|
|
char_idx++;
|
|
|
|
|
if (char_idx == charnum + 1) return byte_idx;
|
|
|
|
|
}
|
|
|
|
|
byte_idx++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t utf8_byte_pos_by_char_num(const char *pstart, const uint8_t charnum) {
|
|
|
|
|
return utf8_byte_pos_by_char_num_cb(pstart, read_byte_ram, charnum);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t utf8_byte_pos_by_char_num_P(PGM_P pstart, const uint8_t charnum) {
|
|
|
|
|
return utf8_byte_pos_by_char_num_cb(pstart, read_byte_rom, charnum);
|
|
|
|
|
}
|
|
|
|
|