/**
 * Counts the UTF-8 encoded characters (code points) stored in `str`.
 *
 * Every byte of a UTF-8 sequence announces its own role in its two most
 * significant bits: continuation bytes always look like 10xxxxxx, while
 * single-byte characters and the lead bytes of multi-byte sequences never
 * do. The number of characters is therefore the number of bytes that are
 * NOT continuation bytes.
 *
 * The input is not validated: malformed sequences do not raise an error,
 * their bytes are simply classified by the same bit test.
 *
 * @param str byte string to inspect; it is only read, never modified (the
 *            non-const reference is kept so the existing signature and any
 *            declaration elsewhere stay untouched)
 * @return number of bytes in `str` whose top two bits are not `10`
 */
std::size_t getUtf8Length(std::string &str) {
  const auto characterCount =
      std::count_if(str.cbegin(), str.cend(), [](const char byte) {
        // Mask selecting the two most significant bits of a byte, and the
        // value those bits hold in a continuation byte (10xxxxxx).
        constexpr unsigned char topTwoBitsMask = 0xC0;
        constexpr unsigned char continuationTag = 0x80;

        // Convert to unsigned char first so the test does not depend on
        // whether plain `char` is signed on the target platform.
        return (static_cast<unsigned char>(byte) & topTwoBitsMask) !=
            continuationTag;
      });

  // count_if yields a signed difference_type; the count is never negative,
  // so make the conversion to the unsigned return type explicit.
  return static_cast<std::size_t>(characterCount);
}