Merge pull request #4253 from wwylele/string-util-cleanup

common/string_util cleanup
This commit is contained in:
bunnei 2018-10-01 17:52:58 -04:00 committed by GitHub
commit 4a5a97ab88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 7 additions and 158 deletions

View File

@ -289,14 +289,6 @@ ELSEIF (CMAKE_SYSTEM_NAME MATCHES "^(Linux|kFreeBSD|GNU|SunOS)$")
set(PLATFORM_LIBRARIES rt) set(PLATFORM_LIBRARIES rt)
ENDIF (APPLE) ENDIF (APPLE)
# MINGW: GCC does not support codecvt, so use iconv instead
# On UNIX-like and MinGW builds, look for a standalone iconv library and, when one is
# found, add it to the platform link libraries.
if (UNIX OR MINGW)
# Searches for a library named "iconv"; the result is stored in ICONV_LIBRARY.
find_library(ICONV_LIBRARY NAMES iconv)
# Only append when the lookup succeeded; nothing here reports an error if it did not.
if (ICONV_LIBRARY)
list(APPEND PLATFORM_LIBRARIES ${ICONV_LIBRARY})
endif()
endif()
# Setup a custom clang-format target (if clang-format can be found) that will run # Setup a custom clang-format target (if clang-format can be found) that will run
# against all the src files. This should be used before making a pull request. # against all the src files. This should be used before making a pull request.
# ======================================================================= # =======================================================================

View File

@ -263,7 +263,7 @@ private:
template <typename T> template <typename T>
void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode) { void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode) {
#ifdef _MSC_VER #ifdef _MSC_VER
fstream.open(Common::UTF8ToTStr(filename).c_str(), openmode); fstream.open(Common::UTF8ToUTF16W(filename).c_str(), openmode);
#else #else
fstream.open(filename.c_str(), openmode); fstream.open(filename.c_str(), openmode);
#endif #endif

View File

@ -5,6 +5,7 @@
#include <algorithm> #include <algorithm>
#include <cctype> #include <cctype>
#include <cerrno> #include <cerrno>
#include <codecvt>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
@ -13,11 +14,7 @@
#include "common/string_util.h" #include "common/string_util.h"
#ifdef _WIN32 #ifdef _WIN32
#include <codecvt>
#include <windows.h> #include <windows.h>
#include "common/common_funcs.h"
#else
#include <iconv.h>
#endif #endif
namespace Common { namespace Common {
@ -191,11 +188,9 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st
return result; return result;
} }
#ifdef _WIN32
std::string UTF16ToUTF8(const std::u16string& input) { std::string UTF16ToUTF8(const std::u16string& input) {
#if _MSC_VER >= 1900 #ifdef _MSC_VER
// Workaround for missing char16_t/char32_t instantiations in MSVC2015 // Workaround for missing char16_t/char32_t instantiations in MSVC2017
std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert; std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert;
std::basic_string<__int16> tmp_buffer(input.cbegin(), input.cend()); std::basic_string<__int16> tmp_buffer(input.cbegin(), input.cend());
return convert.to_bytes(tmp_buffer); return convert.to_bytes(tmp_buffer);
@ -206,8 +201,8 @@ std::string UTF16ToUTF8(const std::u16string& input) {
} }
std::u16string UTF8ToUTF16(const std::string& input) { std::u16string UTF8ToUTF16(const std::string& input) {
#if _MSC_VER >= 1900 #ifdef _MSC_VER
// Workaround for missing char16_t/char32_t instantiations in MSVC2015 // Workaround for missing char16_t/char32_t instantiations in MSVC2017
std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert; std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert;
auto tmp_buffer = convert.from_bytes(input); auto tmp_buffer = convert.from_bytes(input);
return std::u16string(tmp_buffer.cbegin(), tmp_buffer.cend()); return std::u16string(tmp_buffer.cbegin(), tmp_buffer.cend());
@ -217,6 +212,7 @@ std::u16string UTF8ToUTF16(const std::string& input) {
#endif #endif
} }
#ifdef _WIN32
static std::wstring CPToUTF16(u32 code_page, const std::string& input) { static std::wstring CPToUTF16(u32 code_page, const std::string& input) {
const auto size = const auto size =
MultiByteToWideChar(code_page, 0, input.data(), static_cast<int>(input.size()), nullptr, 0); MultiByteToWideChar(code_page, 0, input.data(), static_cast<int>(input.size()), nullptr, 0);
@ -257,124 +253,6 @@ std::wstring UTF8ToUTF16W(const std::string& input) {
return CPToUTF16(CP_UTF8, input); return CPToUTF16(CP_UTF8, input);
} }
/// Converts text in Windows code page 932 to UTF-8 by widening it to UTF-16 first.
std::string SHIFTJISToUTF8(const std::string& input) {
    const std::wstring wide_text = CPToUTF16(932, input);
    return UTF16ToUTF8(wide_text);
}
/// Converts text in Windows code page 1252 to UTF-8 by widening it to UTF-16 first.
std::string CP1252ToUTF8(const std::string& input) {
    const std::wstring wide_text = CPToUTF16(1252, input);
    return UTF16ToUTF8(wide_text);
}
#else
/**
 * Converts text from the iconv encoding named by `fromcode` to UTF-8.
 *
 * @param fromcode iconv name of the source encoding (e.g. "UTF-16LE", "CP1252").
 * @param input    Source text; handed to iconv as sizeof(T) * input.size() raw bytes.
 * @return The UTF-8 text produced so far. Code units that iconv rejects (EILSEQ/EINVAL)
 *         are skipped; any other iconv error is logged and stops the conversion. An empty
 *         string is returned when the converter cannot be opened.
 */
template <typename T>
static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input) {
    iconv_t const conv_desc = iconv_open("UTF-8", fromcode);
    if ((iconv_t)(-1) == conv_desc) {
        LOG_ERROR(Common, "Iconv initialization failure [{}]: {}", fromcode, strerror(errno));
        // Nothing to close: iconv_open failed, so the descriptor is not valid.
        return {};
    }

    const std::size_t in_bytes = sizeof(T) * input.size();
    // Multiply by 4, which is the max number of bytes to encode a codepoint
    const std::size_t out_buffer_size = 4 * in_bytes;

    std::string out_buffer(out_buffer_size, '\0');

    // iconv consumes and produces raw bytes, so both cursors are byte pointers and both
    // counters are byte counts. iconv never writes through the input pointer.
    char* src_buffer = const_cast<char*>(reinterpret_cast<const char*>(input.data()));
    std::size_t src_bytes = in_bytes;
    char* dst_buffer = &out_buffer[0];
    std::size_t dst_bytes = out_buffer.size();

    while (0 != src_bytes) {
        const std::size_t iconv_result =
            iconv(conv_desc, &src_buffer, &src_bytes, &dst_buffer, &dst_bytes);

        if (static_cast<std::size_t>(-1) == iconv_result) {
            if (EILSEQ == errno || EINVAL == errno) {
                // Skip one whole input code unit. The cursor and the remaining byte count
                // must move by the same amount, otherwise iconv is told to read past the
                // end of the input.
                const std::size_t skip = std::min(sizeof(T), src_bytes);
                src_bytes -= skip;
                src_buffer += skip;
            } else {
                LOG_ERROR(Common, "iconv failure [{}]: {}", fromcode, strerror(errno));
                break;
            }
        }
    }

    iconv_close(conv_desc);

    // Trim the unused tail of the output buffer.
    out_buffer.resize(out_buffer_size - dst_bytes);
    return out_buffer;
}
/**
 * Converts UTF-8 text to UTF-16 (little endian) using iconv.
 *
 * @param input UTF-8 encoded text.
 * @return The UTF-16 code units produced so far. Bytes that iconv rejects (EILSEQ/EINVAL)
 *         are skipped; any other iconv error is logged and stops the conversion. An empty
 *         string is returned when the converter cannot be opened.
 */
std::u16string UTF8ToUTF16(const std::string& input) {
    iconv_t const conv_desc = iconv_open("UTF-16LE", "UTF-8");
    if ((iconv_t)(-1) == conv_desc) {
        LOG_ERROR(Common, "Iconv initialization failure [UTF-8]: {}", strerror(errno));
        // Nothing to close: iconv_open failed, so the descriptor is not valid.
        return {};
    }

    const std::size_t in_bytes = input.size();
    // A UTF-8 sequence of 1-3 bytes becomes one UTF-16 code unit and a 4-byte sequence
    // becomes two, so the output never needs more code units than there are input bytes.
    std::u16string out_buffer(in_bytes, char16_t{});
    const std::size_t out_buffer_bytes = out_buffer.size() * sizeof(char16_t);

    // iconv works on raw bytes; it never writes through the input pointer.
    char* src_buffer = const_cast<char*>(input.data());
    std::size_t src_bytes = in_bytes;
    char* dst_buffer = reinterpret_cast<char*>(&out_buffer[0]);
    // Remaining output space in BYTES (not char16_t elements).
    std::size_t dst_bytes = out_buffer_bytes;

    while (0 != src_bytes) {
        const std::size_t iconv_result =
            iconv(conv_desc, &src_buffer, &src_bytes, &dst_buffer, &dst_bytes);

        if (static_cast<std::size_t>(-1) == iconv_result) {
            if (EILSEQ == errno || EINVAL == errno) {
                // Try to skip the bad character
                if (0 != src_bytes) {
                    --src_bytes;
                    ++src_buffer;
                }
            } else {
                LOG_ERROR(Common, "iconv failure [UTF-8]: {}", strerror(errno));
                break;
            }
        }
    }

    iconv_close(conv_desc);

    // Convert the number of bytes written back into a code-unit count before trimming, so
    // the result carries no trailing NUL padding.
    out_buffer.resize((out_buffer_bytes - dst_bytes) / sizeof(char16_t));
    return out_buffer;
}
std::string UTF16ToUTF8(const std::u16string& input) {
return CodeToUTF8("UTF-16LE", input);
}
std::string CP1252ToUTF8(const std::string& input) {
// return CodeToUTF8("CP1252//TRANSLIT", input);
// return CodeToUTF8("CP1252//IGNORE", input);
return CodeToUTF8("CP1252", input);
}
std::string SHIFTJISToUTF8(const std::string& input) {
// return CodeToUTF8("CP932", input);
return CodeToUTF8("SJIS", input);
}
#endif #endif
std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, std::size_t max_len) { std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, std::size_t max_len) {

View File

@ -70,31 +70,10 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st
std::string UTF16ToUTF8(const std::u16string& input); std::string UTF16ToUTF8(const std::u16string& input);
std::u16string UTF8ToUTF16(const std::string& input); std::u16string UTF8ToUTF16(const std::string& input);
std::string CP1252ToUTF8(const std::string& str);
std::string SHIFTJISToUTF8(const std::string& str);
#ifdef _WIN32 #ifdef _WIN32
std::string UTF16ToUTF8(const std::wstring& input); std::string UTF16ToUTF8(const std::wstring& input);
std::wstring UTF8ToUTF16W(const std::string& str); std::wstring UTF8ToUTF16W(const std::string& str);
#ifdef _UNICODE
/// _UNICODE build: the TCHAR string is wide, so it is converted from UTF-16 to UTF-8.
inline std::string TStrToUTF8(const std::wstring& str) {
    std::string utf8 = UTF16ToUTF8(str);
    return utf8;
}
/// _UNICODE build: the TCHAR string is wide, so the UTF-8 input is converted to UTF-16.
inline std::wstring UTF8ToTStr(const std::string& str) {
    std::wstring wide = UTF8ToUTF16W(str);
    return wide;
}
#else
/// Non-_UNICODE build: the TCHAR string is already narrow, so a plain copy is returned.
inline std::string TStrToUTF8(const std::string& str) {
    std::string copy(str);
    return copy;
}
/// Non-_UNICODE build: the TCHAR string is narrow, so the UTF-8 input is returned as a copy.
inline std::string UTF8ToTStr(const std::string& str) {
    std::string copy(str);
    return copy;
}
#endif
#endif #endif
/** /**