mirror of
https://git.suyu.dev/suyu/breakpad.git
synced 2026-02-19 00:39:38 +00:00
convert_UTF: rewrite in C++
This allows us to namespace the symbols properly.

Bug: google-breakpad:725
Change-Id: Iea8052547eef6c0acb299c1995781735c6d8994f
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/1769236
Reviewed-by: Mark Mentovai <mark@chromium.org>
This commit is contained in:
@@ -30,7 +30,7 @@
|
||||
// Author: waylonis@google.com (Dan Waylonis)
|
||||
|
||||
/*
|
||||
g++ -I../ ../common/convert_UTF.c \
|
||||
g++ -I../ ../common/convert_UTF.cc \
|
||||
../common/string_conversion.cc \
|
||||
minidump_file_writer.cc \
|
||||
minidump_file_writer_unittest.cc \
|
||||
|
||||
@@ -40,13 +40,13 @@ BIN_DIR=.
|
||||
|
||||
THREAD_SRC=solaris_lwp.cc
|
||||
SHARE_SRC=../../minidump_file_writer.cc\
|
||||
../../../common/convert_UTF.cc\
|
||||
../../../common/md5.cc\
|
||||
../../../common/string_conversion.cc\
|
||||
../../../common/solaris/file_id.cc\
|
||||
minidump_generator.cc
|
||||
HANDLER_SRC=exception_handler.cc\
|
||||
../../../common/solaris/guid_creator.cc
|
||||
SHARE_C_SRC=../../../common/convert_UTF.c
|
||||
|
||||
MINIDUMP_TEST_SRC=minidump_test.cc
|
||||
EXCEPTION_TEST_SRC=exception_handler_test.cc
|
||||
@@ -54,11 +54,10 @@ EXCEPTION_TEST_SRC=exception_handler_test.cc
|
||||
THREAD_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(THREAD_SRC))
|
||||
SHARE_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(SHARE_SRC))
|
||||
HANDLER_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(HANDLER_SRC))
|
||||
SHARE_C_OBJ=$(patsubst %.c,$(OBJ_DIR)/%.o,$(SHARE_C_SRC))
|
||||
MINIDUMP_TEST_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o, $(MINIDUMP_TEST_SRC))\
|
||||
$(THREAD_OBJ) $(SHARE_OBJ) $(SHARE_C_OBJ) $(HANDLER_OBJ)
|
||||
$(THREAD_OBJ) $(SHARE_OBJ) $(HANDLER_OBJ)
|
||||
EXCEPTION_TEST_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o, $(EXCEPTION_TEST_SRC))\
|
||||
$(THREAD_OBJ) $(SHARE_OBJ) $(SHARE_C_OBJ) $(HANDLER_OBJ)
|
||||
$(THREAD_OBJ) $(SHARE_OBJ) $(HANDLER_OBJ)
|
||||
|
||||
BIN=$(BIN_DIR)/minidump_test\
|
||||
$(BIN_DIR)/exception_handler_test
|
||||
|
||||
@@ -61,7 +61,7 @@
|
||||
'android/ucontext_constants.h',
|
||||
'basictypes.h',
|
||||
'byte_cursor.h',
|
||||
'convert_UTF.c',
|
||||
'convert_UTF.cc',
|
||||
'convert_UTF.h',
|
||||
'dwarf/bytereader-inl.h',
|
||||
'dwarf/bytereader.cc',
|
||||
|
||||
@@ -60,10 +60,16 @@ See the header file "ConvertUTF.h" for complete documentation.
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
static const int halfShift = 10; /* used for shifting by 10 bits */
|
||||
namespace google_breakpad {
|
||||
|
||||
static const UTF32 halfBase = 0x0010000UL;
|
||||
static const UTF32 halfMask = 0x3FFUL;
|
||||
namespace {
|
||||
|
||||
const int halfShift = 10; /* used for shifting by 10 bits */
|
||||
|
||||
const UTF32 halfBase = 0x0010000UL;
|
||||
const UTF32 halfMask = 0x3FFUL;
|
||||
|
||||
} // namespace
|
||||
|
||||
#define UNI_SUR_HIGH_START (UTF32)0xD800
|
||||
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
|
||||
@@ -183,6 +189,8 @@ ConversionResult ConvertUTF16toUTF32 (const UTF16** sourceStart, const UTF16* so
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
namespace {
|
||||
|
||||
/*
|
||||
* Index into the table below with the first byte of a UTF-8 sequence to
|
||||
* get the number of trailing bytes that are supposed to follow it.
|
||||
@@ -190,7 +198,7 @@ ConversionResult ConvertUTF16toUTF32 (const UTF16** sourceStart, const UTF16* so
|
||||
* left as-is for anyone who may want to do such conversion, which was
|
||||
* allowed in earlier algorithms.
|
||||
*/
|
||||
static const char trailingBytesForUTF8[256] = {
|
||||
const char trailingBytesForUTF8[256] = {
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
@@ -206,7 +214,7 @@ static const char trailingBytesForUTF8[256] = {
|
||||
* This table contains as many values as there might be trailing bytes
|
||||
* in a UTF-8 sequence.
|
||||
*/
|
||||
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
||||
const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
||||
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
|
||||
|
||||
/*
|
||||
@@ -216,7 +224,7 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
|
||||
* (I.e., one byte sequence, two byte... etc.). Remember that sequences
|
||||
* for *legal* UTF-8 will be 4 or fewer bytes total.
|
||||
*/
|
||||
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||
const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
@@ -228,6 +236,8 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
|
||||
* into an inline function.
|
||||
*/
|
||||
|
||||
} // namespace
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF16toUTF8 (const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
@@ -299,6 +309,8 @@ return result;
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
namespace {
|
||||
|
||||
/*
|
||||
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
|
||||
* This must be called with the length pre-determined by the first byte.
|
||||
@@ -309,8 +321,7 @@ return result;
|
||||
* If presented with a length > 4, this returns false. The Unicode
|
||||
* definition of UTF-8 goes up to 4-byte sequences.
|
||||
*/
|
||||
|
||||
static Boolean isLegalUTF8(const UTF8 *source, int length) {
|
||||
Boolean isLegalUTF8(const UTF8 *source, int length) {
|
||||
UTF8 a;
|
||||
const UTF8 *srcptr = source+length;
|
||||
switch (length) {
|
||||
@@ -335,6 +346,8 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
@@ -552,3 +565,5 @@ In UTF-8 writing code, the switches on "bytesToWrite" are
|
||||
similarly unrolled loops.
|
||||
|
||||
--------------------------------------------------------------------- */
|
||||
|
||||
} // namespace google_breakpad
|
||||
@@ -106,6 +106,8 @@ All should be unsigned values to avoid sign extension during
|
||||
bit mask & shift operations.
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
namespace google_breakpad {
|
||||
|
||||
typedef unsigned long UTF32; /* at least 32 bits */
|
||||
typedef unsigned short UTF16; /* at least 16 bits */
|
||||
typedef unsigned char UTF8; /* typically 8 bits */
|
||||
@@ -130,11 +132,6 @@ typedef enum {
|
||||
lenientConversion
|
||||
} ConversionFlags;
|
||||
|
||||
/* This is for C++ and does no harm in C */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
ConversionResult ConvertUTF8toUTF16 (const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
|
||||
|
||||
@@ -155,9 +152,7 @@ ConversionResult ConvertUTF32toUTF16 (const UTF32** sourceStart, const UTF32* so
|
||||
|
||||
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
} // namespace google_breakpad
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user