convert_UTF: rewrite in C++

This allows us to namespace the symbols properly.

Bug: google-breakpad:725
Change-Id: Iea8052547eef6c0acb299c1995781735c6d8994f
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/1769236
Reviewed-by: Mark Mentovai <mark@chromium.org>
This commit is contained in:
Mike Frysinger
2019-08-03 12:12:40 -04:00
parent abfe08e789
commit db1cda2653
8 changed files with 53 additions and 44 deletions

View File

@@ -30,7 +30,7 @@
// Author: waylonis@google.com (Dan Waylonis)
/*
g++ -I../ ../common/convert_UTF.c \
g++ -I../ ../common/convert_UTF.cc \
../common/string_conversion.cc \
minidump_file_writer.cc \
minidump_file_writer_unittest.cc \

View File

@@ -40,13 +40,13 @@ BIN_DIR=.
THREAD_SRC=solaris_lwp.cc
SHARE_SRC=../../minidump_file_writer.cc\
../../../common/convert_UTF.cc\
../../../common/md5.cc\
../../../common/string_conversion.cc\
../../../common/solaris/file_id.cc\
minidump_generator.cc
HANDLER_SRC=exception_handler.cc\
../../../common/solaris/guid_creator.cc
SHARE_C_SRC=../../../common/convert_UTF.c
MINIDUMP_TEST_SRC=minidump_test.cc
EXCEPTION_TEST_SRC=exception_handler_test.cc
@@ -54,11 +54,10 @@ EXCEPTION_TEST_SRC=exception_handler_test.cc
THREAD_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(THREAD_SRC))
SHARE_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(SHARE_SRC))
HANDLER_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(HANDLER_SRC))
SHARE_C_OBJ=$(patsubst %.c,$(OBJ_DIR)/%.o,$(SHARE_C_SRC))
MINIDUMP_TEST_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o, $(MINIDUMP_TEST_SRC))\
$(THREAD_OBJ) $(SHARE_OBJ) $(SHARE_C_OBJ) $(HANDLER_OBJ)
$(THREAD_OBJ) $(SHARE_OBJ) $(HANDLER_OBJ)
EXCEPTION_TEST_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o, $(EXCEPTION_TEST_SRC))\
$(THREAD_OBJ) $(SHARE_OBJ) $(SHARE_C_OBJ) $(HANDLER_OBJ)
$(THREAD_OBJ) $(SHARE_OBJ) $(HANDLER_OBJ)
BIN=$(BIN_DIR)/minidump_test\
$(BIN_DIR)/exception_handler_test

View File

@@ -61,7 +61,7 @@
'android/ucontext_constants.h',
'basictypes.h',
'byte_cursor.h',
'convert_UTF.c',
'convert_UTF.cc',
'convert_UTF.h',
'dwarf/bytereader-inl.h',
'dwarf/bytereader.cc',

View File

@@ -60,10 +60,16 @@ See the header file "ConvertUTF.h" for complete documentation.
#include <stdio.h>
#endif
static const int halfShift = 10; /* used for shifting by 10 bits */
namespace google_breakpad {
static const UTF32 halfBase = 0x0010000UL;
static const UTF32 halfMask = 0x3FFUL;
namespace {
const int halfShift = 10; /* used for shifting by 10 bits */
const UTF32 halfBase = 0x0010000UL;
const UTF32 halfMask = 0x3FFUL;
} // namespace
#define UNI_SUR_HIGH_START (UTF32)0xD800
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
@@ -183,6 +189,8 @@ ConversionResult ConvertUTF16toUTF32 (const UTF16** sourceStart, const UTF16* so
/* --------------------------------------------------------------------- */
namespace {
/*
* Index into the table below with the first byte of a UTF-8 sequence to
* get the number of trailing bytes that are supposed to follow it.
@@ -190,7 +198,7 @@ ConversionResult ConvertUTF16toUTF32 (const UTF16** sourceStart, const UTF16* so
* left as-is for anyone who may want to do such conversion, which was
* allowed in earlier algorithms.
*/
static const char trailingBytesForUTF8[256] = {
const char trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -206,7 +214,7 @@ static const char trailingBytesForUTF8[256] = {
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence.
*/
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
@@ -216,7 +224,7 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
* (I.e., one byte sequence, two byte... etc.). Remember that sequencs
* for *legal* UTF-8 will be 4 or fewer bytes total.
*/
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
/* --------------------------------------------------------------------- */
@@ -228,6 +236,8 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
* into an inline function.
*/
} // namespace
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF16toUTF8 (const UTF16** sourceStart, const UTF16* sourceEnd,
@@ -299,6 +309,8 @@ return result;
/* --------------------------------------------------------------------- */
namespace {
/*
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
* This must be called with the length pre-determined by the first byte.
@@ -309,8 +321,7 @@ return result;
* If presented with a length > 4, this returns false. The Unicode
* definition of UTF-8 goes up to 4-byte sequences.
*/
static Boolean isLegalUTF8(const UTF8 *source, int length) {
Boolean isLegalUTF8(const UTF8 *source, int length) {
UTF8 a;
const UTF8 *srcptr = source+length;
switch (length) {
@@ -335,6 +346,8 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
return true;
}
} // namespace
/* --------------------------------------------------------------------- */
/*
@@ -552,3 +565,5 @@ In UTF-8 writing code, the switches on "bytesToWrite" are
similarly unrolled loops.
--------------------------------------------------------------------- */
} // namespace google_breakpad

View File

@@ -106,6 +106,8 @@ All should be unsigned values to avoid sign extension during
bit mask & shift operations.
------------------------------------------------------------------------ */
namespace google_breakpad {
typedef unsigned long UTF32; /* at least 32 bits */
typedef unsigned short UTF16; /* at least 16 bits */
typedef unsigned char UTF8; /* typically 8 bits */
@@ -130,11 +132,6 @@ typedef enum {
lenientConversion
} ConversionFlags;
/* This is for C++ and does no harm in C */
#ifdef __cplusplus
extern "C" {
#endif
ConversionResult ConvertUTF8toUTF16 (const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
@@ -155,9 +152,7 @@ ConversionResult ConvertUTF32toUTF16 (const UTF32** sourceStart, const UTF32* so
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
#ifdef __cplusplus
}
#endif
} // namespace google_breakpad
/* --------------------------------------------------------------------- */