00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef CHARACTER
00018 #define CHARACTER
00019
00020 #include <string>
00021 #include <list>
00022 #include "phonetic_symbol.h"
00023 #include "utf8.h"
00024 using namespace std;
00025
00026 namespace ekho {
00027 class Character {
00028 public:
00029 Character(void): code(0), phonSymbol(0) { };
00030 Character(unsigned int code): code(code), phonSymbol(0) {
00031 };
00032 Character(const Character &c): code(c.code), phonSymbol(c.phonSymbol) {
00033 };
00034 Character(unsigned int code, PhoneticSymbol *symbol):
00035 code(code),
00036 phonSymbol(symbol) {};
00037 Character(string &utf8, PhoneticSymbol *symbol):
00038 phonSymbol(symbol) {
00039 setUtf8(utf8);
00040 };
00041
00042 unsigned int code;
00043 PhoneticSymbol *phonSymbol;
00044
00045 string getUtf8(void) {
00046 char buf[5] = {0};
00047 try {
00048 utf8::append(code, buf);
00049 } catch (...) {
00050 cerr << "code point:" << code << endl;
00051 }
00052 return string(buf);
00053 };
00054
00055 void setUtf8(string &utf8) {
00056 code = utf8::peek_next(utf8.begin(), utf8.end());
00057 };
00058
00059 static list<Character> split(string &text) {
00060 list<Character> char_list;
00061 int c;
00062 bool is_finish = false;
00063 string::iterator it = text.begin();
00064 string::iterator end = text.end();
00065
00066 while (!is_finish && it != end) {
00067 try {
00068 c = utf8::next(it, end);
00069 char_list.push_back(c);
00070 } catch (utf8::not_enough_room &) {
00071 is_finish = true;
00072 } catch (utf8::invalid_utf8 &) {
00073 cerr << "Invalid UTF8 encoding" << endl;
00074 is_finish = true;
00075 }
00076 }
00077
00078 return char_list;
00079 };
00080 };
00081 }
00082
00083 #endif