You have a long way to go...
This function is from the Qt sources:
{
if (!str)
if (size < 0)
size = qstrlen(str);
result.resize(size); // worst case
ushort *qch = result.d->data;
uint uc = 0;
uint min_uc = 0;
int need = 0;
int error = -1;
uchar ch;
int i = 0;
// skip utf8-encoded byte order mark
if (size >= 3
&& (uchar)str[0] == 0xef && (uchar)str[1] == 0xbb && (uchar)str[2] == 0xbf)
i += 3;
for (; i < size; ++i) {
ch = str[i];
if (need) {
if ((ch&0xc0) == 0x80) {
uc = (uc << 6) | (ch & 0x3f);
need--;
if (!need) {
if (uc > 0xffffU && uc < 0x110000U) {
// surrogate pair
*qch
++ = QChar::highSurrogate(uc
);
uc
= QChar::lowSurrogate(uc
);
} else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
// overlong seqence, UTF16 surrogate or BOM
uc
= QChar::ReplacementCharacter;
}
*qch++ = uc;
}
} else {
i = error;
need = 0;
*qch
++ = QChar::ReplacementCharacter;
}
} else {
if (ch < 128) {
*qch++ = ch;
} else if ((ch & 0xe0) == 0xc0) {
uc = ch & 0x1f;
need = 1;
error = i;
min_uc = 0x80;
} else if ((ch & 0xf0) == 0xe0) {
uc = ch & 0x0f;
need = 2;
error = i;
min_uc = 0x800;
} else if ((ch&0xf8) == 0xf0) {
uc = ch & 0x07;
need = 3;
error = i;
min_uc = 0x10000;
} else {
// Error
*qch
++ = QChar::ReplacementCharacter;
}
}
}
if (need) {
// we have some invalid characters remaining we need to add to the string
for (int i = error; i < size; ++i)
*qch
++ = QChar::ReplacementCharacter;
}
result.truncate(qch - result.d->data);
return result;
}
// Decodes the first `size` bytes of the UTF-8 data in `str` into a QString.
//
// A null `str` yields a default-constructed QString. A negative `size` means
// "use qstrlen(str)". A leading UTF-8 byte order mark (EF BB BF) is skipped.
// Code points above 0xffff are written as a UTF-16 surrogate pair. Malformed
// input (stray or missing continuation bytes, overlong encodings, encoded
// surrogates, values >= 0xfffe in the BMP or beyond 0x10ffff) is replaced by
// QChar::ReplacementCharacter.
QString QString::fromUtf8(const char *str, int size)
{
    if (!str)
        return QString();
    if (size < 0)
        size = qstrlen(str);

    QString result;
    result.resize(size); // worst case: one UTF-16 unit per input byte
    ushort *out = result.d->data;

    uint codePoint = 0;    // value accumulated for the sequence being decoded
    uint lowestValid = 0;  // smallest value the current sequence length may encode
    int pending = 0;       // continuation bytes still expected
    int seqStart = -1;     // index of the lead byte of the current sequence

    // Step over a UTF-8 encoded byte order mark, if present.
    int pos = 0;
    const bool hasBom = size >= 3
        && (uchar)str[0] == 0xef && (uchar)str[1] == 0xbb && (uchar)str[2] == 0xbf;
    if (hasBom)
        pos = 3;

    while (pos < size) {
        const uchar byte = str[pos];

        if (!pending) {
            // Start of a new character: plain ASCII or a multi-byte lead byte.
            if (byte < 128) {
                *out++ = byte;
            } else if ((byte & 0xe0) == 0xc0) {
                // two-byte sequence
                codePoint = byte & 0x1f;
                lowestValid = 0x80;
                pending = 1;
                seqStart = pos;
            } else if ((byte & 0xf0) == 0xe0) {
                // three-byte sequence
                codePoint = byte & 0x0f;
                lowestValid = 0x800;
                pending = 2;
                seqStart = pos;
            } else if ((byte & 0xf8) == 0xf0) {
                // four-byte sequence
                codePoint = byte & 0x07;
                lowestValid = 0x10000;
                pending = 3;
                seqStart = pos;
            } else {
                // Neither ASCII nor a recognised lead byte.
                *out++ = QChar::ReplacementCharacter;
            }
        } else if ((byte & 0xc0) != 0x80) {
            // Expected a continuation byte and did not get one: emit a single
            // replacement for the lead byte and rewind so that decoding
            // resumes with the byte right after it.
            pos = seqStart;
            pending = 0;
            *out++ = QChar::ReplacementCharacter;
        } else {
            codePoint = (codePoint << 6) | (byte & 0x3f);
            --pending;
            if (pending == 0) {
                if (codePoint > 0xffffU && codePoint < 0x110000U) {
                    // Outside the BMP: emit the high half now, the low half below.
                    *out++ = QChar::highSurrogate(codePoint);
                    codePoint = QChar::lowSurrogate(codePoint);
                } else {
                    const bool overlong = codePoint < lowestValid;
                    const bool surrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
                    const bool outOfRange = codePoint >= 0xfffe;
                    if (overlong || surrogate || outOfRange)
                        codePoint = QChar::ReplacementCharacter;
                }
                *out++ = codePoint;
            }
        }

        ++pos;
    }

    if (pending) {
        // The input ended in the middle of a sequence: every byte from its
        // lead byte to the end becomes a replacement character.
        for (int k = seqStart; k < size; ++k)
            *out++ = QChar::ReplacementCharacter;
    }

    result.truncate(out - result.d->data);
    return result;
}
To copy to clipboard, switch view to plain text mode
As I told you, the data is not lost; everything depends on how you read it. Another nice example is sending an int directly over the network. Every function that sends data over the network takes a char* pointer. Let's say we write:
int i = 100;
// Pass the address of i so the bytes of the integer itself are sent;
// casting the value of i to a pointer would make write() read from address 100.
socket->write( reinterpret_cast<const char*>(&i), sizeof(int) );
int i = 100;
// Pass the address of i so the bytes of the integer itself are sent;
// casting the value of i to a pointer would make write() read from address 100.
socket->write( reinterpret_cast<const char*>(&i), sizeof(int) );
To copy to clipboard, switch view to plain text mode
And now what - is the integer lost? On the other side you will get 4 bytes (on a 32-bit OS); then you just need to do some bit operations in order to get the integer back.
Bookmarks