Remove word microsoft only chars by unicode
I have to manage tousend from webpage on 4 language..
and i find many word microsoft special Chars to remove or replace...
on first steep i clean it from Tidy lib
tidiconfigfile.append("word-2000: yes");
after i find only one or two char wo Tidy can not remove
• . first is a tag <li> similar dot (unicode 8226) and second a real dot
but this html not having list i try it to remove on this way..
Code:
{
QMap<uint,QString> webchars;
webchars.
insert(8226,
QString(" "));
/* • */ QdocXhtml *convert = new QdocXhtml();
QString html
= convert
->Format
(stream
);
/* tidy try to clean */ for (int i = 0; i < html.size(); ++i) {
const uint asciiPos = (uint)sign.unicode();
bool replaceUnis = false;
if ((asciiPos >= 240) && (asciiPos <= 255)) {
replaceUnis = true;
} else if ((asciiPos >= 224) && (asciiPos <= 239)) {
replaceUnis = true;
} else if ((asciiPos >= 192) && (asciiPos <= 223)) {
replaceUnis = true;
} else if ( asciiPos >7999 ) {
replaceUnis = true;
}
if (replaceUnis) {
/////////QString charU = QString("%1").arg(asciiPos);
///////QByteArray preU = QByteArray();
//////const int prepender = qBound(1,5 - charU.size(),5);
///////preU.fill('0',prepender);
///////const QString UnicodeChar = QString("&#%2%1").arg(charU).arg(preU.data());
webchars.insert(asciiPos,UnicodeChar);
}
}
QMapIterator<uint,QString> i(webchars);
while (i.hasNext()) {
i.next();
html
= html.
replace(QChar(i.
key()),i.
value());
}
return html;
}
and his is not disapper why? or font() not having sign to draw its ... how i can solve this?