PDA

View Full Version : Remove word microsoft only chars by unicode



patrik08
27th May 2008, 09:30
I have to manage tousend from webpage on 4 language..
and i find many word microsoft special Chars to remove or replace...
on first steep i clean it from Tidy lib

tidiconfigfile.append("word-2000: yes");

after i find only one or two char wo Tidy can not remove
• . first is a tag <li> similar dot (unicode 8226) and second a real dot
but this html not having list i try it to remove on this way..



static inline QString HTML2Filter( QString stream )
{
QMap<uint,QString> webchars;
webchars.insert(8226,QString(" ")); /* • */
webchars.insert(194,QString(" "));
webchars.insert(13,QString(""));
webchars.insert(10,QString(""));
QdocXhtml *convert = new QdocXhtml();
QString html = convert->Format(stream); /* tidy try to clean */
for (int i = 0; i < html.size(); ++i) {

QChar sign = html.at(i);
const uint asciiPos = (uint)sign.unicode();
bool replaceUnis = false;
if ((asciiPos >= 240) && (asciiPos <= 255)) {
replaceUnis = true;
} else if ((asciiPos >= 224) && (asciiPos <= 239)) {
replaceUnis = true;
} else if ((asciiPos >= 192) && (asciiPos <= 223)) {
replaceUnis = true;
} else if ( asciiPos >7999 ) {
replaceUnis = true;
}
if (replaceUnis) {
/////////QString charU = QString("%1").arg(asciiPos);
///////QByteArray preU = QByteArray();
//////const int prepender = qBound(1,5 - charU.size(),5);
///////preU.fill('0',prepender);
///////const QString UnicodeChar = QString("&#%2%1").arg(charU).arg(preU.data());
const QString UnicodeChar = QString("&#%1").arg(asciiPos);
webchars.insert(asciiPos,UnicodeChar);
}


}
QMapIterator<uint,QString> i(webchars);
while (i.hasNext()) {
i.next();
html = html.replace(QChar(i.key()),i.value());
}
return html;
}





and his is not disapper why? or font() not having sign to draw its ... how i can solve this?