I have to manage thousands of web pages in 4 languages,
and I find many Microsoft Word special characters that need to be removed or replaced.
As a first step I clean the pages with the Tidy library:

// Add the "word-2000: yes" line to the Tidy configuration; this is the Tidy
// option used here as the first cleaning pass for Microsoft Word markup.
tidiconfigfile.append("word-2000: yes");

Afterwards I find only one or two characters which Tidy cannot remove:
• . The first is a bullet that looks like the dot of an <li> tag (Unicode 8226) and the second is a real dot.
But since this HTML does not contain a list, I try to remove it this way:

Qt Code:
  1. static inline QString HTML2Filter( QString stream )
  2. {
  3. QMap<uint,QString> webchars;
  4. webchars.insert(8226,QString(" ")); /* • */
  5. webchars.insert(194,QString(" "));
  6. webchars.insert(13,QString(""));
  7. webchars.insert(10,QString(""));
  8. QdocXhtml *convert = new QdocXhtml();
  9. QString html = convert->Format(stream); /* tidy try to clean */
  10. for (int i = 0; i < html.size(); ++i) {
  11.  
  12. QChar sign = html.at(i);
  13. const uint asciiPos = (uint)sign.unicode();
  14. bool replaceUnis = false;
  15. if ((asciiPos >= 240) && (asciiPos <= 255)) {
  16. replaceUnis = true;
  17. } else if ((asciiPos >= 224) && (asciiPos <= 239)) {
  18. replaceUnis = true;
  19. } else if ((asciiPos >= 192) && (asciiPos <= 223)) {
  20. replaceUnis = true;
  21. } else if ( asciiPos >7999 ) {
  22. replaceUnis = true;
  23. }
  24. if (replaceUnis) {
  25. /////////QString charU = QString("%1").arg(asciiPos);
  26. ///////QByteArray preU = QByteArray();
  27. //////const int prepender = qBound(1,5 - charU.size(),5);
  28. ///////preU.fill('0',prepender);
  29. ///////const QString UnicodeChar = QString("&#%2%1").arg(charU).arg(preU.data());
  30. const QString UnicodeChar = QString("&#%1").arg(asciiPos);
  31. webchars.insert(asciiPos,UnicodeChar);
  32. }
  33.  
  34.  
  35. }
  36. QMapIterator<uint,QString> i(webchars);
  37. while (i.hasNext()) {
  38. i.next();
  39. html = html.replace(QChar(i.key()),i.value());
  40. }
  41. return html;
  42. }
To copy to clipboard, switch view to plain text mode 


But the character does not disappear. Why? Or does font() not have a glyph to draw it? How can I solve this?