Hi there,

I am using QHttp to fetch a website that contains German characters.
For some reason they are not encoded as entities (e.g. &auml;), but arrive as raw characters ('ä').

After fetching the page, I want to run it through HTML Tidy (to turn it into valid XML), then parse it and search for some text, but the German characters are not processed correctly.

main.cpp:
Qt Code:
  1. #include <QtCore/QCoreApplication>
  2. #include <QProcess>
  3. #include <QBuffer>
  4. #include <QDomDocument>
  5. #include <iostream>
  6. #include "synchttp.h"
  7. #include <QFile>
  8.  
  9. using namespace std;
  10.  
  11. int main(int argc, char **argv ) // Downloads a page, pipes it through tidy, parses tidy's output as XML and searches it for a fixed text.
  12. {
  13. QCoreApplication a(argc, argv);
  14. QBuffer buffer; // target device handed to syncGet for the response
  15. SyncHTTP http("back2hack.cc");
  16. http.syncGet("/", &buffer); // blocking GET of "/"; the returned success flag is ignored
  17. QByteArray ar = buffer.data();
  18. ar.replace("ä", "ae"); // NOTE(review): the bytes of "ä" depend on this source file's encoding; presumably this only matches if the page uses the same encoding - confirm
  19. QProcess process;
  20. QStringList arguments;
  21. arguments << "-q" << "-asxml"; // NOTE(review): no character-encoding option is passed to tidy; presumably it falls back to its default input/output encoding - verify against the tidy docs
  22. process.start("./tidy", arguments ); // tidy binary is looked up relative to the current working directory
  23. process.waitForStarted(); // result not checked
  24. process.write(ar); // send the downloaded bytes to tidy
  25. process.waitForBytesWritten();
  26. process.closeWriteChannel(); // no more input will be written to tidy
  27.  
  28. process.waitForFinished(); // result not checked
  29. QByteArray array = process.readAllStandardOutput(); // tidy's output as raw bytes
  30. //std::cout << array.data();
  31. QString error;
  32. int row, column; // passed to setContent below; never read afterwards
  33. doc.setContent(array, false, &error, &row, &column ); // NOTE(review): `doc` is not declared anywhere in this listing - presumably a QDomDocument; the return value is ignored
  34. std::cout << error.toLatin1().data(); // print the parser's error message, if any
  35. //std::cout << doc.firstChild().toText().data().toLatin1().data();
  36. QString text; // unused in this listing
  37. QList<QDomNode> l; // nodes whose text matched
  38. QDomElement element = doc.documentElement();
  39. QDomNode c = element.firstChild();
  40. while(!c.isNull()) {
  41. QDomNode n = c.nextSibling();
  42. while(!c.isNull()) { // walk c along its siblings until it becomes null
  43. if(c.toElement().text() == "Blogsystem: Einträge absteigend sortieren" ) // NOTE(review): how this narrow literal becomes a QString depends on the source encoding and Qt's C-string codec - a likely cause of the umlaut mismatch; confirm
  44. l.append(c);
  45. c = c.nextSibling();
  46. }
  47. n = n.firstChild(); // n is reassigned, but c is already null here, so the outer loop ends after this first pass
  48. }
  49. foreach(QDomNode n, l) { // print text, tag name and match count for every match, without separators
  50. cout << qPrintable(n.toElement().text());
  51. cout << qPrintable(n.toElement().tagName());
  52. cout << l.length();
  53. }
  54. QFile file("pwn");
  55. file.open(QFile::WriteOnly); // result not checked
  56. file.write(buffer.data()); // dump the raw download (before the replace() and tidy) to the file
  57. return 0;
  58. }
To copy to clipboard, switch view to plain text mode 

synchttp.h:
Qt Code:
  1. /***************************************************************************
  2.  * Copyright (C) 2005 by Iulian M *
  3.  * eti@erata.net *
  4.  ***************************************************************************/
  5. #ifndef ETKSYNCHTTP_H
  6. #define ETKSYNCHTTP_H
  7.  
  8. #include <QHttp>
  9. #include <QEventLoop>
  10. #include <QBuffer>
  11.  
  12. /**
  13.  * Provides a synchronous API over QHttp.
  14.  * Each sync call starts a request and runs a member QEventLoop until finished() is called with the stored request id.
  15.  * @author Iulian M <eti@erata.net>
  16. */
  17. class SyncHTTP: public QHttp
  18. {
  19. Q_OBJECT
  20. public:
  21. /// constructors and destructor; requestID starts at -1 and status at false
  22. SyncHTTP( QObject * parent = 0 )
  23. :QHttp(parent),requestID(-1),status(false){}
  24.  
  25. SyncHTTP( const QString & hostName, quint16 port = 80, QObject * parent = 0 )
  26. :QHttp(hostName,port,parent),requestID(-1),status(false){}
  27.  
  28. virtual ~SyncHTTP(){}
  29.  
  30. /// send a GET request for path (passing `to` on to QHttp::get) and block until it has finished; returns true when it finished without error
  31. bool syncGet ( const QString & path, QIODevice * to = 0 )
  32. {
  33. /// connect the requestFinished signal to our finished slot. NOTE(review): this runs on every call; presumably repeated calls add duplicate connections - confirm
  34. connect(this,SIGNAL(requestFinished(int,bool)),SLOT(finished(int,bool)));
  35. /// start the request and store the requestID so finished() can recognise it
  36. requestID = get(path, to );
  37. /// block until finished() exits the loop
  38. loop.exec();
  39. /// return the request status set by finished()
  40. return status;
  41. }
  42.  
  43. /// send a POST request for path with the body read from `data` and block until it has finished; returns true when it finished without error
  44. bool syncPost ( const QString & path, QIODevice * data, QIODevice * to = 0 )
  45. {
  46. /// connect the requestFinished signal to our finished slot. NOTE(review): this runs on every call; presumably repeated calls add duplicate connections - confirm
  47. connect(this,SIGNAL(requestFinished(int,bool)),SLOT(finished(int,bool)));
  48. /// start the request and store the requestID so finished() can recognise it
  49. requestID = post(path, data , to );
  50. /// block until finished() exits the loop
  51. loop.exec();
  52. /// return the request status set by finished()
  53. return status;
  54. }
  55.  
  56. bool syncPost ( const QString & path, const QByteArray& data, QIODevice * to = 0 ) // overload taking the POST body as a QByteArray
  57. {
  58. /// wrap the QByteArray in a local QBuffer so the QIODevice overload can be reused
  59. QBuffer buffer;
  60. buffer.setData(data);
  61. return syncPost(path,&buffer,to);
  62. }
  63.  
  64. protected slots:
  65. virtual void finished(int idx, bool err) // slot connected to requestFinished(int,bool) by the sync calls
  66. {
  67. /// ignore notifications for any request other than the one we stored
  68. if(idx!=requestID)
  69. return;
  70. /// status is true when the request finished without error
  71. status = !err;
  72. /// end the loop so the blocked sync call returns
  73. loop.exit();
  74. }
  75.  
  76. private:
  77. /// id of the current request, as returned by get()/post(); -1 before the first request
  78. int requestID;
  79. /// result of the last finished request: true when it completed without error
  80. bool status;
  81. /// event loop used to block until the request has finished
  82. QEventLoop loop;
  83. };
  84.  
  85. #endif
To copy to clipboard, switch view to plain text mode 

What could be the reason?