PDA

View Full Version : QHtml Encoding Problems



kingfinn
5th June 2010, 13:06
Hi there,

I am using QHttp to fetch a website that contains German characters.
For some reason they do not arrive as entities (e.g. &auml;), but as raw characters ('ä').

After fetching it, I want to run it through HTML Tidy (to make it valid XML) and then parse it and search for text, but the German characters are not processed correctly.

main.cpp:

#include <QtCore/QCoreApplication>
#include <QProcess>
#include <QBuffer>
#include <QDomDocument>
#include <iostream>
#include "synchttp.h"
#include <QFile>

using namespace std;

int main(int argc, char **argv )
{
QCoreApplication a(argc, argv);
QBuffer buffer;
SyncHTTP http("back2hack.cc");
http.syncGet("/", &buffer);
QByteArray ar = buffer.data();
ar.replace("ä", "ae");
QProcess process;
QStringList arguments;
arguments << "-q" << "-asxml";
process.start("./tidy", arguments );
process.waitForStarted();
process.write(ar);
process.waitForBytesWritten();
process.closeWriteChannel();

process.waitForFinished();
QByteArray array = process.readAllStandardOutput();
//std::cout << array.data();
QDomDocument doc;
QString error;
int row, column;
doc.setContent(array, false, &error, &row, &column );
std::cout << error.toLatin1().data();
//std::cout << doc.firstChild().toText().data().toLatin1().data() ;
QString text;
QList<QDomNode> l;
QDomElement element = doc.documentElement();
QDomNode c = element.firstChild();
while(!c.isNull()) {
QDomNode n = c.nextSibling();
while(!c.isNull()) {
if(c.toElement().text() == "Blogsystem: Einträge absteigend sortieren" )
l.append(c);
c = c.nextSibling();
}
n = n.firstChild();
}
foreach(QDomNode n, l) {
cout << qPrintable(n.toElement().text());
cout << qPrintable(n.toElement().tagName());
cout << l.length();
}
QFile file("pwn");
file.open(QFile::WriteOnly);
file.write(buffer.data());
return 0;
}

synchttp.h:

/***************************************************************************
 *   Copyright (C) 2005 by Iulian M                                        *
 *   eti@erata.net                                                         *
 ***************************************************************************/
#ifndef ETKSYNCHTTP_H
#define ETKSYNCHTTP_H

#include <QHttp>
#include <QEventLoop>
#include <QBuffer>

/**
* Provide a synchronous api over QHttp
* Uses a QEventLoop to block until the request is completed
* @author Iulian M <eti@erata.net>
*/
class SyncHTTP: public QHttp
{
Q_OBJECT
public:
/// structors
SyncHTTP( QObject * parent = 0 )
:QHttp(parent),requestID(-1),status(false){}

SyncHTTP( const QString & hostName, quint16 port = 80, QObject * parent = 0 )
:QHttp(hostName,port,parent),requestID(-1),status(false){}

virtual ~SyncHTTP(){}

/// send GET request and wait until finished
bool syncGet ( const QString & path, QIODevice * to = 0 )
{
///connect the requestFinished signal to our finished slot
connect(this,SIGNAL(requestFinished(int,bool)),SLO T(finished(int,bool)));
/// start the request and store the requestID
requestID = get(path, to );
/// block until the request is finished
loop.exec();
/// return the request status
return status;
}

/// send POST request and wait until finished
bool syncPost ( const QString & path, QIODevice * data, QIODevice * to = 0 )
{
///connect the requestFinished signal to our finished slot
connect(this,SIGNAL(requestFinished(int,bool)),SLO T(finished(int,bool)));
/// start the request and store the requestID
requestID = post(path, data , to );
/// block until the request is finished
loop.exec();
/// return the request status
return status;
}

bool syncPost ( const QString & path, const QByteArray& data, QIODevice * to = 0 )
{
/// create io device from QByteArray
QBuffer buffer;
buffer.setData(data);
return syncPost(path,&buffer,to);
}

protected slots:
virtual void finished(int idx, bool err)
{
/// check to see if it's the request we made
if(idx!=requestID)
return;
/// set status of the request
status = !err;
/// end the loop
loop.exit();
}

private:
/// id of current request
int requestID;
/// error status of current request
bool status;
/// event loop used to block until request finished
QEventLoop loop;
};

#endif


What could be the reason?

tbscope
5th June 2010, 13:13
You need to encode or decode your text.
This means, you receive data in one codec and send it to another service that expects it in another codec.

See: http://doc.qt.nokia.com/4.6/tools-codecs.html
And the text codec classes.

kingfinn
5th June 2010, 13:26
How can I check which encoding I received?