PDA

View Full Version : php preg_match_all Qt alternative



alrawab
23rd June 2014, 03:45
hi
Is there an equivalent of PHP's preg_match_all in Qt?

anda_skoa
23rd June 2014, 07:29
You'll need to be a bit more precise.

If you are looking for regular expression handling, see QRegularExpression

Cheers,
_

alrawab
23rd June 2014, 10:51
Thanks, anda. The original code looks like this:

while (preg_match_all("$pattren","$vary","$res")==1){$x++;}
The problem is that both QRegExp::exactMatch and plain QString::contains sometimes give different results.

anda_skoa
23rd June 2014, 12:37
Can you give an example?

Cheers,
_

alrawab
23rd June 2014, 15:27
I'm trying to port the BioPHP "Microsatellite repeats finder".
this is the link for demo http://collegesoftwareconnection.com/microsatellites.php

and this is the source code in php :

<html>
<head>
<meta charset="UTF-8">
<title></title>
</head>
<body>
<?php

/**
 * Builds a regex alternation in which each alternative is $primer with
 * exactly one character replaced by the wildcard ".".
 *
 * The alternative that wildcards position 0 is always emitted; further
 * alternatives are added for positions 1 .. strlen($primer)-$minus-1.
 *
 * @param string $primer text to derive the alternatives from
 * @param int    $minus  number of trailing positions that never become a wildcard
 * @return string alternatives joined with "|"
 */
function includeN_1($primer,$minus) {
    $pattern = "." . substr($primer, 1);
    for ($pos = 1; $pos < strlen($primer) - $minus; $pos++) {
        $pattern .= "|" . substr($primer, 0, $pos) . "." . substr($primer, $pos + 1);
    }
    return $pattern;
}



/**
 * Builds a regex alternation in which each alternative is $primer with
 * two characters replaced by the wildcard ".".
 *
 * Every pair of positions inside the first strlen($primer)-$minus
 * characters is covered once.
 *
 * @param string $primer text to derive the alternatives from
 * @param int    $minus  number of trailing positions that never become a wildcard
 * @return string alternatives joined with "|"
 */
function includeN_2($primer,$minus) {
    $limit = strlen($primer) - $minus;
    $pattern = "";
    $first = 0;
    while ($first < $limit) {
        // Text before and after the first wildcard position.
        $head = substr($primer, 0, $first);
        $tail = substr($primer, $first + 1);
        for ($second = 0; $second < $limit - $first - 1; $second++) {
            $pattern .= "|" . $head . "."
                      . substr($tail, 0, $second) . "." . substr($tail, $second + 1);
        }
        $first++;
    }
    // Drop the leading "|" that precedes the first alternative.
    return substr($pattern, 1);
}

/**
 * Builds a regex alternation from $primer; used by the caller when three
 * mismatches are allowed.
 *
 * NOTE(review): the body is the same algorithm as includeN_2, so every
 * alternative carries two "." wildcards, not three. Confirm against the
 * upstream BioPHP source whether a third wildcard level is intended.
 *
 * @param string $primer text to derive the alternatives from
 * @param int    $minus  number of trailing positions that never become a wildcard
 * @return string alternatives joined with "|"
 */
function includeN_3($primer,$minus) {
    $limit = strlen($primer) - $minus;
    $pattern = "";
    for ($first = 0; $first < $limit; $first++) {
        $head = substr($primer, 0, $first);
        $tail = substr($primer, $first + 1);
        $second = 0;
        while ($second < $limit - $first - 1) {
            $pattern .= "|" . $head . ".";
            $pattern .= substr($tail, 0, $second) . "." . substr($tail, $second + 1);
            $second++;
        }
    }
    // Drop the leading "|" that precedes the first alternative.
    return substr($pattern, 1);
}

/**
 * Scans $sequence for tandem repeats (microsatellites).
 *
 * For every start position and every unit length between $min_length and
 * $max_length, the unit at that position is compared with the chunks of the
 * same length that follow it; each consecutive matching chunk increments the
 * repeat count. A hit is recorded when the count reaches $min_repeats and the
 * total length reaches $min_length_of_MR.
 *
 * The unit is inserted into the regex unescaped, so this assumes the sequence
 * contains no regex metacharacters (plain nucleotide letters).
 *
 * @param string $sequence           text to scan
 * @param int    $min_length         shortest repeat unit to try
 * @param int    $max_length         longest repeat unit to try
 * @param int    $min_repeats        minimum number of consecutive units
 * @param int    $min_length_of_MR   minimum total length of a reported repeat
 * @param int    $mismatches_allowed percentage of the unit length that may mismatch
 * @return array list of hits, each with keys "start_position", "length",
 *               "repeats" and "sequence"; empty array when nothing is found
 */
function find_microsatellite_repeats($sequence,$min_length, $max_length,$min_repeats,$min_length_of_MR,$mismatches_allowed){
    // Initialise so that callers get an empty array instead of an undefined variable.
    $results=array();
    $len_seq=strlen($sequence);
    $counter=0;
    for ($i=0;$i<$len_seq-3;$i++){
        for ($j=$min_length;$j<$max_length+1;$j++){
            // An empty unit matches every chunk and would never leave the while loop.
            if ($j<1){continue;}
            if (($i+$j)>$len_seq){break;}
            $sub_seq=substr($sequence,$i,$j);

            $len_sub_seq=strlen($sub_seq);
            $mismatches=floor($len_sub_seq*$mismatches_allowed/100);
            if ($mismatches==1){$sub_seq_pattern=includeN_1($sub_seq,0);}
            elseif ($mismatches==2){$sub_seq_pattern=includeN_2($sub_seq,0);}
            elseif ($mismatches==3){$sub_seq_pattern=includeN_3($sub_seq,0);}
            else {$sub_seq_pattern=$sub_seq;}

            // The pattern does not change while counting, so build it once.
            $regex="/($sub_seq_pattern)/";
            $matches=1;
            // Count how many consecutive chunks after the unit match it.
            while (preg_match_all($regex,substr($sequence,($i+$j*$matches),$j),$out)==1){$matches++;}
            echo $matches;
            if ($matches>=$min_repeats && ($j*$matches)>=$min_length_of_MR){
                $results[$counter]["start_position"]=$i;
                $results[$counter]["length"]=$j;
                $results[$counter]["repeats"]=$matches;
                $results[$counter]["sequence"]=substr($sequence,$i,$j*$matches);
                $counter++;
                // Skip past the repeat that was just reported.
                $i+=$j*$matches;
            }
        }
    }
    return ($results);
}
//echo includeN_2("saifab",1);
//echo "\n";
//echo includeN_3("saifab",1);

// Demo input as one unbroken run of nucleotides. The literal previously
// contained a space after every 50 characters (presumably a line-wrapping
// artifact of the paste); those spaces would have been scanned as part of
// the sequence.
$sequence="AACAATGCCATGATGATGATTATTACGACACAACAACACCGCGCTTGACGGCGGCGGATGGATGCCGCGATCAGACGTTCAACGCCCACGTAACGTAACGCAACGTAACCTAACGACACTGTTAACGGTACGAT";
//print_r( find_microsatellite_repeats($sequence,2,6,3,6,0));
// Unit length 2..6, at least 3 repeats, at least 6 characters, no mismatches.
find_microsatellite_repeats($sequence,2,6,3,6,0);
?>
</body>
</html>

and this is my c++ port :

// Builds a regex alternation in which each alternative is `primer` with
// exactly one character replaced by the wildcard ".".
// The alternative that wildcards position 0 is always emitted; further
// alternatives are added for positions 1 .. primer.size()-minus-1.
QString includeN_1(const QString & primer,int minus)
{
    QString pattern = "." + primer.mid(1);
    for (int pos = 1; pos < primer.size() - minus; ++pos) {
        pattern += "|" + primer.mid(0, pos) + "." + primer.mid(pos + 1);
    }
    return pattern;
}

// Builds a regex alternation in which each alternative is `primer` with
// two characters replaced by the wildcard ".". Every pair of positions
// inside the first primer.size()-minus characters is covered once.
QString includeN_2(const QString & primer,int minus)
{
    const int limit = primer.size() - minus;
    QString pattern = "";
    int first = 0;
    while (first < limit) {
        // Text before and after the first wildcard position.
        const QString head = primer.mid(0, first);
        const QString tail = primer.mid(first + 1);
        for (int second = 0; second < limit - first - 1; ++second) {
            pattern += "|" + head + ".";
            pattern += tail.mid(0, second) + "." + tail.mid(second + 1);
        }
        ++first;
    }
    // Drop the leading "|" that precedes the first alternative.
    return pattern.mid(1);
}

// Builds a regex alternation from `primer`; used by the caller when three
// mismatches are allowed.
// NOTE(review): the body is the same algorithm as includeN_2, so every
// alternative carries two "." wildcards, not three. Confirm against the
// PHP original whether a third wildcard level is intended.
QString includeN_3(const QString & primer,int minus)
{
    const int limit = primer.size() - minus;
    QString pattern = "";
    for (int first = 0; first < limit; ++first) {
        const QString head = primer.mid(0, first);
        const QString tail = primer.mid(first + 1);
        int second = 0;
        while (second < limit - first - 1) {
            pattern += "|" + head + ".";
            pattern += tail.mid(0, second) + "." + tail.mid(second + 1);
            ++second;
        }
    }
    // Drop the leading "|" that precedes the first alternative.
    return pattern.mid(1);
}

// Scans `seq` for tandem repeats (microsatellites) and prints every hit
// with qDebug().
//
//   seq               text to scan (not modified)
//   MinLen / MaxLen   shortest / longest repeat unit to try
//   MinRepeat         minimum number of consecutive units
//   MRMinLen          minimum total length of a reported repeat
//   MismatchesAllowed percentage of the unit length that may mismatch
//
// The unit is inserted into the regex unescaped, so this assumes the
// sequence contains no regex metacharacters (plain nucleotide letters).
void FindMicrosatelliteRepeats(QString &seq,int MinLen,int MaxLen,int MinRepeat,int MRMinLen,int MismatchesAllowed)
{
    const int len_seq = seq.size();

    for (int i = 0; i < len_seq - 3; i++) {
        for (int j = MinLen; j < MaxLen + 1; j++) {
            // An empty unit matches every chunk and would never leave the while loop.
            if (j <= 0) { continue; }
            if ((i + j) > len_seq) { break; }

            const QString sub_seq = seq.mid(i, j);
            // Integer division already truncates, so no floor() is needed.
            const int mismatches = sub_seq.size() * MismatchesAllowed / 100;

            QString sub_seq_pattern;
            if (mismatches == 1)      { sub_seq_pattern = includeN_1(sub_seq, 0); }
            else if (mismatches == 2) { sub_seq_pattern = includeN_2(sub_seq, 0); }
            else if (mismatches == 3) { sub_seq_pattern = includeN_3(sub_seq, 0); }
            else                      { sub_seq_pattern = sub_seq; }

            // Mirrors the PHP loop `while (preg_match_all(...) == 1) $matches++`.
            // A single `if` with plain string equality tested only one following
            // chunk and ignored the "|" / "." syntax of the mismatch patterns.
            const QRegExp unit("(" + sub_seq_pattern + ")");
            int matches = 1;
            while (seq.mid(i + j * matches, j).contains(unit)) {
                matches++;
            }

            if (matches >= MinRepeat && (j * matches) >= MRMinLen) {
                qDebug()<<"yes";
                qDebug()<<"start postion = "<<i;
                qDebug()<<"length = "<<j;
                qDebug()<<"repeats = "<<matches;
                qDebug()<<"sequence = "<<seq.mid(i,j*matches);
                // Skip past the repeat that was just reported.
                i += j * matches;
            }
        }//j
    }//i
}


//================================================================

// Runs the repeat finder on a sample DNA sequence.
int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);

    // One unbroken run of nucleotides. The literal previously contained a
    // space after every 50 characters (presumably a line-wrapping artifact of
    // the paste); those spaces would have been scanned as part of the sequence.
    QString primer="AACAATGCCATGATGATGATTATTACGACACAACAACACCGCGCTTGACGGCGGCGGATGGATGCCGCGATCAGACGTTCAACGCCCACGTAACGTAACGCAACGTAACCTAACGACACTGTTAACGGTACGAT";

    // Unit length 2..6, at least 3 repeats, at least 6 characters, no mismatches.
    FindMicrosatelliteRepeats(primer,2,6,3,6,0);
    return a.exec();
}

The problem is that the variable matches does not end up with the same value as in the original (because of preg_match_all).
regards

anda_skoa
23rd June 2014, 17:42
Hmm, your C++ code doesn't seem to have any usage of regular expressions or I am not seeing it.

Cheers,
_

alrawab
23rd June 2014, 19:18
Yes, the original code compares the pattern with chunks of the original string;
seq.mid(i+j*matches,j).contains(QRegExp("("+sub_seq_pattern+")")) will give the same effect.

anda_skoa
23rd June 2014, 20:20
Ah, ok. Misunderstanding. I thought there was still an open question.

Cheers,
_

alrawab
23rd June 2014, 23:23
Thanks for your help, the problem is solved. This is the Qt/C++ port of the PHP Microsatellite repeats finder:



#include <QCoreApplication>
#include <QtCore>

//======================================================================
//**************************Find Microsatellite Repeats****************
//======================================================================
// Returns a regex alternation whose alternatives are copies of `primer`
// with a single character swapped for the wildcard ".".
// Position 0 is always wildcarded; positions 1 .. primer.size()-minus-1
// each contribute one further alternative.
QString includeN_1(const QString & primer,int minus)
{
    QString alternatives = "." + primer.mid(1);
    int pos = 1;
    for (; pos < primer.size() - minus; ++pos) {
        const QString before = primer.mid(0, pos);
        const QString after = primer.mid(pos + 1);
        alternatives += "|" + before + "." + after;
    }
    return alternatives;
}
//----------------------------------------------------------
// Returns a regex alternation whose alternatives are copies of `primer`
// with two characters swapped for the wildcard ".". Each pair of positions
// within the first primer.size()-minus characters appears once.
QString includeN_2(const QString & primer,int minus)
{
    const int limit = primer.size() - minus;
    QString alternatives = "";
    int first = 0;
    while (first < limit) {
        // Text before and after the first wildcard position.
        const QString head = primer.mid(0, first);
        const QString tail = primer.mid(first + 1);
        for (int second = 0; second < limit - first - 1; ++second) {
            alternatives += "|" + head + ".";
            alternatives += tail.mid(0, second) + "." + tail.mid(second + 1);
        }
        ++first;
    }
    // Drop the leading "|" that precedes the first alternative.
    return alternatives.mid(1);
}
//---------------------------------------------------------------------
// Returns a regex alternation derived from `primer`; the caller selects it
// when three mismatches are allowed.
// NOTE(review): this is the same algorithm as includeN_2, so each
// alternative has two "." wildcards, not three. Confirm against the PHP
// original whether a third wildcard level is intended.
QString includeN_3(const QString & primer,int minus)
{
    const int limit = primer.size() - minus;
    QString alternatives = "";
    for (int first = 0; first < limit; ++first) {
        const QString head = primer.mid(0, first);
        const QString tail = primer.mid(first + 1);
        int second = 0;
        while (second < limit - first - 1) {
            alternatives += "|" + head + ".";
            alternatives += tail.mid(0, second) + "." + tail.mid(second + 1);
            ++second;
        }
    }
    // Drop the leading "|" that precedes the first alternative.
    return alternatives.mid(1);
}
//--------------------------------------------------------------------------
// Scans `sequence` for tandem repeats (microsatellites) and prints every
// hit with qDebug().
//
//   sequence                 text to scan (not modified)
//   min_length / max_length  shortest / longest repeat unit to try
//   min_repeats              minimum number of consecutive units
//   min_length_of_MR         minimum total length of a reported repeat
//   mismatches_allowed       percentage of the unit length that may mismatch
//
// The unit is inserted into the regex unescaped, so this assumes the
// sequence contains no regex metacharacters (plain nucleotide letters).
void FindMicrosatelliteRepeats(QString &sequence,int min_length,int max_length,int min_repeats,int min_length_of_MR,int mismatches_allowed)
{
    const int len_seq = sequence.size();

    for (int i = 0; i < len_seq - 3; i++) {
        for (int j = min_length; j < max_length + 1; j++) {
            // An empty unit matches every chunk and would never leave the while loop.
            if (j <= 0) { continue; }
            if ((i + j) > len_seq) { break; }

            const QString sub_seq = sequence.mid(i, j);
            // Integer division already truncates, so no floor() is needed.
            const int mismatches = sub_seq.size() * mismatches_allowed / 100;

            QString sub_seq_pattern;
            if (mismatches == 1)      { sub_seq_pattern = includeN_1(sub_seq, 0); }
            else if (mismatches == 2) { sub_seq_pattern = includeN_2(sub_seq, 0); }
            else if (mismatches == 3) { sub_seq_pattern = includeN_3(sub_seq, 0); }
            else                      { sub_seq_pattern = sub_seq; }

            // The pattern does not change while counting, so build the regex once.
            const QRegExp unit("(" + sub_seq_pattern + ")");
            int matches = 1;
            // Count how many consecutive chunks after the unit match it.
            while (sequence.mid(i + j * matches, j).contains(unit)) {
                matches++;
            }

            if (matches >= min_repeats && (j * matches) >= min_length_of_MR) {
                qDebug()<<"start_position"<<i;
                qDebug()<<"length"<<j;
                qDebug()<<"repeats"<<matches;
                qDebug()<<"sequence"<<sequence.mid(i,j*matches);
                // Skip past the repeat that was just reported.
                i += j * matches;
            }
        }//j
    }//i
}

//========================================================================

// Runs the repeat finder on a sample DNA sequence.
int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);

    // One unbroken run of nucleotides. The literal previously contained a
    // space after every 50 characters (presumably a line-wrapping artifact of
    // the paste); those spaces would have been scanned as part of the sequence.
    QString primer="AACAATGCCATGATGATGATTATTACGACACAACAACACCGCGCTTGACGGCGGCGGATGGATGCCGCGATCAGACGTTCAACGCCCACGTAACGTAACGCAACGTAACCTAACGACACTGTTAACGGTACGAT";

    // Unit length 2..6, at least 3 repeats, at least 6 characters, no mismatches.
    FindMicrosatelliteRepeats(primer,2,6,3,6,0);
    return a.exec();
}