|
gnFilter.cppGo to the documentation of this file.00001 00002 // File: gnFilter.h 00003 // Purpose: Filter for all Sequences 00004 // Description: Filters sequences, translates, reverse complement, converts 00005 // additions, etc. 00006 // Changes: 00007 // Version: libGenome 0.1.0 00008 // Author: Aaron Darling 00009 // Last Edited: April 15, 2001, 11:13:00pm 00010 // Modified by: 00011 // Copyright: (c) Aaron Darling 00012 // Licenses: Proprietary 00014 #include "gn/gnFilter.h" 00015 #include "gn/gnDebug.h" 00016 00017 // public: 00018 const gnFilter *gnFilter::alphabetCharacterFilter(){ 00019 const static gnFilter* t_filt = new gnFilter(alphabetCharacterFilterType); 00020 return t_filt; 00021 } 00022 00023 const gnFilter *gnFilter::numberCharacterFilter(){ 00024 const static gnFilter* t_filt = new gnFilter(numberCharacterFilterType); 00025 return t_filt; 00026 } 00027 00028 00029 const gnFilter *gnFilter::proteinSeqFilter(){ 00030 const static gnFilter* t_filt = new gnFilter(proteinSeqFilterType); 00031 return t_filt; 00032 } 00033 00034 const gnFilter *gnFilter::basicDNASeqFilter(){ 00035 const static gnFilter* t_filt = new gnFilter(basicDNASeqFilterType); 00036 return t_filt; 00037 } 00038 00039 const gnFilter *gnFilter::fullDNASeqFilter(){ 00040 const static gnFilter* t_filt = new gnFilter(fullDNASeqFilterType); 00041 return t_filt; 00042 } 00043 00044 const gnFilter *gnFilter::basicRNASeqFilter(){ 00045 const static gnFilter* t_filt = new gnFilter(basicRNASeqFilterType); 00046 return t_filt; 00047 } 00048 00049 const gnFilter *gnFilter::fullRNASeqFilter(){ 00050 const static gnFilter* t_filt = new gnFilter(fullRNASeqFilterType); 00051 return t_filt; 00052 } 00053 00054 const gnFilter *gnFilter::DNAtoRNAFilter(){ 00055 const static gnFilter* t_filt = new gnFilter(DNAtoRNAFilterType); 00056 return t_filt; 00057 } 00058 00059 const gnFilter *gnFilter::RNAtoDNAFilter(){ 00060 const static gnFilter* t_filt = new gnFilter(RNAtoDNAFilterType); 00061 return t_filt; 00062 } 00063 00064 const gnFilter *gnFilter::DNAComplementFilter(){ 00065 const static gnFilter* t_filt = new gnFilter(DNAComplementFilterType); 00066 return t_filt; 00067 } 00068 00069 const gnFilter *gnFilter::RNAComplementFilter(){ 00070 const static gnFilter* t_filt = new gnFilter(RNAComplementFilterType); 00071 return t_filt; 00072 } 00073 00074 00075 // public: 00076 gnFilter::gnFilter() 00077 { 00078 m_defaultChar = 'n'; 00079 m_rDefaultChar = 'n'; 00080 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i ) 00081 m_pairArray[i] = NO_REVCOMP_CHAR; 00082 } 00083 gnFilter::gnFilter( const gnSeqC defaultChar, const gnSeqC rdefaultChar ) 00084 { 00085 m_defaultChar = defaultChar; 00086 m_rDefaultChar = rdefaultChar; 00087 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i ) 00088 m_pairArray[i] = NO_REVCOMP_CHAR; 00089 } 00090 00091 gnFilter::gnFilter( const gnFilter &sf ) 00092 { 00093 m_name = sf.m_name; 00094 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i ) 00095 m_pairArray[i] = sf.m_pairArray[i]; 00096 m_defaultChar = sf.m_defaultChar; 00097 m_rDefaultChar = sf.m_rDefaultChar; 00098 } 00099 00100 gnFilter::gnFilter( const gnFilterType f_type ){ 00101 for( gnSeqC i = 0; i < GNSEQC_MAX; ++i ) 00102 m_pairArray[i] = NO_REVCOMP_CHAR; 00103 switch(f_type){ 00104 case alphabetCharacterFilterType: 00105 CreateAlphabetCharacterFilter(); 00106 break; 00107 case numberCharacterFilterType: 00108 CreateNumberCharacterFilter(); 00109 break; 00110 case proteinSeqFilterType: 00111 CreateProteinFilter(); 00112 break; 00113 case basicDNASeqFilterType: 00114 CreateBasicDNAFilter(); 00115 break; 00116 case fullDNASeqFilterType: 00117 CreateFullDNAFilter(); 00118 break; 00119 case basicRNASeqFilterType: 00120 CreateBasicRNAFilter(); 00121 break; 00122 case fullRNASeqFilterType: 00123 CreateFullRNAFilter(); 00124 break; 00125 case DNAtoRNAFilterType: 00126 CreateDNAtoRNAFilter(); 00127 break; 00128 case RNAtoDNAFilterType: 00129 CreateRNAtoDNAFilter(); 00130 break; 00131 case DNAComplementFilterType: 00132 CreateDNAComplementFilter(); 00133 break; 00134 case RNAComplementFilterType: 00135 CreateRNAComplementFilter(); 00136 break; 00137 } 00138 } 00139 00140 00141 gnFilter::~gnFilter() 00142 { 00143 } 00144 00145 inline 00146 void gnFilter::Filter( gnSeqC** seq, uint32& len ) const 00147 { 00148 gnSeqC* tmp = new gnSeqC[len]; 00149 gnSeqI c=0; 00150 for(uint32 i=0; i < len; i++) 00151 if(IsValid((*seq)[i])) 00152 tmp[c++] = m_pairArray[(*seq)[i]]; 00153 len = c; 00154 memcpy(*seq, tmp, len); 00155 delete[] tmp; 00156 } 00157 00158 void gnFilter::ReverseFilter( gnSeqC** seq, uint32& len ) const 00159 { 00160 gnSeqC tmp, dum; 00161 uint32 halfLen = len/2; 00162 uint32 end = len - 1; 00163 uint32 curB = 0; 00164 uint32 curE = end; 00165 for( uint32 i=0; i < halfLen ; ++i ) 00166 { 00167 tmp = m_pairArray[(*seq)[i]]; 00168 dum = m_pairArray[(*seq)[ end - i ]]; 00169 if(dum != NO_REVCOMP_CHAR) 00170 (*seq)[ curB++ ] = dum; 00171 if(tmp != NO_REVCOMP_CHAR) 00172 (*seq)[ curE-- ] = tmp; 00173 } 00174 if(len&0x1){ 00175 tmp = m_pairArray[(*seq)[halfLen]]; 00176 if(tmp != NO_REVCOMP_CHAR) 00177 (*seq)[curB++] = tmp; 00178 } 00179 // now for the memmove 00180 if(curE >= curB){ 00181 memmove(*seq+curB, *seq+curE+1, end - curE); 00182 len = end - curE + curB; 00183 } 00184 00185 } 00186 00187 void gnFilter::Filter( string &seq ) const 00188 { 00189 gnSeqI c=0; 00190 for(uint32 i=0; i < seq.length(); i++) 00191 if(IsValid(seq[i])) 00192 seq[c++] = m_pairArray[seq[i]]; 00193 } 00194 00195 void gnFilter::ReverseFilter( string &seq ) const 00196 { 00197 gnSeqC tmp, dum; 00198 uint32 halfLen = seq.length()/2; 00199 uint32 end = seq.length() - 1; 00200 uint32 curB = 0; 00201 uint32 curE = end; 00202 for( uint32 i=0; i < halfLen ; ++i ) 00203 { 00204 tmp = m_pairArray[seq[i]]; 00205 dum = m_pairArray[seq[ end - i ]]; 00206 if(dum != NO_REVCOMP_CHAR) 00207 seq[ curB++ ] = dum; 00208 if(tmp != NO_REVCOMP_CHAR) 00209 seq[ curE-- ] = tmp; 00210 } 00211 if(seq.length()&0x1){ 00212 tmp = m_pairArray[seq[halfLen]]; 00213 if(tmp != NO_REVCOMP_CHAR) 00214 seq[curB++] = tmp; 00215 } 00216 // now for the memmove 00217 if(curE >= curB){ 00218 seq.erase(curB, curE-curB); 00219 } 00220 } 00221 00222 // standard filters 00223 void gnFilter::CreateAlphabetCharacterFilter() 00224 { 00225 SetDefaultChar( 0, 0 ); 00226 SetName( "Alphabet Character Filter" ); 00227 SetPair( 'A', 'a' ); 00228 SetPair( 'B', 'b' ); 00229 SetPair( 'C', 'c' ); 00230 SetPair( 'D', 'd' ); 00231 SetPair( 'E', 'e' ); 00232 SetPair( 'F', 'f' ); 00233 SetPair( 'G', 'g' ); 00234 SetPair( 'H', 'h' ); 00235 SetPair( 'I', 'i' ); 00236 SetPair( 'J', 'j' ); 00237 SetPair( 'K', 'k' ); 00238 SetPair( 'L', 'l' ); 00239 SetPair( 'M', 'm' ); 00240 SetPair( 'N', 'n' ); 00241 SetPair( 'O', 'o' ); 00242 SetPair( 'P', 'p' ); 00243 SetPair( 'Q', 'q' ); 00244 SetPair( 'R', 'r' ); 00245 SetPair( 'S', 's' ); 00246 SetPair( 'T', 't' ); 00247 SetPair( 'U', 'u' ); 00248 SetPair( 'V', 'v' ); 00249 SetPair( 'W', 'w' ); 00250 SetPair( 'X', 'x' ); 00251 SetPair( 'Y', 'y' ); 00252 SetPair( 'Z', 'z' ); 00253 } 00254 00255 void gnFilter::CreateNumberCharacterFilter() 00256 { 00257 SetDefaultChar( 0, 0 ); 00258 SetName( "Number Character Filter" ); 00259 SetSingle( '0' ); 00260 SetSingle( '1' ); 00261 SetSingle( '2' ); 00262 SetSingle( '3' ); 00263 SetSingle( '4' ); 00264 SetSingle( '5' ); 00265 SetSingle( '6' ); 00266 SetSingle( '7' ); 00267 SetSingle( '8' ); 00268 SetSingle( '9' ); 00269 } 00270 00271 void gnFilter::CreateProteinFilter() 00272 { 00273 SetDefaultChar( 'u', 'u' ); 00274 SetName( "Protein Filter" ); 00275 SetSingle( 'A' ); 00276 SetSingle( 'R' ); 00277 SetSingle( 'N' ); 00278 SetSingle( 'D' ); 00279 SetSingle( 'C' ); 00280 SetSingle( 'Q' ); 00281 SetSingle( 'E' ); 00282 SetSingle( 'G' ); 00283 SetSingle( 'H' ); 00284 SetSingle( 'I' ); 00285 SetSingle( 'L' ); 00286 SetSingle( 'K' ); 00287 SetSingle( 'M' ); 00288 SetSingle( 'F' ); 00289 SetSingle( 'P' ); 00290 SetSingle( 'S' ); 00291 SetSingle( 'T' ); 00292 SetSingle( 'W' ); 00293 SetSingle( 'Y' ); 00294 SetSingle( 'V' ); 00295 00296 SetSingle( 'a' ); 00297 SetSingle( 'r' ); 00298 SetSingle( 'n' ); 00299 SetSingle( 'd' ); 00300 SetSingle( 'c' ); 00301 SetSingle( 'q' ); 00302 SetSingle( 'e' ); 00303 SetSingle( 'g' ); 00304 SetSingle( 'h' ); 00305 SetSingle( 'i' ); 00306 SetSingle( 'l' ); 00307 SetSingle( 'k' ); 00308 SetSingle( 'm' ); 00309 SetSingle( 'f' ); 00310 SetSingle( 'p' ); 00311 SetSingle( 's' ); 00312 SetSingle( 't' ); 00313 SetSingle( 'w' ); 00314 SetSingle( 'y' ); 00315 SetSingle( 'v' ); 00316 } 00317 00318 void gnFilter::CreateBasicDNAFilter() 00319 { 00320 SetDefaultChar( 'n', 'n' ); 00321 SetName( "Basic DNA Filter" ); 00322 SetSingle( 'a' ); 00323 SetSingle( 'c' ); 00324 SetSingle( 'g' ); 00325 SetSingle( 't' ); 00326 SetSingle( 'A' ); 00327 SetSingle( 'C' ); 00328 SetSingle( 'G' ); 00329 SetSingle( 'T' ); 00330 SetSingle( 'n' ); 00331 SetSingle( 'N' ); 00332 SetSingle( 'x' ); 00333 SetSingle( 'X' ); 00334 SetSingle( '-' ); 00335 } 00336 void gnFilter::CreateFullDNAFilter() 00337 { 00338 SetDefaultChar( 'n', 'n' ); 00339 SetName( "Full DNA Filter" ); 00340 SetSingle( 'a' ); 00341 SetSingle( 'c' ); 00342 SetSingle( 'g' ); 00343 SetSingle( 't' ); 00344 SetSingle( 'A' ); 00345 SetSingle( 'C' ); 00346 SetSingle( 'G' ); 00347 SetSingle( 'T' ); 00348 SetSingle( 'r' ); 00349 SetSingle( 'y' ); 00350 SetSingle( 'k' ); 00351 SetSingle( 'm' ); 00352 SetSingle( 'b' ); 00353 SetSingle( 'v' ); 00354 SetSingle( 'd' ); 00355 SetSingle( 'h' ); 00356 SetSingle( 'R' ); 00357 SetSingle( 'Y' ); 00358 SetSingle( 'K' ); 00359 SetSingle( 'M' ); 00360 SetSingle( 'B' ); 00361 SetSingle( 'V' ); 00362 SetSingle( 'D' ); 00363 SetSingle( 'H' ); 00364 SetSingle( 's' ); 00365 SetSingle( 'S' ); 00366 SetSingle( 'w' ); 00367 SetSingle( 'W' ); 00368 SetSingle( 'n' ); 00369 SetSingle( 'N' ); 00370 SetSingle( 'x' ); 00371 SetSingle( 'X' ); 00372 SetSingle( '-' ); 00373 } 00374 void gnFilter::CreateBasicRNAFilter() 00375 { 00376 SetDefaultChar( 'n', 'n' ); 00377 SetName( "Basic RNA Filter" ); 00378 SetSingle( 'a' ); 00379 SetSingle( 'c' ); 00380 SetSingle( 'g' ); 00381 SetSingle( 'u' ); 00382 SetSingle( 'A' ); 00383 SetSingle( 'C' ); 00384 SetSingle( 'G' ); 00385 SetSingle( 'U' ); 00386 SetSingle( 'n' ); 00387 SetSingle( 'N' ); 00388 SetSingle( '-' ); 00389 } 00390 void gnFilter::CreateFullRNAFilter() 00391 { 00392 SetDefaultChar( 'n', 'n' ); 00393 SetName( "Full RNA Filter" ); 00394 SetSingle( 'a' ); 00395 SetSingle( 'c' ); 00396 SetSingle( 'g' ); 00397 SetSingle( 'u' ); 00398 SetSingle( 'A' ); 00399 SetSingle( 'C' ); 00400 SetSingle( 'G' ); 00401 SetSingle( 'U' ); 00402 SetSingle( 'r' ); 00403 SetSingle( 'y' ); 00404 SetSingle( 'k' ); 00405 SetSingle( 'm' ); 00406 SetSingle( 'b' ); 00407 SetSingle( 'v' ); 00408 SetSingle( 'd' ); 00409 SetSingle( 'h' ); 00410 SetSingle( 'R' ); 00411 SetSingle( 'Y' ); 00412 SetSingle( 'K' ); 00413 SetSingle( 'M' ); 00414 SetSingle( 'B' ); 00415 SetSingle( 'V' ); 00416 SetSingle( 'D' ); 00417 SetSingle( 'H' ); 00418 SetSingle( 's' ); 00419 SetSingle( 'S' ); 00420 SetSingle( 'w' ); 00421 SetSingle( 'W' ); 00422 SetSingle( 'n' ); 00423 SetSingle( 'N' ); 00424 SetSingle( '-' ); 00425 } 00426 00427 00428 void gnFilter::CreateDNAtoRNAFilter(){ 00429 SetDefaultChar( 'n', 'n' ); 00430 SetName( "Full DNA to RNA Filter" ); 00431 SetSingle( 'a' ); 00432 SetSingle( 'c' ); 00433 SetSingle( 'g' ); 00434 SetPair( 't', 'u' ); 00435 SetSingle( 'A' ); 00436 SetSingle( 'C' ); 00437 SetSingle( 'G' ); 00438 SetPair( 'T', 'U' ); 00439 SetSingle( 'r' ); 00440 SetSingle( 'y' ); 00441 SetSingle( 'k' ); 00442 SetSingle( 'm' ); 00443 SetSingle( 'b' ); 00444 SetSingle( 'v' ); 00445 SetSingle( 'd' ); 00446 SetSingle( 'h' ); 00447 SetSingle( 'R' ); 00448 SetSingle( 'Y' ); 00449 SetSingle( 'K' ); 00450 SetSingle( 'M' ); 00451 SetSingle( 'B' ); 00452 SetSingle( 'V' ); 00453 SetSingle( 'D' ); 00454 SetSingle( 'H' ); 00455 SetSingle( 's' ); 00456 SetSingle( 'S' ); 00457 SetSingle( 'w' ); 00458 SetSingle( 'W' ); 00459 SetSingle( 'n' ); 00460 SetSingle( 'N' ); 00461 SetSingle( '-' ); 00462 } 00463 00464 void gnFilter::CreateRNAtoDNAFilter(){ 00465 SetDefaultChar( 'n', 'n' ); 00466 SetName( "Full RNA to DNA Filter" ); 00467 SetSingle( 'a' ); 00468 SetSingle( 'c' ); 00469 SetSingle( 'g' ); 00470 SetPair( 'u', 't' ); 00471 SetSingle( 'A' ); 00472 SetSingle( 'C' ); 00473 SetSingle( 'G' ); 00474 SetPair( 'U', 'T' ); 00475 SetSingle( 'r' ); 00476 SetSingle( 'y' ); 00477 SetSingle( 'k' ); 00478 SetSingle( 'm' ); 00479 SetSingle( 'b' ); 00480 SetSingle( 'v' ); 00481 SetSingle( 'd' ); 00482 SetSingle( 'h' ); 00483 SetSingle( 'R' ); 00484 SetSingle( 'Y' ); 00485 SetSingle( 'K' ); 00486 SetSingle( 'M' ); 00487 SetSingle( 'B' ); 00488 SetSingle( 'V' ); 00489 SetSingle( 'D' ); 00490 SetSingle( 'H' ); 00491 SetSingle( 's' ); 00492 SetSingle( 'S' ); 00493 SetSingle( 'w' ); 00494 SetSingle( 'W' ); 00495 SetSingle( 'n' ); 00496 SetSingle( 'N' ); 00497 SetSingle( '-' ); 00498 } 00499 00500 void gnFilter::CreateDNAComplementFilter(){ 00501 SetDefaultChar( 'n', 'n' ); 00502 SetName( "Full DNA Complement Filter" ); 00503 SetPair( 'a', 't' ); 00504 SetPair( 'A', 'T' ); 00505 SetPair( 't', 'a' ); 00506 SetPair( 'T', 'A' ); 00507 SetPair( 'c', 'g' ); 00508 SetPair( 'C', 'G' ); 00509 SetPair( 'g', 'c' ); 00510 SetPair( 'G', 'C' ); 00511 SetPair( 'r', 'y' ); 00512 SetPair( 'R', 'Y' ); 00513 SetPair( 'y', 'r' ); 00514 SetPair( 'Y', 'R' ); 00515 SetPair( 'k', 'm' ); 00516 SetPair( 'K', 'M' ); 00517 SetPair( 'm', 'k' ); 00518 SetPair( 'M', 'K' ); 00519 SetSingle( 's' ); 00520 SetSingle( 'S' ); 00521 SetSingle( 'w' ); 00522 SetSingle( 'W' ); 00523 SetPair( 'b', 'v' ); 00524 SetPair( 'B', 'V' ); 00525 SetPair( 'v', 'b' ); 00526 SetPair( 'V', 'B' ); 00527 SetPair( 'd', 'h' ); 00528 SetPair( 'D', 'H' ); 00529 SetPair( 'h', 'd' ); 00530 SetPair( 'H', 'D' ); 00531 SetSingle( 'n' ); 00532 SetSingle( 'N' ); 00533 SetSingle( 'x' ); 00534 SetSingle( 'X' ); 00535 SetSingle( '-' ); 00536 } 00537 00538 void gnFilter::CreateRNAComplementFilter(){ 00539 SetDefaultChar( 'n', 'n' ); 00540 SetName( "Full RNA Complement Filter" ); 00541 SetPair( 'a', 'u' ); 00542 SetPair( 'A', 'U' ); 00543 SetPair( 'u', 'a' ); 00544 SetPair( 'U', 'A' ); 00545 SetPair( 'c', 'g' ); 00546 SetPair( 'C', 'G' ); 00547 SetPair( 'g', 'c' ); 00548 SetPair( 'G', 'C' ); 00549 SetPair( 'r', 'y' ); 00550 SetPair( 'R', 'Y' ); 00551 SetPair( 'y', 'r' ); 00552 SetPair( 'Y', 'R' ); 00553 SetPair( 'k', 'm' ); 00554 SetPair( 'K', 'M' ); 00555 SetPair( 'm', 'k' ); 00556 SetPair( 'M', 'K' ); 00557 SetSingle( 's' ); 00558 SetSingle( 'S' ); 00559 SetSingle( 'w' ); 00560 SetSingle( 'W' ); 00561 SetPair( 'b', 'v' ); 00562 SetPair( 'B', 'V' ); 00563 SetPair( 'v', 'b' ); 00564 SetPair( 'V', 'B' ); 00565 SetPair( 'd', 'h' ); 00566 SetPair( 'D', 'H' ); 00567 SetPair( 'h', 'd' ); 00568 SetPair( 'H', 'D' ); 00569 SetSingle( 'n' ); 00570 SetSingle( 'N' ); 00571 SetSingle( '-' ); 00572 } Generated at Fri Nov 30 15:36:51 2001 for libGenome by 1.2.8.1 written by Dimitri van Heesch, © 1997-2001 |