#include #include #include #include #include"SP.h" // takes a dna sequence of size (arg1) and translate it to a peptide (arg2). There 6 ways to do that (arg3). // The size N of the dna Short-Read line is in (arg4). void Translate (char*, char*, int, int); // read the dna (arg1) backwards and put the results in dnaComp (arg2). // The size N of the dna Short-Read line is in (arg3). void Complete (char*, char*, int); // strcpy void Copy (char*, char*); // take a SP (arg1) and lookfor it using KMP algo in a peptide (arg2), using the KMP table (arg3) and the length of SP(arg4). // The size of the translated PP, L, is in arg5. int KMPSearch (char*, char*, int*, int, int); // argv[0] is always the excutable file, argv[1] is the dna SHORT READ sequence file // argv[2] is the SPs file and argv[3] is the count output file int main (int argc, char* argv[]) { int N, L; int i, t; char tmp[200000]; char* dna; char* dnaComp; char** pep; FILE* dnaSeq; // Initialize the Specific Pepetides SP* sps = (SP*) malloc(Nsp * sizeof(SP)); initSP(sps, argv); // Set Pointer to the the 6 possible peptides pep = (char**)malloc(6*sizeof(char*)); // open the file that contains the N-size dna seq dnaSeq = fopen(argv[1],"r"); // initialize line number (t), Dna size (N), the associated PP size (L) t = 0; N = 0; L = 0; // read one line at a time while(fgets(tmp, 200000, dnaSeq)!=NULL) { t++; printf("%d\n",t); // determine dna SHORT READ size, N, and the PP associated size N = strlen(tmp) - 1; L = (int)floor(N/3); // initialize 'dna' & 'dnaComp' dna = (char*)malloc((N+1)*sizeof(char)); //strcpy(dna, tmp); Copy(dna, tmp); dnaComp = (char*)malloc((N+1)*sizeof(char)); // Intialize the 6 possible peptides for (i=0; i < 6; i++) { pep[i] = (char*)malloc((L+1)*sizeof(char)); } // translate the dna to peptide in FF way (rotation 3 ) Translate(dna, pep[0], 0, N); Translate(dna, pep[1], 1, N); Translate(dna, pep[2], 2, N); // read the dna backwards Complete(dna, dnaComp, N); // translate the dnaComp (also 3 possibilities) Translate(dnaComp, pep[3], 0, N); Translate(dnaComp, pep[4], 1, N); Translate(dnaComp, pep[5], 2, N); // Go over SPs and see if there is a match using KMP for (i=0; i < Nsp; i++) { if ( KMPSearch (sps[i]._seq, pep[0], sps[i]._table, sps[i]._length, L) >= 0) { sps[i]._sr[sps[i]._count] = t; sps[i]._frame[sps[i]._count] = 0; sps[i]._count++; sps[i]._sr = (int*)realloc(sps[i]._sr,(sps[i]._count+1)*sizeof(int)); sps[i]._frame = (int*)realloc(sps[i]._frame,(sps[i]._count+1)*sizeof(int)); } if ( KMPSearch (sps[i]._seq, pep[1], sps[i]._table, sps[i]._length, L) >= 0) { sps[i]._sr[sps[i]._count] = t; sps[i]._frame[sps[i]._count] = 1; sps[i]._count++; sps[i]._sr = (int*)realloc(sps[i]._sr,(sps[i]._count+1)*sizeof(int)); sps[i]._frame = (int*)realloc(sps[i]._frame,(sps[i]._count+1)*sizeof(int)); } if ( KMPSearch (sps[i]._seq, pep[2], sps[i]._table, sps[i]._length, L) >= 0) { sps[i]._sr[sps[i]._count] = t; sps[i]._frame[sps[i]._count] = 2; sps[i]._count++; sps[i]._sr = (int*)realloc(sps[i]._sr,(sps[i]._count+1)*sizeof(int)); sps[i]._frame = (int*)realloc(sps[i]._frame,(sps[i]._count+1)*sizeof(int)); } if ( KMPSearch (sps[i]._seq, pep[3], sps[i]._table, sps[i]._length, L) >= 0) { sps[i]._sr[sps[i]._count] = t; sps[i]._frame[sps[i]._count] = 3; sps[i]._count++; sps[i]._sr = (int*)realloc(sps[i]._sr,(sps[i]._count+1)*sizeof(int)); sps[i]._frame = (int*)realloc(sps[i]._frame,(sps[i]._count+1)*sizeof(int)); } if ( KMPSearch (sps[i]._seq, pep[4], sps[i]._table, sps[i]._length, L) >= 0) { sps[i]._sr[sps[i]._count] = t; sps[i]._frame[sps[i]._count] = 4; sps[i]._count++; sps[i]._sr = (int*)realloc(sps[i]._sr,(sps[i]._count+1)*sizeof(int)); sps[i]._frame = (int*)realloc(sps[i]._frame,(sps[i]._count+1)*sizeof(int)); } if ( KMPSearch (sps[i]._seq, pep[5], sps[i]._table, sps[i]._length, L) >= 0) { sps[i]._sr[sps[i]._count] = t; sps[i]._frame[sps[i]._count] = 5; sps[i]._count++; sps[i]._sr = (int*)realloc(sps[i]._sr,(sps[i]._count+1)*sizeof(int)); sps[i]._frame = (int*)realloc(sps[i]._frame,(sps[i]._count+1)*sizeof(int)); } } // free peptides for (i=0; i < 6; i++) { free(pep[i]); } // free dna free(dna); free(dnaComp); } // print to files the count of sps printSP(sps, argv); return 1; } void Copy(char* s1, char* s2) { int i, N; N = strlen(s2); for (i=0; i < N ; i++) { s1[i] = s2[i]; } //s1[i] = '\0'; } // W is the word (a SP) to search in P (Pepetide) int KMPSearch (char* W, char* P, int* T, int Wsize, int L) { // i is the index in W, m the index of P int i = 0; int m = 0; // while m+i is less than the length of P while (m + i < L) { if (W[i] == P[m + i]) { i = i + 1; // if i equals the length of W, if (i == Wsize) return m; } else { m = m + i - T[i]; if (i > 0) i = T[i]; } } // if u r here, there is no match return -1; } void Translate (char* d, char* p, int m, int N) { int i, j; char tmp[4]; // go over the dna sequence and take 3 letters together starting at m for (i=m, j=0; i