// // DESCRIPTION // ----------- // This program implements Inversion Frequency Vector generation // for give Multiset Permutation M of set S = {0,1,2,...,255}. // This program is used to replace Move To Front Module in BZIP2 // Compression. This used due to the fact that BWT generates local // locality of references which can exploited to achieve better // compression on text data. // // This simulates a very simple algorithm. Let '#' be the // operator to concatenate the string. So the algo is as follows : // 1. D0 = <> {Null Set} // 2. D(i) = D(i-1) # T (i), where T(i) = {X1,X2,X3 ... Xfi}. 1<=j<=fi. // X1 = First occurance of i in M. // j > 1, X(j) = number of elements y in M, such that y > i is // between (j-1)th element and jth ocuurance of i. // // // Build Instructions // ------------------ // // Define the constant unix for UNIX or UNIX-like systems. The // use of this constant turns off the code used to force the MS-DOS // file system into binary mode. g++ already does this, your // UNIX C++ compiler might also. // // Borland C++ 4.5 16 bit : bcc -w ifv.cpp // Borland C++ 4.5 32 bit : bcc32 -w ifv.cpp // Microsoft Visual C++ 1.52 : cl /W4 ifv.cpp // Microsoft Visual C++ 2.1 : cl /W4 ifv.cpp // g++ : g++ -o ifv ifv.cpp // // Typical Use // ----------- // ivf input output // #include #if !defined( unix ) #include #endif #include int main( int argc, char *argv[] ){ char M[5001]; /* Block Size Optimal to BWT is used.*/ int L[256]; /* Base Character Set.*/ int S[256]; int T[5001]; /* IN worst case All blocks will have same char.*/ int D[5001]; /* Final Output of Inversion Frequency Vector.*/ fprintf( stderr, "Inversion Frequency Vector Generation ...\n"); if ( argc > 1 ) { freopen( argv[ 1 ], "r", stdin ); fprintf( stderr, "%s", argv[ 1 ] ); } else fprintf( stderr, "stdin" ); fprintf( stderr, " to " ); if ( argc > 2 ) { freopen( argv[ 2 ], "w", stdout ); fprintf( stderr, "%s", argv[ 2 ] ); } else fprintf( stderr, "stdin" ); fprintf( stderr, "\n" ); int c,i=0; // Creating Character Set. for(i=0; i < 255; i ++) L[i] = 0; // Re-Initialise i. i =0; int k = 0,process=1; k = 0; i = 0; while ( ( c = getc( stdin ))) { M[i++] = (unsigned char)c; // Mark c is present in the set S. if(L[(unsigned char)c] == 0){ S[k] = (unsigned char)c; k++; L[(unsigned char)c] = 1; }// End if. }//End while. // Total Number of elements that need to parsed. int nNumberOfElements = i; // Neglect \n. k--; fprintf( stderr, "k = %d\n",k); // We have to construct D(1), D(2) ... D(k), where D= D(k) is // final Inversion Frequency Vector. char Ch; int n = 0,l=0,g=0,t=0,j=0; for( j=0; j < k; j++){ fprintf ( stderr, "Executing D(%d) = D(%d) # T(%d).\n",j+1,j,j+1); fprintf ( stderr, "Finding T(%d).\n",j+1); Ch = (unsigned char)S[j]; g = 0; n = 0; // Find the first occurance of character. // U should have atleast one occurance of the character in the // string M. for(l=0; l < nNumberOfElements; l++) if(M[l] == Ch) break; T[n++] = l+1; l++; for(; l < nNumberOfElements; l++){ if(M[l] > Ch) g++; if(M[l] == Ch){ T[n++] = g; g = 0; }//End if. }// End for. // Print the values. of T. fprintf( stderr, "T(%d)= < ",j+1); for(int h=0; h < n; h++){ D[t++] = T[h]; if(h == n-1) fprintf(stderr, "%d",T[h]); else fprintf(stderr,"%d,",T[h]); }//End for. fprintf(stderr," >\n"); }//End. // Print the values of D. fprintf( stderr,"D(%d) = < ",j+1); for(int h=0; h < t; h++){ if(h == t-1){ fprintf(stderr,"%d",D[h]); fprintf(stdout,"%d",D[h]); }//End if else{ fprintf(stderr,"%d,",D[h]); fprintf(stdout,"%d,",D[h]); }//End Else. if(h%25 == 0){ fprintf(stderr,"\n"); fprintf(stdout,"\n"); }//End if. }//End for. fprintf(stderr," >\n"); }//End main.