#include #include #include #include #include #define MAX (LONG_MAX-2) #define min(a,b) ((a<=b) ? (a) : (b)) //#define _stand_alone // _stand_alone defines if this is a subprogram or a stand alone program // in other words, it either blocks or unblocks main() //#define _debug // _debug controls displaying debugging info #include "Lstring.hpp" #include "lynarr.hpp" /******************************************************************************** ******************************************************************************** *************************** bsla code ****************************************** ******************************************************************************** *******************************************************************************/ /* ******************************************************************* * global data for bsla * *******************************************************************/ long N; long nil=-1; long* prev; long* Gprev; long* Gnext; long* Gstart; long* Gcntxt; long* Cprev; long* Cnext; long* Gmemb; long* Gval; long* auxA; long* auxB; long* auxC; long Cstart; long last_group_number; long* prev_stack; // it is only used to setup the initial prev[] long prevTop; /* *************************************************************************** * auxiliary functions * ***************************************************************************/ #define push_prev(a) prev_stack[++prevTop]=a #define pop_prev() prev_stack[prevTop--] #define top_prev() ((prevTop<0)?(nil):(prev_stack[prevTop])) #ifdef _debug // show contents of the stack used for computing the initila prev[] void show_prev_stack(Lstring& istring,FILE *fp=stdout) { fprintf(fp,"prev_stack:\n"); if (prevTop<0) { fprintf(fp,"\n"); return; } fprintf(fp,"%d",istring[prev_stack[0]]); for(long i=1; i<=prevTop; i++) fprintf(fp,",%d",istring[prev_stack[i]]); fputc('\n',fp); }//end show_prev_stack #endif #ifdef _debug // only used for testing -- show the contents of prev[] void show_prev(Lstring& prev,FILE* fp=stdout) { fprintf(fp,"prev:\n"); for(long i=0; i\n",gr); return; } l=Gcntxt[gr]; fprintf(fp,"group %d={%d",getGsize(gr),i); e=getGend(gr); while(true) { if (e==i) break; i=Gnext[i]; fprintf(fp,",%d",i); } fprintf(fp,"}\n"); }//show group #endif #ifdef _debug // if procgr=nil -- initial conf // otherwise after procgr processed void show_conf(Lstring& istring,long procgr=-2,FILE* fp=stdout) { long i, j, s, e, last_group; fprintf(fp,"Configuration: --------------------------------\n"); for(j=Cstart; j!=nil; j=Cnext[j]) { show_group(j,istring); last_group=j; } if (procgr>-2) { if (procgr==nil) { s=Gstart[last_group]; e=getGend(last_group); for(i=s; ; i=Gnext[i]) { fprintf(fp,"prev[%d]=%d\n",i,prev[i]); if (i==e) break; } }else if (procgr==Cstart) { fprintf(fp,"no prev[] as proc. group %d is the first\n",procgr); }else{ j=Cprev[procgr]; s=Gstart[j]; e=getGend(j); if (s!=nil) { for(i=s; ; i=Gnext[i]) { fprintf(fp,"prev[%d]=%d\n",i,prev[i]); if (i==e) break; } } } } fprintf(fp,"End of Configuration ---------------------------\n"); }//end show_conf #endif // prototypes for testing functions #ifdef _debug void check_conf(Lstring&,long); #endif /* ******************************************************************************** * Function bsla * * input: string in integer alphabet * ********************************************************************************/ //function bsla --------------------------------------------------------- void bsla(Lstring& istring,long*& space) { long i, j, e, s, a, b, size; long procgr; long val; // allocate memory for the data structures if (space==0) { prev=new long[12*N]; space=prev; }else{ prev=space; } Gprev=prev+N; Gnext=Gprev+N; Gstart=Gnext+N; Gcntxt=Gstart+N; Cprev=Gcntxt+N; Cnext=Cprev+N; Gmemb=Cnext+N; Gval=Gmemb+N; auxA=Gval+N; auxB=auxA+N; auxC=auxB+N; // and initialize it for(i=0; iistring[i]) { while(true) { j=top_prev(); if (j==nil) { push_prev(i); //show_prev_stack(istring); prev[i]=nil; break; } if (istring[j] > istring[i]) { pop_prev(); //show_prev_stack(istring); continue; }else if (istring[j] == istring[i]) { prev_stack[prevTop]=i; //show_prev_stack(istring); prev[i]=prev[j]; break; }else{ prev[i]=j; push_prev(i); //show_prev_stack(istring); break; } } }//end case going down }//end for /* ************************************************************************ * end of setup initial prev * **************************************************************************/ /* ************************************************************************ * setup initial config * **************************************************************************/ // group by first letter last_group_number=0; for(i=0; i0) Cprev[i]=i-1; else Cprev[i]=nil; }else if (i==last_group_number) { Cnext[i]=nil; if (i>0) Cprev[i]=i-1; else Cprev[i]=nil; } } /* ************************************************************************ * end of setup initial conf * **************************************************************************/ /* ************************************************************************ * the refinement loop * **************************************************************************/ procgr=last_group_number; #ifdef _debug show_conf(istring,nil); check_conf(istring,nil); #endif long currenttargetgr; long currentchildgr; long currentval, i1, i2, chunksize, gr; while(true) { // refinement loop s=Gstart[procgr]; e=Gprev[s]; // if procgr has size 1, all tghis can be skipped if (getGsize(procgr)==1) { j=Gstart[procgr]; if (prev[j]==nil) { // no refinement taking place #ifdef _debug printf("processing of group %d complete\n",procgr); show_conf(istring,procgr); check_conf(istring,procgr); #endif procgr=Cprev[procgr]; if (procgr==nil) break; if (procgr==Cstart) break; continue; }else{ auxA[0]=j; a=1; Gval[j]=1; goto process_loop; } } // first compute valences and check if any refinement will take place ***************** val=1; a=0; for(i=e; ; i=Gprev[i]) { if (prev[i]!=nil) a=1; if (i==s) { Gval[i]=val; break; }else{ if (i==Gcntxt[procgr]+Gprev[i]) { val++; Gval[i]=nil; }else{ Gval[i]=val; val=1; } } } if (a==0) {// no refinement taking place #ifdef _debug printf("processing of group %d complete\n",procgr); show_conf(istring,procgr); check_conf(istring,procgr); #endif procgr=Cprev[procgr]; if (procgr==nil) break; if (procgr==Cstart) break; continue; } #ifdef _debug printf("valencies:\n"); for(i=s; ; i=Gnext[i]) { if (Gval[i]==nil) { if (i==e) break; continue; } printf("%d valence %d\n",i,Gval[i]); if (i==e) break; } #endif // end compute valences ************************************************ // compute buckets starts for target groups in auxC ******************* // first blank auxA for(i=s; ; i=Gnext[i]) { if (prev[i]==nil || Gval[i]==nil) { if (i==e) break; continue; } j=Gmemb[prev[i]]; auxA[j]=nil; if (i==e) break; } // now compute group translations val=0; for(i=s; ; i=Gnext[i]) { if (prev[i]==nil || Gval[i]==nil) { if (i==e) break; continue; } j=Gmemb[prev[i]]; if (auxA[j]==nil) { auxA[j]=val; auxB[val]=0; val++; } if (i==e) break; } #ifdef _debug printf("target group translations:\n"); for(i=s; ; i=Gnext[i]) { if (prev[i]==nil || Gval[i]==nil) { if (i==e) break; continue; } j=Gmemb[prev[i]]; printf("target group %d for %d translates to %d\n",j,i,auxA[j]); if (i==e) break; } #endif // compute target group frequences in auxB for(i=s; ; i=Gnext[i]) { if (prev[i]==nil || Gval[i]==nil) { if (i==e) break; continue; } j=auxA[Gmemb[prev[i]]]; auxB[j]++; if (i==e) break; } #ifdef _debug printf("target group frequences:\n"); for(i=0; i0) printf("val %d occurs %d x\n",i,auxA[i]); if (i==e) break; } #endif //compute valencies bucket starts, use val for cummulation val=0; for(i=size; i>=0; i--) { if (auxA[i]==nil || auxA[i]==0) continue; a=auxA[i]; auxA[i]=val; val += a; } #ifdef _debug printf("bucket starts for valencies:\n"); for(i=e; ; i=Gprev[i]) { if (prev[i]==nil || Gval[i]==nil) { if (i==s) break; continue; } if (auxA[i]!=nil) printf("val %d bucket starts at %d\n",i,auxA[i]); if (i==s) break; } #endif // fill in the buckets, the buckets are in auxB b=0; for(i=e; ; i=Gprev[i]) { if (prev[i]==nil || Gval[i]==nil) { if (i==s) break; continue; } j=auxA[Gval[i]]; auxB[j]=i; auxA[Gval[i]]=j+1; b++; if (i==s) break; } // auxB[0..b-1] contains relevant indices of procgr ordered by valencies in reverse #ifdef _debug printf("procgr indices ordered by valencies:\n"); for(i=0; i=0) { if (prev[j]>prev[auxB[prevTop]]) { #ifdef _debug printf("prev[%d]updated from %d to %d\n",auxB[prevTop],prev[auxB[prevTop]],prev[j]); #endif prev[auxB[prevTop--]]=prev[j]; }else{ prevTop--; } } continue; } // so we have Gprev[j] and it can be a better candidate, as long as it is not in the same chunk if (i==i2 || Gprev[j]!=prev[auxA[i+1]]) { // auxA[i+1] is not in the same chunk and so Gprev[j] if (Gprev[j]>prev[j]) { // is a very good candidate for prev[j] #ifdef _debug printf("prev[%d]updated from %d to ",j,prev[j]); #endif prev[j]=Gprev[j]; #ifdef _debug printf("%d\n",prev[j]); #endif while(prevTop>=0) { if (prev[j]>prev[auxB[prevTop]]) { #ifdef _debug printf("prev[%d]updated from %d to %d\n",auxB[prevTop],prev[auxB[prevTop]],prev[j]); #endif prev[auxB[prevTop--]]=prev[j]; }else{ prevTop--; } } }else{ // so we did not find a better candidate for prev[j], so put it in queue auxB[++prevTop]=j; // put it in queue } }else{ // so Gprev[j]==prev[auxA[i+1]] i.e. Gprev[j] is in the same chunk auxB[++prevTop]=j; // put it in queue } } // if the target gr is of the same size as the chunk, we can just update its // context and are done if (chunksize==getGsize(currenttargetgr)) { Gcntxt[currenttargetgr]=Gcntxt[currenttargetgr]+currentval*Gcntxt[procgr]; currentchildgr=currenttargetgr; #ifdef _debug for(i=i1; i1<=i2; i1++) { printf("moving %d (index=%d, procgr=%d, val=%d) from group %d to recycled group %d\n", prev[auxA[i]],auxA[i],procgr,currentval,currenttargetgr,currentchildgr); show_conf(istring); } #endif }else{ currentchildgr=++last_group_number; currentval=Gval[auxA[i1]]; j=prev[auxA[i1]]; removeFromGroup(currenttargetgr,j); startGroup(currentchildgr,j); Gcntxt[currentchildgr]=Gcntxt[currenttargetgr]+currentval*Gcntxt[procgr]; // place the new group in conf s=Cnext[currenttargetgr]; Cnext[currenttargetgr]=currentchildgr; Cprev[currentchildgr]=currenttargetgr; Cnext[currentchildgr]=s; Cprev[s]=currentchildgr; #ifdef _debug printf("moving %d (index=%d, procgr=%d, val=%d) from group %d to new group %d\n", j,auxA[i1],procgr,currentval,currenttargetgr,currentchildgr); show_conf(istring); #endif for(i=i1+1; i<=i2; i++) { j=prev[auxA[i]]; removeFromGroup(currenttargetgr,j); prependGroup(currentchildgr,j); #ifdef _debug printf("moving %d (index=%d, procgr=%d, val=%d) from group %d to existing group %d\n", j,auxA[i],procgr,currentval,currenttargetgr,currentchildgr); show_conf(istring); #endif } }// end process chunk i1=i2+1; if (i1>=a) break; currenttargetgr=Gmemb[prev[auxA[i1]]]; currentval=Gval[auxA[i1]]; }//endwhile process loop #ifdef _debug printf("processing of group %d complete\n",procgr); show_conf(istring,procgr); check_conf(istring,procgr); #endif procgr=Cprev[procgr]; if (procgr==nil) break; if (procgr==Cstart) break; }//endwhile refinenement loop /* ************************************************************************ * end of the refinement loop * **************************************************************************/ }//end bsla /******************************************************************************** ******************************************************************************** ********************* end of bsla code ***************************************** ******************************************************************************** *******************************************************************************/ // to compute LA --------------------------------------------------------------- void bsla2la(Lstring& istring,long* la,long*& space) { N=istring.getLen(); bsla(istring,space); for(long i=0; i 0 if x1[form1..to1] > x2[form2..to2] long lex(Lstring& x1,long from1,long to1,Lstring& x2,long from2,long to2) { long lcp=to1-from1+1; if (to2-from2+1 < lcp) lcp=to2-from2+1; for(long i=0; ix2[from2+i]) return 1; } // so x1[from1..from1+lcp-1]==x2[from2..from2+lcp-1] if (to1-from1==to2-from2) return 0; if (to1-from1=0) return false; } return true; }//end isLyndon #endif #ifdef _debug // returns < 0 if in the conf, gr1 precedes gr2 // returns 0 if gr1=gr2 // returns > 0 if in the conf, gr1 succeeds gr2 long grcmp(long gr1,long gr2,Lstring& istring) { long i; if (gr1>last_group_number) { printf("grcmp error: %d > %d\n",gr1,last_group_number); show_conf(istring); getchar(); exit(0); } if (gr2>last_group_number) { printf("grcmp error: %d > %d\n",gr2,last_group_number); show_conf(istring); getchar(); exit(0); } if (gr1==gr2) return 0; if (gr1==Cstart) return -1; else if (gr2==Cstart) return 1; for(i=Cstart; i!=nil; i=Cnext[i]) { if (i==gr1) return -1; else if (i==gr2) return 1; } printf("grcmp error\n"); show_conf(istring); getchar(); exit(0); }//end grcmp #endif #ifdef _debug // returns true if x[from..to] is maximal Lyndon, false otherwise bool isMaxLyndon(Lstring& x,long from,long to,long N) { if (! isLyndon(x,from,to)) return false; for(long i=to+1; iCstart) { a1=Gcntxt[Cprev[j]]; i1=Gstart[Cprev[j]]; a2=Gcntxt[j]; i2=Gstart[j]; // need to show that istring[i1..i1+a1-1] < istring[i2..i2+a2-1] if (lex(istring,i1,i1+a1-1,istring,i2,i2+a2-1)>=0) { printf("check_conf error: lex order error: group %d context <",Cprev[j]); for(long u=0; u is not smaller than group %d context <",istring[i1+a1-1],j); for(long u=0; u\n",istring[i2+a2-1]); getchar(); exit(0); } } }//end traverse conf for(i=0; i=0) { printf("check_conf error: prev error: prev[%d]=%d but %d in gr=%d and %d i gr=%d\n",i,j,i,Gmemb[i],j,Gmemb[j]); getchar(); exit(0); } } // so prev[i] is good, but could it be better? for(s=0; sj) { printf("check_conf error: prev[%d]=%d error as %d would be better prev\n",i,j,s); getchar(); exit(0); } } } } printf("check_conf: check_conf for proc. group %d successful\n",procgr); }//end check_conf #endif #ifdef _stand_alone // translate to integer alphabet char* toia(char* x) { static char buf[500]; char alpha[500]; char *y=x; long i, j; for(i=0; i<500; i++) buf[i]=alpha[i]=-1; while(*y != '\0') { buf[*y]++; y++; } for(j=i=0; i<500; i++) if (buf[i]>=0) alpha[j++]=(char) i; y=x; buf[0]='\0'; while(*y != '\0') { // translate *y strcat(buf,"."); for(i=0; alpha[i]>=0; i++) { if (alpha[i]==*y) { sprintf(buf+strlen(buf),"%u",i); break; } } y++; } return buf; } // function main ----------------------------------------------- int main(int argc,char* argv[]) { char infile[100]; char* line; long linesize; long i, d, ds; char c; if (argc != 2) { printf("usage -- %s \n",argv[0]); return 0; } strcpy(infile,argv[1]); FILE* fp; fp=fopen(infile,"r"); if (fp==0) { printf("can't open \"%s\"\n",infile); return 0; }else{ printf("input file \"%s\" successfully opened\n",infile); printf("warning -- all the strings in the file \"%s\" are assumed" " to be of the same length\n",infile); } // determine the length of the strings // read the first line to determine the length of the string N=0; while(true) { c=fgetc(fp); if (feof(fp)) break; if (c=='\n'||c=='\r') break; if (c=='.') N++; } rewind(fp); d=N; ds=0; while(d > 0) { ds++; d=d/10; } linesize = (ds+1)*N+1; line=new char[linesize]; Lstring *s=new Lstring(N); // process the file while(fgets(line,linesize,fp)) { for(i=strlen(line); i>=0; i--) { // deend the line if (line[i]=='\0' || line[i]=='\n' || line[i]=='\r') { line[i]='\0'; continue; }else{ break; } }//endfor (*s)=line; s->shownl(); long* space=0; long* la = new long[s->getLen()]; bsla2la(*s,la,space); for(long i=0; i