Implementation of CYK algorithm in C++
The CYK algorithm is a parsing algorithm for context-free grammars. It employs bottom-up parsing and dynamic programming. It is used to check whether a particular string can be generated from a set of productions or not.
The CYK algorithm is a parsing algorithm for context-free grammars. It employs bottom-up parsing and dynamic programming. It is used to check whether a particular string can be generated from a set of productions or not.
#include<iostream> #include<cstring> #include<algorithm> #include<string> #include<cassert> #include<iomanip> using namespace std; #define MAX 100 #define for(i,a,b) for(i=a;i<b; i++) string gram[MAX][MAX]; //to store entered grammar string dpr[MAX]; int p,np; //np-> number of productions inline string concat( string a, string b) //concatenates unique non-terminals { int i; string r=a; for(i,0,b.length()) if(r.find(b[i]) > r.length()) r+=b[i]; return (r); } inline void break_gram(string a) //seperates right hand side of entered grammar { int i; p=0; while(a.length()) { i=a.find("|"); if(i>a.length()) { dpr[p++] = a; a=""; } else { dpr[p++] = a.substr(0,i); a=a.substr(i+1,a.length()); } } } inline int lchomsky(string a) //checks if LHS of entered grammar is in CNF { if(a.length()==1 && a[0]>='A' && a[0]<='Z') return 1; return 0; } inline int rchomsky(string a) //checks if RHS of grammar is in CNF { if (a.length() == 1 && a[0]>='a' && a[0] <='z') return 1; if (a.length()==2 && a[0]>='A' && a[0]<='Z' && a[1]>='A' && a[1]<='Z' ) return 1; return 0; } inline string search_prod(string p) //returns a concatenated string of variables which can produce string p { int j,k; string r=""; for(j,0,np) { k=1; while(gram[j][k] != "") { if(gram[j][k] == p) { r=concat(r,gram[j][0]); } k++; } } return r; } inline string gen_comb(string a, string b) //creates every combination of variables from a and b . For eg: BA * AB = {BA, BB, AA, BB} { int i,j; string pri=a,re=""; for(i,0,a.length()) for(j,0,b.length()) { pri=""; pri=pri+a[i]+b[j]; re=re+search_prod(pri); //searches if the generated productions can be created or not } return re; } int main() { int i,pt,j,l,k; string a,str,r,pr,start; cout<<"\nEnter the start Variable "; cin >> start; cout<<"\nNumber of productions "; cin >> np; for(i,0,np) { cin >> a; pt=a.find("->"); gram[i][0] = a.substr(0,pt); if (lchomsky(gram[i][0]) == 0) { cout<<"\nGrammar not in Chomsky Form"; abort(); } a = a.substr(pt+2, a.length()); break_gram(a); for(j,0,p) { gram[i][j+1]=dpr[j]; if (rchomsky(dpr[j]) == 0) { cout<<"\nGrammar not in Chomsky Form"; abort(); } } } string matrix[MAX][MAX],st; cout<<"\nEnter string to be checked : "; cin >> str; for(i,0,str.length()) //Assigns values to principal diagonal of matrix { r=""; st = ""; st+=str[i]; for(j,0,np) { k=1; while(gram[j][k] != "") { if(gram[j][k] == st) { r=concat(r,gram[j][0]); } k++; } } matrix[i][i]=r; } int ii,kk; for(k,1,str.length()) //Assigns values to upper half of the matrix { for(j,k,str.length()) { r=""; for(l,j-k,j) { pr = gen_comb(matrix[j-k][l],matrix[l+1][j]); r = concat(r,pr); } matrix[j-k][j] = r; } } for(i,0,str.length()) //Prints the matrix { k=0; l=str.length()-i-1; for(j,l,str.length()) { cout<<setw(5)<<matrix[k++][j]<<" "; } cout<<endl; } int f=0; for(i,0,start.length()) if(matrix[0][str.length()-1].find(start[i]) <= matrix[0][str.length()-1].length()) //Checks if last element of first row contains a Start variable { cout<<"String can be generated\n"; return 0; } cout<<"String cannot be generated\n"; return 0; }Sample Input ::
Enter the start Variable S Number of productions 4 S->AB|BC A->BA|a B->CC|b C->AB|a Enter string to be checked :baabaOutput ::
nice code. Thanx :)
ReplyDeletethank you! it is very useful!
ReplyDeleteThank's alot, that was realy helpful.
ReplyDeleteThanks man...
ReplyDeleteany idea where to find Cyk parser in java?
ReplyDeleteExcellent! Thank you.
ReplyDeleteNice code. But do you have the code of conversing context free grammar to Chomsky normal form?
ReplyDeleteyour algorithm is almost correct. but the program crashes if the input is not in Chomsky Normal Form.
ReplyDeleteTry with this input
Enter the start Variable S
Number of productions 5
S->AB|CD
A->BC|a
B->AC|C
C->AB|CD
D->AC|d
CYK is only used when grammer is in CNF otherwise not
DeleteElegant...
ReplyDeleteconversion to cnf is missing if anyone could add that...
Hi may i know where can i get this code IN HTML?
ReplyDeleteNice Code..Many Many thanks..
ReplyDeleteMain function er comment ro barale bhalo hoto.
ReplyDeletethanks!! its realy Helpfull
ReplyDeleteI can understand the bottom up processing, but what is Chomsky Normal Form? I had a student who is not a linguistics major get this for an assignment, which is think is sucky for an IT major. I studied linguistics, and I would like to more know about this theory.
ReplyDeleteRespect and that i have a neat provide: Who Repairs House Windows Near Me house renovation budget
ReplyDelete