Thursday 16 January 2014

CYK algorithm implementation

Implementation of CYK algorithm in C++

The CYK algorithm is a parsing algorithm for context-free grammars. It employs bottom-up parsing and dynamic programming. It is used to check whether a particular string can be generated from a set of productions or not.

#include<iostream>  
#include<cstring>
#include<algorithm>
#include<string>
#include<cassert>
#include<iomanip>
using namespace std;

#define MAX 100
#define for(i,a,b) for(i=a;i<b; i++)

string gram[MAX][MAX];	//to store entered grammar
string dpr[MAX];
int p,np;		//np-> number of productions

inline string concat( string a, string b)	//concatenates unique non-terminals
{
	int i;
	string r=a;
	for(i,0,b.length())
		if(r.find(b[i]) > r.length())
			r+=b[i];
	return (r);
}

inline void break_gram(string a)	//seperates right hand side of entered grammar
{
	int i;
	p=0;
	while(a.length())
	{
		i=a.find("|");
		if(i>a.length())
		{
			dpr[p++] = a;
			a="";
		}
		else
		{
			dpr[p++] = a.substr(0,i);
			a=a.substr(i+1,a.length());
		}
	}
}

inline int lchomsky(string a)	//checks if LHS of entered grammar is in CNF
{
	if(a.length()==1 && a[0]>='A' && a[0]<='Z')
		return 1;
	return 0;
}

inline int rchomsky(string a)	//checks if RHS of grammar is in CNF
{
	if (a.length() == 1 && a[0]>='a' && a[0] <='z')
		return 1;
	if (a.length()==2 && a[0]>='A' && a[0]<='Z' && a[1]>='A' && a[1]<='Z' )
		return 1;
	return 0;
}

inline string search_prod(string p)	//returns a concatenated string of variables which can produce string p
{
	int j,k;
	string r="";
	for(j,0,np)
	{
		k=1;
		while(gram[j][k] != "")
		{
			if(gram[j][k] == p)
			{
				r=concat(r,gram[j][0]);
			}
			k++;
		}
	}	
	return r;
}

inline string gen_comb(string a, string b)	//creates every combination of variables from a and b . For eg: BA * AB = {BA, BB, AA, BB}
{
	int i,j;
	string pri=a,re="";
	for(i,0,a.length())
		for(j,0,b.length())
		{
			pri="";
			pri=pri+a[i]+b[j];
			re=re+search_prod(pri);		//searches if the generated productions can be created or not
		}		
	return re;
}

int main()
{
	int i,pt,j,l,k;
	string a,str,r,pr,start;
	cout<<"\nEnter the start Variable ";
	cin >> start;
	cout<<"\nNumber of productions ";
	cin >> np;
	for(i,0,np)
	{
		cin >> a;
		pt=a.find("->");
		gram[i][0] = a.substr(0,pt);
		if (lchomsky(gram[i][0]) == 0)
		{
			cout<<"\nGrammar not in Chomsky Form";
			abort();
		}
		a = a.substr(pt+2, a.length());
		break_gram(a);
		for(j,0,p)
		{
			gram[i][j+1]=dpr[j];
			if (rchomsky(dpr[j]) == 0)
			{
				cout<<"\nGrammar not in Chomsky Form";
				abort();
			}
		}
	}
	string matrix[MAX][MAX],st;
	cout<<"\nEnter string to be checked : ";
	cin >> str;
	for(i,0,str.length())		//Assigns values to principal diagonal of matrix
	{
		r="";
		st = "";
		st+=str[i];
		for(j,0,np)
		{
			k=1;
			while(gram[j][k] != "")
			{
				if(gram[j][k] == st)
				{
					r=concat(r,gram[j][0]);
				}
				k++;
			}
		}
		matrix[i][i]=r;
	}
	int ii,kk;
	for(k,1,str.length())		//Assigns values to upper half of the matrix
	{
		for(j,k,str.length())
		{
			r="";
			for(l,j-k,j)
			{
				pr = gen_comb(matrix[j-k][l],matrix[l+1][j]);
				r = concat(r,pr);
			}
			matrix[j-k][j] = r;
		}
	}

	for(i,0,str.length())	//Prints the matrix
	{
		k=0;
		l=str.length()-i-1;
		for(j,l,str.length())
		{
			cout<<setw(5)<<matrix[k++][j]<<" ";
		}
		cout<<endl;
	}
			
	int f=0;
	for(i,0,start.length())
		if(matrix[0][str.length()-1].find(start[i]) <= matrix[0][str.length()-1].length())	//Checks if last element of first row contains a Start variable
		{
			cout<<"String can be generated\n";
			return 0;
		}
	cout<<"String cannot be generated\n";
	return 0;
}
Sample Input ::
Enter the start Variable S

Number of productions 4

S->AB|BC
A->BA|a
B->CC|b
C->AB|a

Enter string to be checked :baaba
Output ::

15 comments:

  1. nice code. Thanx :)

    ReplyDelete
  2. thank you! it is very useful!

    ReplyDelete
  3. Thank's alot, that was realy helpful.

    ReplyDelete
  4. any idea where to find Cyk parser in java?

    ReplyDelete
  5. Nice code. But do you have the code of conversing context free grammar to Chomsky normal form?

    ReplyDelete
  6. your algorithm is almost correct. but the program crashes if the input is not in Chomsky Normal Form.
    Try with this input

    Enter the start Variable S

    Number of productions 5
    S->AB|CD
    A->BC|a
    B->AC|C
    C->AB|CD
    D->AC|d

    ReplyDelete
    Replies
    1. CYK is only used when grammer is in CNF otherwise not

      Delete
  7. Elegant...
    conversion to cnf is missing if anyone could add that...

    ReplyDelete
  8. Hi may i know where can i get this code IN HTML?

    ReplyDelete
  9. Nice Code..Many Many thanks..

    ReplyDelete
  10. Main function er comment ro barale bhalo hoto.

    ReplyDelete
  11. I can understand the bottom up processing, but what is Chomsky Normal Form? I had a student who is not a linguistics major get this for an assignment, which is think is sucky for an IT major. I studied linguistics, and I would like to more know about this theory.

    ReplyDelete