正则表达式(非确定性有限状态机NFA) -- C++实现


实现代码

// NFA.h

#ifndef NFA_H
#define NFA_H

#include "Component.h"
#include "DirectionGraphDFS.h"

namespace jay{

	// 该算法的具体原理查看 算法(java实现)第四版 第五章第四小节
	class NFA
	{
	private:
		DirectionGraphDFS<int> m_graph;     // digraph of epsilon transitions
		string m_regexp;     // regular expression
		int m_reg_legth;       // number of characters in regular expression

	public:
		NFA(string regexp)
		{
			m_regexp = regexp;
			m_reg_legth = regexp.length();
			stack<int> ops; 

			for (int i = 0; i < m_reg_legth; i++) 
			{ 
				int lp = i; 

				if (regexp.at(i) == '(' || regexp.at(i) == '|') 
					ops.push(i); 
				else if (regexp.at(i) == ')') 
				{
					int or = ops.top();
					ops.pop(); 

					// 2-way or operator
					if (regexp.at(or) == '|') 
					{ 
						lp = ops.top();
						ops.pop();
						m_graph.AddEdge(lp, or+1);
						m_graph.AddEdge(or, i);
					}
					else if (regexp.at(or) == '(')
						lp = or;
					else 
						throw "括号不匹配";
				} 

				// closure operator (uses 1-character lookahead)
				if (i < m_reg_legth-1 && regexp.at(i+1) == '*') { 
					m_graph.AddEdge(lp, i+1); 
					m_graph.AddEdge(i+1, lp); 
				} 
				if (regexp.at(i) == '(' || regexp.at(i) == '*' || regexp.at(i) == ')') 
					m_graph.AddEdge(i, i+1);

			}

			if (ops.size() != 0)
				throw "括号不匹配";
		}

		bool Recognizes(string txt) 
		{
			m_graph.DFS(0);
			list<int> pc;
			for (int v = 0; v < m_graph.GetVexterCount(); v++)
				if (m_graph.Marked(v)) 
					pc.push_front(v);

			// Compute possible NFA states for txt[i+1]
			for (int i = 0; i < txt.length(); i++) 
			{
				if (txt.at(i) == '*' || txt.at(i) == '|' || txt.at(i) == '(' || txt.at(i) == ')')
					throw "text contains the metacharacter ";

				list<int> match;
				list<int>::iterator pcit = pc.begin();
				while (pcit != pc.end()) 
				{
					int v = *pcit;
					if (v == m_reg_legth) 
						continue;
					if ((m_regexp.at(v) == txt.at(i)) || m_regexp.at(v) == '.')
						match.push_front(v+1); 

					pcit++;
				}

				pc.clear();
				DirectionGraphDFS<int> dfs(m_graph); 
				dfs.DFS(match);
				for (int v = 0; v < dfs.GetVexterCount(); v++)
					if (dfs.Marked(v)) 
						pc.push_front(v);

				// optimization if no states reachable
				if (pc.size() == 0) 
					return false;
			}

			// check for accept state
			list<int>::iterator it = pc.begin();
			for (;it != pc.end();it++)
			{
				if (*it == m_reg_legth -1 ) 
					return true;
			}

			return false;
		}

	};
    
};

#endif 

测试代码

//main.cpp
#include "Component.h"
#include "NFA.h"
using namespace jay;


int main(int argc, char* argv[])
{
	NFA nfa("((A*B|AC)D)");
	cout << "Recognizes AAAAAAABD ? " << nfa.Recognizes("AAAAAAABD") << endl;
	cout << "Recognizes AAAABD ? " << nfa.Recognizes("AAAABD") << endl;
	cout << "Recognizes ACD ? " << nfa.Recognizes("ACD") << endl;
	cout << "Recognizes AC ? " << nfa.Recognizes("AC") << endl;
	cout << "Recognizes AAAAAAAAAAAAAAAAABD ? " << nfa.Recognizes("AAAAAAAAAAAAAAAAABD") << endl;
	cout << "Recognizes ABCD ? " << nfa.Recognizes("ABCD") << endl;
	cout << "Recognizes BD ? " << nfa.Recognizes("BD") << endl;

	cout << "--------------------------------------------" <<endl;
	NFA nfa2("((AB)*|C)");
	cout << "Recognizes AB ? " << nfa2.Recognizes("AB") << endl;
	cout << "Recognizes ABABAB ? " << nfa2.Recognizes("ABABAB") << endl;
	cout << "Recognizes ABABA ? " << nfa2.Recognizes("ABABA") << endl;
	cout << "Recognizes C ? " << nfa2.Recognizes("C") << endl;
}

运行结果

暂无评论

注册用户登录后才能发表或者回复评论,请先登录 注册。