Create an interpreter for a Markov Algorithm. Rules have the syntax:
<ruleset> ::= ((<comment> | <rule>) <newline>+)* <comment> ::= # {<any character>} <rule> ::= <pattern> <whitespace> -> <whitespace> [.] <replacement> <whitespace> ::= (<tab> | <space>) [<whitespace>]
There is one rule per line. If there is a . present before the <replacement>, then this is a terminating rule in which case the interpreter must halt execution. A ruleset consists of a sequence of rules, with optional comments.
Note: Non-use of iswhite
is intentional, since depending on the locale, other chars besides space and tab might be detected by that function.
#include <cstdlib> #include <iostream> #include <fstream> #include <vector> #include <string> struct rule { std::string pattern; std::string replacement; bool terminal; rule(std::string pat, std::string rep, bool term): pattern(pat), replacement(rep), terminal(term) { } }; std::string const whitespace = " \t"; std::string::size_type const npos = std::string::npos; bool is_whitespace(char c) { return whitespace.find(c) != npos; } std::vector<rule> read_rules(std::ifstream& rulefile) { std::vector<rule> rules; std::string line; while (std::getline(rulefile, line)) { std::string::size_type pos; // remove comments pos = line.find('#'); if (pos != npos) line.resize(pos); // ignore lines consisting only of whitespace if (line.find_first_not_of(whitespace) == npos) continue; // find "->" surrounded by whitespace pos = line.find("->"); while (pos != npos && (pos == 0 || !is_whitespace(line[pos-1]))) pos = line.find("->", pos+1); if (pos == npos || line.length() < pos+3 || !is_whitespace(line[pos+2])) { std::cerr << "invalid rule: " << line << "\n"; std::exit(EXIT_FAILURE); } std::string pattern = line.substr(0, pos-1); std::string replacement = line.substr(pos+3); // remove additional separating whitespace pattern.erase(pattern.find_last_not_of(whitespace)+1); replacement.erase(0, replacement.find_first_not_of(whitespace)); // test for terminal rule bool terminal = !replacement.empty() && replacement[0] == '.'; if (terminal) replacement.erase(0,1); rules.push_back(rule(pattern, replacement, terminal)); } return rules; } std::string markov(std::vector<rule> rules, std::string input) { std::string& output = input; std::vector<rule>::iterator iter = rules.begin(); // Loop through each rule, transforming our current version // with each rule. while (iter != rules.end()) { std::string::size_type pos = output.find(iter->pattern); if (pos != npos) { output.replace(pos, iter->pattern.length(), iter->replacement); if (iter->terminal) break; iter = rules.begin(); } else ++iter; } return output; } int main(int argc, char* argv[]) { if (argc != 3) { std::cout << "usage:\n " << argv[0] << " rulefile text\n"; return EXIT_FAILURE; } std::ifstream rulefile(argv[1]); std::vector<rule> rules = read_rules(rulefile); std::string input(argv[2]); std::string output = markov(rules, input); std::cout << output << "\n"; }
Content is available under GNU Free Documentation License 1.2.