#include "MaltParserGenerator.h"

// NOTE(review): the header names of the original standard includes were lost;
// the set below is what this file actually uses (cout/cerr, file streams,
// system()). Unqualified std names rely on a using-directive that presumably
// comes from MaltParserGenerator.h - confirm.
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

namespace
{
    /* True when the part-of-speech tag is one of the noun tags that Generate
    ** merges into a single entry per distinct word. */
    bool IsNounTag(const string& tag)
    {
        return tag == "NP" || tag == "NNP" || tag == "NN" || tag == "NNS";
    }
}

/* Nothing to set up: the generator keeps no state of its own. */
MaltParserGenerator::MaltParserGenerator(void)
{
}

/* Nothing to release. */
MaltParserGenerator::~MaltParserGenerator(void)
{
}

/* Generate dependencies on a set of tagged sentences.
**
** Steps:
**   1. write the article to MaltTab.txt (SaveAsMaltTab),
**   2. run MaltParser (CallMaltParser),
**   3. read DependencyTagged.txt, where every record is the four
**      whitespace-separated fields: word, tag, parent record number, label,
**   4. echo the words, space separated, into Original.txt,
**   5. build and return the DependencyStructure.
**
** article: the tagged words to parse.
**          NOTE(review): element type reconstructed as WordItem from the
**          .word/.tag accesses and the WordItem(word, tag) constructor -
**          confirm against the header.
**
** Returns the structure holding the words and the edges between them. If
** DependencyTagged.txt cannot be opened the structure is returned empty and a
** message is written to cerr. Records whose parent number does not refer to
** a record in the file are reported on cerr and produce no edge.
*/
DependencyStructure MaltParserGenerator::Generate(vector<WordItem> article)
{
    cout << endl << "Running MaltParser ... ";

    // Save the list of tagged words in MaltTab
    DependencyStructure s;
    SaveAsMaltTab(article);

    // Run MaltParser on the file
    CallMaltParser();
    cout << "... ";

    // Read the dependency structure back from the MaltTab file
    ifstream Tagged("DependencyTagged.txt");
    ofstream OriginalArticle("Original.txt", ios::out);
    if (!Tagged)
    {
        // Without this check a failed open never reaches EOF and the read
        // loop would spin forever.
        cerr << "MaltParserGenerator: cannot open DependencyTagged.txt" << endl;
        return s;
    }

    // The malt tab file references parents by their original record number,
    // but some of the nouns refer to the same thing, so build a listing of
    // which record goes to which location in the structure.
    // Slot 0 is a placeholder so that record numbers are 1-based; a parent
    // number of 0 therefore maps to location 0 (presumably the root - confirm).
    vector<int> location;
    location.push_back(0);

    // Parent number and label of each record, kept so the edges can be built
    // once every record's location is known (a parent may come later in the
    // file than its child).
    vector<int> parents;
    vector<string> labels;

    string word, tag, label;
    int parent;

    // Testing the extraction itself, rather than eof(), stops cleanly at the
    // end of the file, on a truncated record and on a non-numeric parent
    // field, and never processes the last record twice.
    while (Tagged >> word >> tag >> parent >> label)
    {
        OriginalArticle << word << " ";

        // A noun is looked up first so repeated nouns share one entry;
        // Find signals "not found" with -1. Everything else is always added.
        int where = -1;
        if (IsNounTag(tag))
            where = s.Find(word);
        if (where == -1)
            where = s.AddWord(WordItem(word, tag));

        location.push_back(where);
        parents.push_back(parent);
        labels.push_back(label);
    }

    // Once we have the listing of which records go to which locations,
    // record the dependencies in the Edges array.
    for (size_t k = 0; k < parents.size(); ++k)
    {
        const int head = parents[k];
        if (head < 0 || static_cast<size_t>(head) >= location.size())
        {
            // A parent number outside the file would index past the end of
            // the location table.
            cerr << "MaltParserGenerator: record " << (k + 1)
                 << " has invalid parent " << head << "; edge skipped" << endl;
            continue;
        }
        s.Edges.push_back(Dependency(location[head], location[k + 1], labels[k]));
    }

    return s;
}

/* This saves the list of tagged words in MaltTab format.
** MaltTab has one WordItem per line, with the WordItem and its tag separated
** by a tab. Items whose word is empty are not written.
**
** article: the tagged words to write to MaltTab.txt.
**
** If MaltTab.txt cannot be opened a message is written to cerr and nothing
** is saved.
*/
void MaltParserGenerator::SaveAsMaltTab(vector<WordItem> article)
{
    ofstream MaltTabArticle("MaltTab.txt", ios::out);
    if (!MaltTabArticle)
    {
        cerr << "MaltParserGenerator: cannot open MaltTab.txt for writing" << endl;
        return;
    }

    for (size_t i = 0; i < article.size(); ++i)
    {
        if (article[i].word != "")
            MaltTabArticle << article[i].word << '\t' << article[i].tag << '\n';
    }
    // The stream is flushed and closed when it goes out of scope, i.e. before
    // the caller starts MaltParser.
}

/* Issues a system call to the MaltParser program, which takes in the
** MaltTab data file and generates dependencies from it.
**
** A non-zero status from the command is reported on cerr; the caller is not
** otherwise notified.
*/
void MaltParserGenerator::CallMaltParser()
{
    const string command = "cd MaltParser && maltparser.exe -f eng/option.dat";
    const int status = system(command.c_str());
    if (status != 0)
        cerr << "MaltParserGenerator: MaltParser command returned status " << status << endl;
}