#include "FreeLingTagger.h"

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

/* Constructor: there is nothing to initialise here. */
FreeLingTagger::FreeLingTagger(void)
{
}

/* Destructor: there is nothing to release here. */
FreeLingTagger::~FreeLingTagger(void)
{
}

/* Tag takes in a filename of a text file containing sentences
** and returns a list of the tagged words, using FreeLing's Tagger.
**
** The input file is handed to CallFreeLing(), and the output file it
** names is then read as whitespace-separated (word, morph, tag) triples.
** Each triple becomes a WordItem holding the word and the tag converted
** by ConvertTag(); the morph column is read only to stay aligned.
**
** Element 0 of the result is always an entry built from empty strings,
** followed by one entry per complete triple. If the output file cannot
** be opened, only that first entry is returned and a message is written
** to stderr.
*/
vector<WordItem> FreeLingTagger::Tag(string filename)
{
    // Setup
    cout << endl << "Running FreeLing's Tagger ... ";
    vector<WordItem> t;
    string word, morph, tag;

    // Run FreeLing; it saves output as a file
    const string outFile = CallFreeLing(filename);

    // NOTE(review): the previous loop pushed one entry made from the
    // still-empty word/tag before the first read. It is kept so that the
    // indices seen by existing callers do not shift -- confirm whether
    // anything relies on it before removing it.
    t.push_back(WordItem(word, ConvertTag(word, tag)));

    // Open the file and read the words into the list.
    std::ifstream Tagged(outFile.c_str());
    if (!Tagged.is_open()) {
        // Looping on !eof() here would never terminate, because a stream
        // that failed to open never reaches end-of-file.
        std::cerr << "FreeLingTagger: cannot open " << outFile << '\n';
        return t;
    }

    // Testing the extraction itself stops on EOF and on any read error,
    // and only stores triples that were read completely (including the
    // last one when the file has no trailing newline).
    while (Tagged >> word >> morph >> tag)
        t.push_back(WordItem(word, ConvertTag(word, tag)));

    return t;
}

/* This function issues a command line call to the FreeLing utility;
** it returns the file that FreeLing saves its tagged information in.
**
** The command changes into the FreeLing directory, so both the input
** file and the output file are addressed with a "../" prefix, i.e.
** relative to the directory the program was started from.
** The output file name is returned whether or not the command succeeded;
** a non-zero status from system() is reported on stderr.
*/
string FreeLingTagger::CallFreeLing(string filename)
{
    const string outFile = "Tagged.txt";

    // NOTE(review): filename is spliced into the shell command unescaped,
    // so names containing spaces or shell metacharacters will break (or
    // alter) the command. Only pass trusted, simple file names.
    const string command = "cd FreeLing && analyzer.exe -f analyzer.cfg > ../" + outFile + " < ../" + filename;

    const int status = system(command.c_str());
    if (status != 0)
        std::cerr << "FreeLingTagger: command returned status " << status
                  << ": " << command << '\n';

    return outFile;
}

/* ConvertTag takes in a word (the WordItem parameter is the word's text)
** and a tag from FreeLing's tagger; it converts that tag into the subset
** of the Penn Treebank Tagset that MaltParser uses.
**
** Tags that need no conversion are returned unchanged. For punctuation
** (tags starting with 'F') the result is chosen from the first character
** of the word; punctuation without a special mapping is returned as its
** own tag.
*/
string FreeLingTagger::ConvertTag(string WordItem, string inTag)
{
    if (inTag == "Z")    // Z is a Cardinal Number
        return "CD";
    if (inTag == "PP$")  // Possessive Pronoun
        return "PRP$";
    if (inTag == "NP")   // Proper Noun, Singular
        return "NNP";
    if (WordItem == "to" && inTag == "IN")  // To
        return "TO";

    // Everything that is not punctuation keeps FreeLing's tag. The empty()
    // check keeps us from indexing into an empty tag.
    if (inTag.empty() || inTag[0] != 'F')
        return inTag;

    // Punctuation with no text has nothing to map; hand it back as is.
    if (WordItem.empty())
        return WordItem;

    switch (WordItem[0]) {
    case '(':
    case '{':
    case '[':
        return "-LRB-";
    case ')':
    case '}':
    case ']':
        return "-RRB-";
    case '`':
        return "``";
    case '\'':
        // NOTE(review): Penn Treebank tags a closing quote as '' rather
        // than " -- confirm this mapping is what the parser model expects.
        return "\"";
    case '-':
        // NOTE(review): Penn Treebank tags dashes as ':' -- confirm that
        // ',' is intended here.
        return ",";
    case '?':
    case '!':
        return ".";
    case ';':
        return ":";
    default:
        if (WordItem == "...")
            return ":";
        return WordItem;
    }
}