#include "TaggedTreebank.h"

#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

/* Set up the list of acceptable part-of-speech tags.
** NOTE(review): the labels follow the Penn Treebank tagset, but "PP", "PPS"
** and "WPS" are spelled differently from the usual PRP / PRP$ / WP$ --
** presumably this matches the corpus being read; confirm against the data.
*/
TaggedTreebank::TaggedTreebank(void)
{
    tags.push_back("CC");   // Coordinating conjunction
    tags.push_back("CD");   // Cardinal number
    tags.push_back("DT");   // Determiner
    tags.push_back("EX");   // Existential there
    tags.push_back("FW");   // Foreign word
    tags.push_back("IN");   // Preposition or subordinating conjunction
    tags.push_back("JJ");   // Adjective
    tags.push_back("JJR");  // Adjective, comparative
    tags.push_back("JJS");  // Adjective, superlative
    tags.push_back("LS");   // List item marker
    tags.push_back("MD");   // Modal
    tags.push_back("NN");   // Noun, singular or mass
    tags.push_back("NNS");  // Noun, plural
    tags.push_back("NNP");  // Proper noun, singular
    tags.push_back("NNPS"); // Proper noun, plural
    tags.push_back("PDT");  // Predeterminer
    tags.push_back("POS");  // Possessive ending
    tags.push_back("PP");   // Personal pronoun
    tags.push_back("PPS");  // Possessive pronoun
    tags.push_back("RB");   // Adverb
    tags.push_back("RBR");  // Adverb, comparative
    tags.push_back("RBS");  // Adverb, superlative
    tags.push_back("RP");   // Particle
    tags.push_back("SYM");  // Symbol
    tags.push_back("TO");   // to
    tags.push_back("UH");   // Interjection
    tags.push_back("VB");   // Verb, base form
    tags.push_back("VBD");  // Verb, past tense
    tags.push_back("VBG");  // Verb, gerund or present participle
    tags.push_back("VBN");  // Verb, past participle
    tags.push_back("VBP");  // Verb, non-3rd person singular present
    tags.push_back("VBZ");  // Verb, 3rd person singular present
    tags.push_back("WDT");  // Wh-determiner
    tags.push_back("WP");   // Wh-pronoun
    tags.push_back("WPS");  // Possessive wh-pronoun
    tags.push_back("WRB");  // Wh-adverb
}

TaggedTreebank::~TaggedTreebank(void)
{
}

/* Pull the PoS tags from a bracketed treebank file and assemble the list of
** (word, tag) items.
**
** A tag always starts right after a '(' and ends at the first blank; the
** word (if any) follows the tag and ends at the matching ')'.
**
** filename: path of the treebank file to read.
** returns:  one WordItem per (tag word) pair whose tag is in the tagset, in
**           file order. Returns an empty list if the file cannot be opened.
*/
vector<WordItem> TaggedTreebank::Tag(string filename)
{
    cout << endl << "Pulling PoS from a Tagged Treebank ... \n";

    vector<WordItem> t;

    // Read-only stream: a plain fstream opens in|out by default and would
    // refuse files we only have read permission for.
    ifstream Tagged(filename.c_str());
    if (!Tagged.is_open())
    {
        // Without this guard the old eof()-driven loop never terminated,
        // because a failed open sets failbit but never eofbit.
        cerr << "Unable to open treebank file: " << filename << endl;
        return t;
    }

    // Parser state is kept across lines on purpose: a bracketed entry is
    // allowed to continue on the next line.
    string line, word, tag;
    bool lookForTag = true;

    // Loop on the result of getline so a failed read is never processed.
    while (getline(Tagged, line))
    {
        for (string::size_type i = 0; i < line.size(); ++i)
        {
            const char c = line[i];
            const bool isBlank = (c == ' ' || c == '\t');

            if (c == '(')
            {
                // A new tag starts right after the opening bracket.
                lookForTag = true;
                tag = "";
            }
            else if (c == ')')
            {
                // A closing bracket ends a (tag word) pair; brackets that
                // only close a phrase have no pending word and are ignored.
                if (word != "")
                {
                    // Drop everything from the first '#' onwards in the tag.
                    const string::size_type hashPos = tag.find('#');
                    if (hashPos != string::npos)
                        tag.resize(hashPos);

                    if (isValidTag(tag))
                        t.push_back(WordItem(word, tag));

                    tag = "";
                    word = "";
                }
            }
            else if (!isBlank)
            {
                // Non-blank characters extend whichever token is in progress.
                if (lookForTag)
                    tag += c;
                else
                    word += c;
            }
            else if (lookForTag)
            {
                // The first blank after the tag ends it; the word comes next.
                lookForTag = false;
            }
        }
    }

    Tagged.close();
    return t;
}

/* Make sure that the tag is in the tagset.
** returns: true if tag exactly matches one of the accepted tags.
*/
bool TaggedTreebank::isValidTag(string tag)
{
    return std::find(tags.begin(), tags.end(), tag) != tags.end();
}