#include "Generator.h"

#include <algorithm>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

namespace
{
	/* Return the vertex on the far side of `edge` as seen from `index`,
	   or -1 when the edge does not touch `index` at all. */
	template <typename EdgeT>
	int otherEndpoint(const EdgeT &edge, int index)
	{
		if (edge.child == index)
			return edge.parent;
		if (edge.parent == index)
			return edge.child;
		return -1;
	}

	/* Escape `text` so it can sit inside a double-quoted JavaScript string literal. */
	std::string escapeForJavaScript(const std::string &text)
	{
		std::string escaped;
		escaped.reserve(text.size());
		for (std::string::size_type i = 0; i < text.size(); ++i)
		{
			switch (text[i])
			{
				case '\\': escaped += "\\\\"; break;
				case '"':  escaped += "\\\""; break;
				case '\n': escaped += "\\n";  break;
				case '\r': escaped += "\\r";  break;
				default:   escaped += text[i]; break;
			}
		}
		return escaped;
	}

	/* Escape `text` for a double-quoted CSV field (embedded quotes are doubled). */
	std::string escapeForCsv(const std::string &text)
	{
		std::string escaped;
		escaped.reserve(text.size());
		for (std::string::size_type i = 0; i < text.size(); ++i)
		{
			if (text[i] == '"')
				escaped += '"';
			escaped += text[i];
		}
		return escaped;
	}
}

/* Register the edge labels that each word-selection step looks for.
   NOTE(review): the labels look like link-grammar connector names - confirm
   against whatever produces DependencyStructure. */
Generator::Generator(void)
{
	adverbLabels.push_back("EB");
	adverbLabels.push_back("EC");
	adverbLabels.push_back("EE");

	preVerbLabels.push_back("E");

	verbLabels.push_back("S");

	adjectiveLabels.push_back("A");
	adjectiveLabels.push_back("AN");
	//adjectiveLabels.push_back("M"); I'm not sure how this works.

	seriesLabels.push_back("G");
	seriesLabels.push_back("ID");
	seriesLabels.push_back("TM");
}

Generator::~Generator(void)
{
}

/* Generate a summary by following paths along the dependency structure.

   Copies Article into the member `a`, dumps it to article.js and
   rankings.csv, then builds "subject verb object ." from the selected
   candidate word. Words that are emitted (or rejected as candidates) get
   their ranking reset to 0 in the copy.

   Returns the summary entries joined with a leading space before each one;
   returns an empty string when the article has no vertices or no candidate
   with a related verb can be found. */
std::string Generator::Summarize(DependencyStructure &Article)
{
	a = Article;
	std::cout << std::endl << "Generating Summary ... ";

	std::vector<std::string> summary;
	saveAsJavaScript();

	std::fstream rankings;
	rankings.open("rankings.csv", std::fstream::out | std::fstream::trunc);
	rankings << "\"Index\",\"Word\",\"Ranking\",\"Tag\"" << '\n';

	// Nothing to summarize; the loop below reads vertex 0 unconditionally.
	if (a.Verticies.empty())
		return std::string();

	const int vertexCount = static_cast<int>(a.Verticies.size());

	for (;;)
	{
		// Pick the candidate subject. Vertex 0 is the fallback when no
		// noun qualifies.
		// NOTE(review): a noun is only accepted when it beats the running
		// maximum over *all* tags, so this is not necessarily the
		// highest-ranked noun. Kept as-is to preserve existing output.
		int candidate = 0;
		int maximum = a.Verticies[0].ranking;
		for (int w = 1; w < vertexCount; ++w)
		{
			const WordItem &vertex = a.Verticies[w];

			if (VERBOSITY > 2)
				std::cout << vertex.ranking << '\t' << vertex.tag << '\t' << vertex.word << std::endl;

			// One row per vertex per pass; a retry appends another full pass.
			rankings << w << ",\"" << escapeForCsv(vertex.word) << "\","
			         << vertex.ranking << ",\"" << escapeForCsv(vertex.tag) << "\"" << '\n';

			if (vertex.ranking > maximum)
			{
				maximum = vertex.ranking;
				if (vertex.tag[0] == 'n')
					candidate = w;
			}
		}

		// Add all articles/determiners
		// add other relevant adjectives
		// add strongly tied words
		// add the noun
		// add a verb
		// follow the same process as the subject for the object
		const int verb = getHighestRatedRelatedBy(candidate, verbLabels);
		if (verb != -1)
		{
			addSubject(candidate, summary);
			addVerb(verb, summary);

			const int object = getHighestRatedRelated(verb, "n");
			if ((object >= 0) && (a.Verticies[object].ranking > 0))
				addSubject(object, summary);

			summary.push_back(".");
			break;
		}

		// The candidate has no usable verb. If it was already discarded,
		// retrying would select it again forever, so give up instead.
		if (a.Verticies[candidate].ranking == 0)
			break;

		// Discard this candidate and look for the next one.
		a.Verticies[candidate].ranking = 0;
	}

	std::string text;
	for (std::vector<std::string>::const_iterator iter = summary.begin(); iter != summary.end(); ++iter)
	{
		text += " ";
		text += *iter;
	}
	return text;
}

/* Append "<word> (#<index> !<ranking>)\n" for vertex `index` to `summary`
   and zero its ranking so it is never emitted twice. */
void Generator::addWord(int index, std::vector<std::string> &summary)
{
	const WordItem &word = a.Verticies[index];

	if (word.ranking == 0)	// no word should have a zero ranking
		return;				// unless it's already gone through this. So don't reprint anything.

	// A stream avoids the fixed-size buffer the old itoa() calls could overrun.
	const int ranking = word.ranking;
	std::ostringstream entry;
	entry << word.word << " (#" << index << " !" << ranking << ")\n";
	summary.push_back(entry.str());

	a.Verticies[index].ranking = 0;
}

/* Emit the noun phrase headed by vertex `index`: one determiner, qualifying
   adjectives, strongly tied words, then the noun itself. */
void Generator::addSubject(int index, std::vector<std::string> &summary)
{
	// Add all articles/determiners
	for (std::size_t i = 0; i < a.Edges.size(); ++i)
	{
		if ((a.Edges[i].label[0] == 'D') && (a.Edges[i].child == index))
		{
			addWord(a.Edges[i].parent, summary);
			break; // only add one determiner.
		}
	}

	// add other relevant adjectives (ranked at least 40% of the noun's ranking)
	addWordsRelatedTo(index, adjectiveLabels, static_cast<int>(0.4 * a.Verticies[index].ranking), summary);

	// add strongly tied words
	addWordsRelatedTo(index, seriesLabels, 0, summary);

	// add the noun
	addWord(index, summary);
}

/* Emit the verb at vertex `index`, preceded by its adverbs and pre-verb words. */
void Generator::addVerb(int index, std::vector<std::string> &summary)
{
	// add adverbs (ranked at least half of the verb's ranking)
	addWordsRelatedTo(index, adverbLabels, static_cast<int>(a.Verticies[index].ranking * 0.5), summary);

	// add other words that define the meaning of the verb
	addWordsRelatedTo(index, preVerbLabels, 0, summary);

	addWord(index, summary);
}

/* Return the neighbour of `index` (across any edge, either direction) whose
   tag equals `tag` and whose ranking is the highest positive one, or -1 when
   there is none. */
int Generator::getHighestRatedRelated(int index, std::string tag)
{
	int ranking = 0;
	int highest = -1;

	for (std::size_t i = 0; i < a.Edges.size(); ++i)
	{
		const int other = otherEndpoint(a.Edges[i], index);
		if (other == -1)
			continue;

		if ((a.Verticies[other].tag == tag) && (a.Verticies[other].ranking > ranking))
		{
			ranking = a.Verticies[other].ranking;
			highest = other;
		}
	}
	return highest;
}

/* Return the neighbour of `index` reached through an edge whose label is in
   `labels` and whose ranking is the highest positive one, or -1 when there
   is none. */
int Generator::getHighestRatedRelatedBy(int index, std::vector<std::string> labels)
{
	int ranking = 0;
	int highest = -1;

	for (std::size_t i = 0; i < a.Edges.size(); ++i)
	{
		const int other = otherEndpoint(a.Edges[i], index);
		if (other == -1)
			continue;

		if (stringIn(a.Edges[i].label, labels) && (a.Verticies[other].ranking > ranking))
		{
			ranking = a.Verticies[other].ranking;
			highest = other;
		}
	}
	return highest;
}

/* Emit every neighbour of `index` that is connected by an edge labelled with
   one of `labels` and whose ranking is at least `limit`, in edge order. */
void Generator::addWordsRelatedTo(int index, std::vector<std::string> labels, int limit, std::vector<std::string> &summary)
{
	for (std::size_t i = 0; i < a.Edges.size(); ++i)
	{
		// Test incidence first: stringIn() copies its arguments on every call.
		const int other = otherEndpoint(a.Edges[i], index);
		if (other == -1)
			continue;

		if (stringIn(a.Edges[i].label, labels) && (a.Verticies[other].ranking >= limit))
			addWord(other, summary);
	}
}

/* True when `s` is an element of `a`. (The parameter `a` hides the member
   of the same name inside this function.) */
bool Generator::stringIn(std::string s, std::vector<std::string> a)
{
	return std::find(a.begin(), a.end(), s) != a.end();
}

/* Dump the current dependency structure to "article.js" as two JavaScript
   arrays: V (word, tag, ranking per vertex) and E (child, parent, label per
   edge). */
void Generator::saveAsJavaScript()
{
	std::ofstream js;
	js.open("article.js");
	if (!js.is_open())
	{
		std::cout << "Unable to open js file" << std::endl;
		// Wait for a character on stdin so the message is seen.
		char s;
		std::cin >> s;
		return; // nothing can be written to a stream that failed to open
	}

	js << "var V = new Array();" << '\n';
	for (std::size_t i = 0; i < a.Verticies.size(); ++i)
	{
		js << "V[" << i << "] = {";
		js << "word: \"" << escapeForJavaScript(a.Verticies[i].word) << "\",";
		js << "tag: \"" << escapeForJavaScript(a.Verticies[i].tag) << "\",";
		js << "ranking: " << a.Verticies[i].ranking << "};" << '\n';
	}
	js << '\n';

	js << "var E = new Array();" << '\n';
	for (std::size_t i = 0; i < a.Edges.size(); ++i)
	{
		js << "E[" << i << "] = {";
		js << "child: " << a.Edges[i].child << ",";
		js << "parent: " << a.Edges[i].parent << ",";
		js << "label: \"" << escapeForJavaScript(a.Edges[i].label) << "\"};" << '\n';
	}
	js.close();
}