#include <Unigram_Train_Data_Formatter.h>
Public Member Functions | |
Unigram_Train_Data_Formatter () | |
virtual | ~Unigram_Train_Data_Formatter () |
void | format () |
Perform the actual formatting. | |
WordIndexDictionary & | get_dictionary () |
Return the dictionary being used by the formatter. | |
int | get_num_docs () |
The number of documents formatted. | |
int | get_total_num_words () |
The total number of words found. | |
Protected Member Functions | |
virtual int | insert_word_to_dict (std::string word) |
int | read_from_inp (LDA::unigram_document &wdoc, std::istream &inp) |
Protected Attributes | |
WordIndexDictionary | _dict |
int | _num_docs |
int | _num_words_in_all_docs |
boost::unordered_set< string > | _stopWords |
std::ifstream | _in |
DocumentWriter * | _doc_writer |
Unigram_Train_Data_Formatter::Unigram_Train_Data_Formatter | ( | ) |
Unigram_Train_Data_Formatter::~Unigram_Train_Data_Formatter | ( | ) | [virtual] |
void Unigram_Train_Data_Formatter::format | ( | ) | [virtual] |
Perform the actual formatting.
Implements Data_Formatter.
WordIndexDictionary & Unigram_Train_Data_Formatter::get_dictionary | ( | ) | [virtual] |
Return the dictionary being used by the formatter.
Implements Data_Formatter.
int Unigram_Train_Data_Formatter::get_num_docs | ( | ) | [virtual] |
The number of documents formatted.
Implements Data_Formatter.
int Unigram_Train_Data_Formatter::get_total_num_words | ( | ) | [virtual] |
The total number of words found.
Implements Data_Formatter.
int Unigram_Train_Data_Formatter::insert_word_to_dict | ( | std::string | word | ) | [protected, virtual] |
Reimplemented in Unigram_Test_Data_Formatter and Unigram_Model_Streamer.
int Unigram_Train_Data_Formatter::read_from_inp (LDA::unigram_document &wdoc, std::istream &inp) [protected]
DocumentWriter* Unigram_Train_Data_Formatter::_doc_writer [protected] |
std::ifstream Unigram_Train_Data_Formatter::_in [protected] |
int Unigram_Train_Data_Formatter::_num_docs [protected] |
int Unigram_Train_Data_Formatter::_num_words_in_all_docs [protected] |
boost::unordered_set<string> Unigram_Train_Data_Formatter::_stopWords [protected] |