#include <Unigram_Model_Trainer.h>
Public Member Functions | |
Unigram_Model_Trainer (TypeTopicCounts &, Parameter &, Parameter &) | |
virtual | ~Unigram_Model_Trainer () |
google::protobuf::Message * | allocate_document_buffer (size_t) |
void | deallocate_document_buffer (google::protobuf::Message *) |
google::protobuf::Message * | get_nth_document (google::protobuf::Message *docs, size_t n) |
void * | read (google::protobuf::Message &) |
void * | sample (void *) |
void * | update (void *) |
void * | optimize (void *) |
void * | eval (void *, double &) |
Compute the document portion of the log-likelihood. | |
void | write (void *) |
void | iteration_done () |
void * | test (void *) |
Static Public Attributes | |
static long | doc_index = -1 |
The default implementation of Model_Refiner for the Unigram model.
Unigram_Model_Trainer::Unigram_Model_Trainer | ( | TypeTopicCounts & | ttc, | |
Parameter & | alpha, | |||
Parameter & | beta | |||
) |
Unigram_Model_Trainer::~Unigram_Model_Trainer | ( | ) | [virtual] |
google::protobuf::Message * Unigram_Model_Trainer::allocate_document_buffer | ( | size_t | num_docs | ) | [virtual] |
Implements Model_Refiner.
void Unigram_Model_Trainer::deallocate_document_buffer | ( | google::protobuf::Message * | docs | ) | [virtual] |
Implements Model_Refiner.
void * Unigram_Model_Trainer::eval | ( | void * | token, | |
double & | eval_value | |||
) | [virtual] |
Compute the document portion of the log-likelihood.
Implements Model_Refiner.
google::protobuf::Message * Unigram_Model_Trainer::get_nth_document | ( | google::protobuf::Message * | docs, | |
size_t | n | |||
) | [virtual] |
Implements Model_Refiner.
void Unigram_Model_Trainer::iteration_done | ( | ) | [virtual] |
Implements Model_Refiner.
void * Unigram_Model_Trainer::optimize | ( | void * | token | ) | [virtual] |
Performs stochastic gradient descent (SGD) to optimize the alphas. The gradients are accumulated over tau documents, and then the global alphas are updated.
Implements Model_Refiner.
void * Unigram_Model_Trainer::read | ( | google::protobuf::Message & | doc | ) | [virtual] |
Reads a document from the protobuf-format word and topic files using DocumentReader.
Implements Model_Refiner.
void * Unigram_Model_Trainer::sample | ( | void * | token | ) | [virtual] |
Performs Gibbs sampling (using sampler.cpp) to determine new topic assignments for each word present in the document passed in the msg.
Implements Model_Refiner.
void * Unigram_Model_Trainer::test | ( | void * | token | ) | [virtual] |
Implements Model_Refiner.
void * Unigram_Model_Trainer::update | ( | void * | token | ) | [virtual] |
Takes a msg which contains the document to be processed and the updated topics for each word in the document as a vector. It then processes each update by calling upd_count on the TypeTopicCounts object with the update details.
Implements Model_Refiner.
void Unigram_Model_Trainer::write | ( | void * | token | ) | [virtual] |
Takes the document and writes it to disk. Here we use a simple optimization: only the topics are written, not the body/words of the document. This is because the words in the document never change; it's only the topics that change. The documents are written to disk using a DocumentWriter.
Implements Model_Refiner.
long Unigram_Model_Trainer::doc_index = -1 [static] |