/*******************************************************************************
    Copyright (c) 2011, Yahoo! Inc.
    All rights reserved.

    Redistribution and use of this software in source and binary forms,
    with or without modification, are permitted provided that the following
    conditions are met:

    * Redistributions of source code must retain the above
      copyright notice, this list of conditions and the
      following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the
      following disclaimer in the documentation and/or other
      materials provided with the distribution.

    * Neither the name of Yahoo! Inc. nor the names of its
      contributors may be used to endorse or promote products
      derived from this software without specific prior
      written permission of Yahoo! Inc.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    The Initial Developer of the Original Code is Shravan Narayanamurthy.
00036 ******************************************************************************/ 00037 /* 00038 * Unigram_Model_Tester.h 00039 * 00040 * Created on: 06-Jan-2011 00041 * 00042 */ 00043 00044 #ifndef UNIGRAM_MODEL_TESTER_H_ 00045 #define UNIGRAM_MODEL_TESTER_H_ 00046 00047 #include "TopicLearner/Model_Refiner.h" 00048 #include "TypeTopicCounts.h" 00049 #include "TopicLearner/Parameter.h" 00050 #include "DocumentReader.h" 00051 #include "DocumentWriter.h" 00052 #include <boost/random/variate_generator.hpp> 00053 #include <boost/random/uniform_real.hpp> 00054 #include "WordIndexDictionary.h" 00055 00056 using namespace boost; 00057 using namespace std; 00058 00059 class Unigram_Model_Tester: public Model_Refiner { 00060 public: 00061 Unigram_Model_Tester(TypeTopicCounts&, Parameter&, Parameter&, 00062 WordIndexDictionary&, bool no_init = false); 00063 virtual ~Unigram_Model_Tester(); 00064 00065 google::protobuf::Message* allocate_document_buffer(size_t); 00066 void deallocate_document_buffer(google::protobuf::Message*); 00067 google::protobuf::Message* get_nth_document( 00068 google::protobuf::Message* docs, size_t n); 00069 void* read(google::protobuf::Message&); 00070 void* sample(void*); 00071 void* update(void*); 00072 void* optimize(void*); 00073 void* eval(void*, double&); 00074 void write(void*); 00075 void iteration_done(); 00076 00077 void* test(void*); 00078 00079 static long doc_index; //Running count of all the documents processed by the optimizer 00080 00081 private: 00082 void set_up_io(string, string); 00083 void release_io(); 00084 00085 protected: 00086 TypeTopicCounts& _ttc; 00087 Parameter& _alpha; 00088 Parameter& _beta; 00089 bool ignore_old_topic; 00090 int _num_words, _num_topics; 00091 //Reader 00092 DocumentReader *_wdoc_rdr; 00093 DocumentWriter *_tdoc_writer; 00094 }; 00095 00096 #endif /* UNIGRAM_MODEL_TESTER_H_ */