00001 /******************************************************************************* 00002 Copyright (c) 2011, Yahoo! Inc. 00003 All rights reserved. 00004 00005 Redistribution and use of this software in source and binary forms, 00006 with or without modification, are permitted provided that the following 00007 conditions are met: 00008 00009 * Redistributions of source code must retain the above 00010 copyright notice, this list of conditions and the 00011 following disclaimer. 00012 00013 * Redistributions in binary form must reproduce the above 00014 copyright notice, this list of conditions and the 00015 following disclaimer in the documentation and/or other 00016 materials provided with the distribution. 00017 00018 * Neither the name of Yahoo! Inc. nor the names of its 00019 contributors may be used to endorse or promote products 00020 derived from this software without specific prior 00021 written permission of Yahoo! Inc. 00022 00023 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 00024 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 00025 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 00026 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 00027 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00028 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00029 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00030 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00031 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00032 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00033 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00034 00035 The Initial Developer of the Original Code is Shravan Narayanamurthy. 00036 ******************************************************************************/ 00037 /* 00038 * sampler.h 00039 * 00040 * The sampling function which samples the 00041 * new topic assignment using the collapsed 00042 * Gibbs Sampler approach. It takes in the 00043 * topic counts for the current word, the 00044 * local topic counts for the document being 00045 * processed and Abar, Bbar & Ccached 00046 * 00047 * The procedure is simple. It computes C(t) 00048 * and Cbar. It then generates a 00049 * random number using the uniform RNG passed 00050 * in. It scales the number by Abar + Bbar + Cbar 00051 * It checks to which probability mass the number 00052 * generated belongs. Based on that samples the 00053 * topic responsible for generating this number 00054 * 00055 * Created on: 24 Apr, 2009 00056 * 00057 */ 00058 00059 #ifndef SAMPLER_H_ 00060 #define SAMPLER_H_ 00061 00062 #include "constants.h" 00063 #include <boost/random/variate_generator.hpp> 00064 #include <boost/random/uniform_real.hpp> 00065 #include "tbb/atomic.h" 00066 #include "TopicCounts.h" 00067 00068 using namespace boost; 00069 using namespace tbb; 00070 namespace sampler { 00071 00072 topic_t 00073 sample( 00074 const topicCounts* currentTypeTopicCounts, 00075 const topic_t old_topic, 00076 const atomic<topic_t>* tokens_per_topic, 00077 const topic_t* loca_topic_counts, 00078 const topic_t* loca_topic_index, 00079 const int& non_zero_topics, 00080 const double& smoothingOnlyMass, 00081 const double& topicBetaMass, 00082 const double* cachedCoefficients, 00083 const double& betaSum, 00084 const double* alpha, 00085 double* topic_term_scores, 00086 const topic_t& numTopics, 00087 variate_generator<base_generator_type&, boost::uniform_real<> >* unif01); 00088 00089 } // namespace sampler 00090 #endif /* SAMPLER_H_ */