/*******************************************************************************
    Copyright (c) 2011, Yahoo! Inc.
    All rights reserved.

    Redistribution and use of this software in source and binary forms,
    with or without modification, are permitted provided that the following
    conditions are met:

    * Redistributions of source code must retain the above
      copyright notice, this list of conditions and the
      following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the
      following disclaimer in the documentation and/or other
      materials provided with the distribution.

    * Neither the name of Yahoo! Inc. nor the names of its
      contributors may be used to endorse or promote products
      derived from this software without specific prior
      written permission of Yahoo! Inc.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    The Initial Developer of the Original Code is Shravan Narayanamurthy.
00036 ******************************************************************************/ 00037 /* 00038 * TopicCounts.h 00039 * The main structure that stores the sparse 00040 * topic counts vector. It uses a blocked 00041 * allocation scheme in that memory is allocated 00042 * and deallocated in blocks. Blocked allocation 00043 * is used to reduce heap fragmentation. 00044 * 00045 * The elements are of the type cnt_topic_t 00046 * which packs both the topic and the count 00047 * into a single 64bit integer. 00048 * 00049 * Supports a map-view of the sparse vector, 00050 * but you don't have order information in 00051 * this view. So use with discretion. 00052 * 00053 * Its a struct for speed. So everything is 00054 * public. Don't mess with the items, length 00055 * and origLenth if you aren't sure about 00056 * what you are doing. Use the methods 00057 * provided making sure the assumptions hold 00058 * 00059 * Created on: 14 Oct, 2009 00060 * 00061 */ 00062 00063 #ifndef TOPICCOUNTS_H_ 00064 #define TOPICCOUNTS_H_ 00065 00066 #include "types.h" 00067 #include <algorithm> 00068 #include "boost/unordered_map.hpp" 00069 #include "tbb/atomic.h" 00070 #include "comparator.h" 00071 #include <vector> 00072 00073 class simple_map; 00074 00075 typedef struct TopicCounts { 00076 cnt_topic_t* items; //The actual array holding data 00077 //which is dynamically reshaped 00078 //This is always sorted in descending 00079 //order and only has non-zero entries 00080 //Methods do not check for uniqueness 00081 //but assume uniqueness. 
00082 //Responsiblity of user to ensure 00083 //uniqueness 00084 00085 topic_t length; //The number of elements stored in the array 00086 00087 topic_t origLength; //The size of the allocated array 00088 00089 std::vector<cnt_topic_t> vec_items; 00090 00091 //mapped_vec tmp_map; //A temporary map to hasten some 00092 //internal update operations 00093 int frequency; 00094 bool QUIT; 00095 00096 /***** Init *****/ 00097 TopicCounts(); 00098 TopicCounts(int length); 00099 TopicCounts(cnt_topic_t* it, int len); 00100 TopicCounts(const std::string& counts); 00101 void init(cnt_topic_t* it, int len); 00102 void init(const std::string& counts); 00103 ~TopicCounts(); 00104 void assign(int length, bool setLen = true); 00105 void setLength(int length_); 00106 /***** Init *****/ 00107 /***** Getters *****/ 00108 void findOldnNew(topic_t oldTopic, topic_t newTopic, topic_t** oldTop, 00109 topic_t** newTop); 00110 int get_frequency(); 00111 cnt_t get_counts(topic_t topic); 00112 int convertTo(mapped_vec& map, int mult = 1) const; 00113 void convertTo(simple_map& map, int mult = 1) const; 00114 void convertTo(std::string& counts) const; 00115 int convertTo_d(mapped_vec& map, double mult) const; 00116 //int computeFrequency(); 00117 //bool matchFrequency(); 00118 /***** Getters *****/ 00119 00120 /***** Setters *****/ 00121 bool findAndIncrement(topic_t topic); 00122 bool findAndDecrement(topic_t topic); 00123 //void setFrequency(); 00124 00125 void compact(); 00126 00127 void addNewTop(topic_t topic, cnt_t count = 1); 00128 void addNewTopAftChk(topic_t topic, cnt_t count = 1); 00129 void upd_count(mapped_vec& delta, tbb::atomic<topic_t>* t = NULL); 00130 00131 //Convenience updates 00132 void operator+=(TopicCounts& inp); 00133 void operator-=(TopicCounts& inp); 00134 00135 void removeOldTop(topic_t ind, cnt_topic_t& ct); 00136 00137 void replace(TopicCounts& tc); 00138 00139 void decrement(topic_t ind, topic_t** newTop); 00140 void increment(topic_t ind); 00141 /***** Setters 
*****/ 00142 00143 //Test & Debug 00144 std::string print(); 00145 TopicCounts(mapped_vec& map); 00146 bool equal(const TopicCounts& expected); 00147 } topicCounts; 00148 00149 #endif /* TOPICCOUNTS_H_ */