/*******************************************************************************
    Copyright (c) 2011, Yahoo! Inc.
    All rights reserved.

    Redistribution and use of this software in source and binary forms,
    with or without modification, are permitted provided that the following
    conditions are met:

    * Redistributions of source code must retain the above
      copyright notice, this list of conditions and the
      following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the
      following disclaimer in the documentation and/or other
      materials provided with the distribution.

    * Neither the name of Yahoo! Inc. nor the names of its
      contributors may be used to endorse or promote products
      derived from this software without specific prior
      written permission of Yahoo! Inc.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    The Initial Developer of the Original Code is Shravan Narayanamurthy.
00036 ******************************************************************************/ 00037 /* 00038 * TopKList.h 00039 * A list that maintains top K records 00040 * Each record is assumed to be the 00041 * cnt_topic_t union. The abstraction 00042 * of the record is (word,cnt) pair 00043 * So top K records indicate K unique 00044 * records having the highest count 00045 * 00046 * The main assumption is that the 00047 * records inserted should be unique 00048 * wrt to the words. Same word with 00049 * different counts should not be 00050 * inserted. Results are undefined 00051 * 00052 * Created on: 14 May, 2009 00053 * 00054 */ 00055 00056 #ifndef TOPKLIST_H_ 00057 #define TOPKLIST_H_ 00058 #include "constants.h" 00059 #include <algorithm> 00060 #include "comparator.h" 00061 00062 typedef cnt_topic_t cnt_word_t; 00063 00064 class TopKList { 00065 public: 00066 TopKList(int K_); 00067 virtual ~TopKList(); 00068 //Assumes that the words inserted are unique and doesn't check this explicitly 00069 void insert_word(const cnt_word_t& cnt_word); 00070 bool is_sorted(); 00071 cnt_word_t get_max(); 00072 typedef cnt_topic_t* iterator; 00073 iterator get_beg(); 00074 iterator get_end(); 00075 void print(); 00076 void clear(); 00077 00078 private: 00079 int K, //The val K in top K 00080 num_elements; //The actual num of elements stored; can be lesser than K 00081 00082 cnt_word_t* array; //The array which stores the elements. Its always kept sorted 00083 00084 int32_t min; //The min count. Used to chk if an incoming element should be inserted 00085 00086 void insert_into_array(const cnt_word_t& cnt_word); 00087 }; 00088 00089 #endif /* TOPKLIST_H_ */