/*******************************************************************************
    Copyright (c) 2011, Yahoo! Inc.
    All rights reserved.

    Redistribution and use of this software in source and binary forms,
    with or without modification, are permitted provided that the following
    conditions are met:

    * Redistributions of source code must retain the above
      copyright notice, this list of conditions and the
      following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the
      following disclaimer in the documentation and/or other
      materials provided with the distribution.

    * Neither the name of Yahoo! Inc. nor the names of its
      contributors may be used to endorse or promote products
      derived from this software without specific prior
      written permission of Yahoo! Inc.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    The Initial Developer of the Original Code is Shravan Narayanamurthy.
00036 ******************************************************************************/ 00037 /* 00038 * DocumentWriter.h 00039 * 00040 * Created on: 8 May, 2009 00041 * 00042 */ 00043 00044 #ifndef DOCUMENTWRITER_H_ 00045 #define DOCUMENTWRITER_H_ 00046 00047 #include <fstream> 00048 #include "google/protobuf/message.h" 00049 #include "constants.h" 00050 00051 using namespace std; 00052 using namespace LDA; 00053 00054 /** 00055 * Wrapper around protobuf msgs for convenient 00056 * writing of words, topics & (word,index) pairs 00057 * into word, topic, dictionary dump files respectively. 00058 * Each msg is written into a binary file in record* format 00059 * where record=(size of serialized msg,msg serialized as string) 00060 */ 00061 class DocumentWriter { 00062 public: 00063 DocumentWriter(string w_fname_); 00064 virtual ~DocumentWriter(); 00065 bool write(const google::protobuf::Message& msg); 00066 00067 private: 00068 string w_fname; //The output file to write various msgs from 00069 00070 ofstream w_output; //The output stream to write to w_fname 00071 00072 string serialized; //The string to which msgs are serialized to 00073 00074 /** 00075 * The main function that writes records to output 00076 * Assumes that the msg has been serialized by the 00077 * caller into 'seialized' 00078 */ 00079 inline bool write_sized_record_to(ofstream& output) { 00080 LOG_IF(FATAL,serialized.size()>size_t(MAX_MSG_SIZE))<<"Writing file: Message size " << serialized.size() << " exceeds " << MAX_MSG_SIZE << ". 
Quitting..."; 00081 00082 size_int size = (size_int)serialized.size(); 00083 output.write((char*)&size,sizeof(size_int)); 00084 output.write(serialized.c_str(),size); 00085 output.flush(); 00086 return !output.bad(); 00087 } 00088 00089 //The base method to write msg into out_str 00090 inline bool write_base(ofstream& out_str, const google::protobuf::Message& msg) { 00091 msg.SerializeToString(&serialized); 00092 return write_sized_record_to(out_str); 00093 } 00094 }; 00095 00096 #endif /* DOCUMENTWRITER_H_ */