00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046 #ifndef MAIN_FLAGS_DEFINE_H_
00047 #define MAIN_FLAGS_DEFINE_H_
00048
00049 #include "gflags/gflags.h"
00050 #include "constants.h"
00051 #include "tbb/task_scheduler_init.h"
00052
// Sampler schedule and model-size flags.
// Each gflags DEFINE_<type>(name, default, help) invocation defines one
// command-line flag; the three arguments are the flag name, its default
// value, and the help text.
// NOTE(review): this file is a header (see the MAIN_FLAGS_DEFINE_H_ guard) yet
// uses DEFINE_* rather than DECLARE_*. An include guard does not stop the flags
// from being defined once per translation unit, so this header presumably must
// be included from exactly one .cpp file -- confirm against the build.
// --iter: total number of iterations to run (default 1000).
00053 DEFINE_int32(iter,1000,"Number of iterations the topic modeller should be run");
// --burnin: iteration count after which alpha optimization starts (default 299).
// NOTE(review): the help text reads "should be to be run" (typo); it is left
// untouched here because it is runtime-visible text.
00054 DEFINE_int32(burnin,299,"Number of iterations after which alpha optimization should be to be run after every <optimizestats> iterations");
// --optimizestats: interval, in iterations, between hyperparameter optimizations (default 25).
00055 DEFINE_int32(optimizestats,25,"Optimize hyper parameters every these many iterations");
// --printloglikelihood: interval, in iterations, between log-likelihood reports (default 25).
// NOTE(review): the help text spells the placeholder "<printlogLikelihood>" with a
// capital L, while the flag name itself is all lowercase.
00056 DEFINE_int32(printloglikelihood,25,"Print log likelihood after every <printlogLikelihood> iterations after burn-in");
// --topics: number of LDA topics (default 100).
00057 DEFINE_int32(topics,100,"The number of topics to be used by LDA.");
00058
00059
// File-name prefix flags (string flags; second argument is the default value).
// --inputprefix: prefix that, per the help text, matches the output prefix of the
// FormatData routine (default "lda"). FormatData is not defined in this file.
00060 DEFINE_string(inputprefix,"lda","The output prefix used for the FormatData routine");
// --dumpprefix: identifies the dump file from which word-topic counts are
// initialized (default is the empty string).
00061 DEFINE_string(dumpprefix,"","The word-topic counts are initialized from this file which is generated by the preprocessing step or at the end of an iteration");
00062
00063
// Run-mode flags. All boolean flags below default to false.
// --restart: enables failure-recovery mode; per the help text the starting
// iteration must also be given (presumably via --startiter -- confirm in the caller).
00064 DEFINE_bool(restart,false,"Indicates use of failure recovery mode. The iteration to start with should also be specified");
// --online: selects online initialization instead of random initialization.
00065 DEFINE_bool(online,false,"Uses online initialization instead of random");
// --startiter: iteration at which failure recovery starts (default 1).
// NOTE(review): the help text is missing a word ("This the iteration"); it is
// left untouched because it is runtime-visible text.
00066 DEFINE_int32(startiter,1,"This the iteration at which failure recovery should start");
// --test: runs the test pipeline; the help text states no updates are done and
// an earlier word-topic counts dump is required.
00067 DEFINE_bool(test,false,"Run the test pipeline. No updates are done & requires an earlier dump of the word-topic counts table");
// --teststream: streaming variant of the test pipeline; the help text states it
// additionally needs a dictionary dump.
00068 DEFINE_bool(teststream,false,"Run the test pipeline in streaming mode. Formatting is a part of the pipeline. No updates are done & requires an earlier dump of the word-topic counts table & dictionary");
// Dirichlet hyperparameter flags (double-valued).
// The defaults ALPHA_SUM and BETA are not defined in this file; presumably they
// come from the included "constants.h" -- confirm there for the actual values.
// --alpha: Dirichlet weight for topics.
00069 DEFINE_double(alpha,ALPHA_SUM,"Weight of the Dirichlet conjugate for topics");
// --beta: Dirichlet weight for words.
00070 DEFINE_double(beta,BETA,"Weight of the Dirichlet conjugate for words");
// Checkpointing, state-server and resource flags.
// --chkptinterval: interval, in iterations, between saves of the topic assignments (default 25).
00071 DEFINE_int32(chkptinterval,25,"The topic assignments are saved every these many iterations");
// --chkptdir: directory that receives the checkpoints (default is the empty string).
// NOTE(review): the help text reads "need to written" (typo); left untouched
// because it is runtime-visible text.
00072 DEFINE_string(chkptdir,"","The directory to which the checkpoints need to written");
// --servers: list of memcached servers holding the state; the help text shows a
// comma-separated host[:port] example. The default "specify" looks like a
// placeholder rather than a usable value -- verify how the caller treats it.
00073 DEFINE_string(servers,"specify","The set of all memcached servers that are storing the state. E.g. 192.168.0.1, 192.168.0.3:44, 200.132.12.34");
// --numdumps: number of word-topic count dumps in the training data (default 1).
00074 DEFINE_int32(numdumps,1,"Number of word-topic count dumps in the training data");
// --maxmemory: upper bound on memory use (default 2048).
// NOTE(review): neither the help text nor this file states the unit -- confirm at the use site.
00075 DEFINE_int32(maxmemory,2048,"The max memory that can be used");
// --dictionary: global dictionary dump from the training run, used by the
// teststream mode per the help text. The default "specify" looks like a
// placeholder -- verify how the caller treats it.
// NOTE(review): the help text reads "To be use for" (typo); left untouched.
00076 DEFINE_string(dictionary,"specify","The dump of the global dictionary produced in the training run. To be use for teststream");
00077
// Pipeline and threading flags.
// --livetokens: maximum number of live tokens in the pipeline (default 500).
00078 DEFINE_int32(livetokens,500,"Max Live Tokens in pipeline");
// --model: integer model selector (default 1).
// NOTE(review): the help text is only "Unigram-1", which presumably means that
// value 1 selects the unigram model; other valid values are not listed here.
00079 DEFINE_int32(model,1,"Unigram-1");
// --samplerthreads: number of foreground threads running the LDA pipeline.
// The default is the constant tbb::task_scheduler_init::automatic; per the help
// text this means the thread count is determined automatically.
00080 DEFINE_int32(samplerthreads,tbb::task_scheduler_init::automatic,"The number of foreground threads that run actual LDA pipeline. Default is to figure out automatically");
00081 #endif