#!/bin/bash
# GMGRU_H_F_T.sh
# This folder will be created; all models and results will be stored there.
CONFIG_DIR=/forecasting/selected/GMGRU_H_F_T/
WORK_DIR=$ROOT_DIR/Expt/workdir/$CONFIG_DIR/
TRAINING_DIR=$WORK_DIR/training/
# cluster strategy for psyc dataset
CLUSTER_STRATEGY=MISC11_ML
INPUT_DIR=psyc_${CLUSTER_STRATEGY}_17_padding
# Use pretrained word and char embeddings prepared with prepare.sh
VOCAB_DIR=$DATA_DIR/vocab_elmo_pre1024_u0n_s0n_u_8/
mkdir -p $VOCAB_DIR
# Use ../../utils/preprocess.py to compute a score for each paragraph
#TRAIN_FILENAME1=${INPUT_DIR}/train_sc20.json
TRAIN_FILENAME1=${INPUT_DIR}/train.json
#DEV_FILENAME1=${INPUT_DIR}/dev_sc10.json
DEV_FILENAME1=${INPUT_DIR}/dev.json
TEST_FILENAME1=${INPUT_DIR}/test.json
#TEST_FILENAME1=${INPUT_DIR}/dev_sc20.json
RO_TRAIN_FILE1=$RO_DATA_DIR/$TRAIN_FILENAME1
RO_DEV_FILE1=$RO_DATA_DIR/$DEV_FILENAME1
RO_TEST_FILE1=$RO_DATA_DIR/$TEST_FILENAME1
TRAIN_FILE1=$DATA_DIR/prep_data/rmstop_0_rpt_no/$TRAIN_FILENAME1
DEV_FILE1=$DATA_DIR/prep_data/rmstop_0_rpt_no/$DEV_FILENAME1
TEST_FILE1=$DATA_DIR/prep_data/rmstop_0_rpt_no/$TEST_FILENAME1
ALGO="GMLSTM"
LEARNING_RATE=0.0005
#WEIGHT_DECAY=0.0001
WEIGHT_DECAY=0.0
BATCH_SIZE=64
CONTEXT_WINDOW=8
QUESTION_WINDOW=1
HIDDEN_SIZE=128
EPOCH=100
STEPS_PER_CHECKPOINT=100
# DROP_KEEP_PROB in (0, 1]; 1 means no dropout
DROP_KEEP_PROB=0.3
USE_SELFATT=
USE_CHAR_EMBED=
MAX_NUM_CHAR_TO_KEEP_FORWARD=4
MAX_NUM_CHAR_TO_KEEP_BACKWARD=4
#USE_CHAR_EMBED=x
# Whether to train the original embedding.
TRAIN_EMBEDDING=
# Gradients are scaled by max_grad_norm / max(global_norm, max_grad_norm); set to inf to disable clipping.
MAX_GRAD_NORM=5
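# Worked example (assuming tf.clip_by_global_norm-style clipping): with
# MAX_GRAD_NORM=5 and global_norm=20, every gradient is scaled by
# 5 / max(20, 5) = 0.25; when global_norm <= 5, gradients pass through unchanged.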
# Leave empty to use a randomly initialized word embedding.
WORD_EMB_FILE=$RO_DATA_DIR/glove.840B.300d.txt
# WORD_EMB_FILE=
#WORD_EMB_FILE=$DATA_DIR/vectors_words.txt
CHAR_EMB_FILE=
#CHAR_EMB_FILE=$RO_DATA_DIR/glove.840B.300d-char.txt
#CHAR_EMB_FILE=$DATA_DIR/vectors_chars.txt
EMA=0.9999
MAX_P_LEN=1000
MAX_Q_LEN=60
NUM_FILTERS=25
ACC_SUM_PROB=0.9
# Flat context-aware question attention
FLAT_C_Q_ATT=
# pos_weight for balanced cross entropy
POS_WEIGHT=0.9
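# Sketch of the usual weighted form (assuming
# tf.nn.weighted_cross_entropy_with_logits semantics):
#   loss = -[ POS_WEIGHT * y * log(p) + (1 - y) * log(1 - p) ]
# so POS_WEIGHT < 1 slightly down-weights the positive class.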
# Set gamma = 0 to reduce focal loss to standard cross entropy (see FOCAL_LOSS_GAMA below).
# key for tokenization to use
TOKEN_KEY_TO_USE=tokenized_utterance
# Whether to add the p (passage) encoding to the decoder
DECODE_P=
# Whether to add the q (question) encoding to the decoder
DECODE_Q=
# TOPK, a list of integers for K values in Recall@K
TOPK_LIST=1,2,3,5,10
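# e.g. with TOPK_LIST=1,2,3,5,10 the evaluation reports Recall@1 through
# Recall@10, where Recall@K is the fraction of examples whose gold label
# appears among the model's K highest-scoring predictions.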
# TOPM_FOR_ACC_PROB: M such that ACC_SUM_PROB is measured within the top-M predictions
TOPM_FOR_ACC_PROB=5
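# Reading of the two knobs above (an assumption based on the names): check
# whether the top-5 predictions accumulate at least ACC_SUM_PROB=0.9 of the
# probability mass.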
# WORD_EMBED_SIZE, default 300; mutually exclusive with WORD_EMB_FILE
WORD_EMBED_SIZE=300
# CHAR_EMBED_SIZE, default 100; mutually exclusive with CHAR_EMB_FILE
CHAR_EMBED_SIZE=300
# Flat question-aware context attention
FLAT_Q_C_ATT=
# Dropout keep prob for embeddings; 1.0 = no dropout
DROPOUT_KEEP_PROB_EMB=0.7
# Method to encode the dialogue
DIAL_ENCODE=HIERARCHY
# max_length for a single utterance
MAX_U_LEN=60
# Whether to use hierarchical question-to-passage-utterance attention (hierarchy_q_pu_att)
HIERARCHY_Q_PU_ATT=x
# Self-attention for the hierarchical encoder; only applies when DIAL_ENCODE=HIERARCHY
USE_HIERARCHY_SELFATT=
# Decay ratio for the exponential moving average (EMA); 0.0 disables it, values near 0.9999 enable it.
EMA_DECAY=0.0
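# Standard EMA update (assuming tf.train.ExponentialMovingAverage semantics):
#   shadow = EMA_DECAY * shadow + (1 - EMA_DECAY) * var
# so EMA_DECAY=0.0 keeps the shadow equal to the raw variable, i.e. EMA is
# effectively disabled.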
# loss_func, default=X_ENTROPY
# LOSS_FUNC=X_ENTROPY
#LOSS_FUNC=WEIGHTED_CE
LOSS_FUNC=WEIGHTED_FOCAL
# rnn_type: one of bi-lstm, bi-gru, bi-rnn, lstm, gru, rnn
RNN_TYPE=bi-gru
# whether to use shared encode layer for utterance
USE_SHARED_ENCODING=
# all training files to use
TRAIN_FILES=$TRAIN_FILE1
#TRAIN_FILES=`find ${TRAIN_FILE1}_splits -name "split*" | tr '\n' ','`
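# Note (an assumption about the consumer): the commented `find | tr` line
# above produces a comma-separated list with a trailing comma, so the
# training script presumably tolerates an empty final entry.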
# all dev files to use
DEV_FILES=$DEV_FILE1
# all test files to use
TEST_FILES=$TEST_FILE1
# elmo pretrained LM weight file
#ELMO_WEIGHT_FILE=$DATA_DIR/psyc_elmo/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5
ELMO_WEIGHT_FILE=$DATA_DIR/psyc_elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5
#ELMO_WEIGHT_FILE=$DATA_DIR/advising_elmo/advising.256.weights.hdf5
# Corresponding ELMo options file; download URLs for weights and options:
#https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5
#https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json
#ELMO_OPTION_FILE=$DATA_DIR/psyc_elmo/elmo_2x1024_128_2048cnn_1xhighway_options.json
ELMO_OPTION_FILE=$DATA_DIR/psyc_elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json
#ELMO_OPTION_FILE=$DATA_DIR/advising_elmo/advising.256.options.json
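# To fetch the 5.5B weight/option files from the AllenNLP S3 URLs above:
#   mkdir -p $DATA_DIR/psyc_elmo
#   wget -P $DATA_DIR/psyc_elmo \
#     https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5 \
#     https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json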
# elmo max num character
# elmo embedding output size to be projected into
ELMO_EMB_OUTPUT=1024
# Whether to use the character-level ELMo embedding
USE_CHARACTER_ELMO=x
# positions to inject elmo, keep empty to disable
ELMO_POSITIONS=input
# elmo vocabulary file to write and read
ELMO_VOCAB_FILE=$VOCAB_DIR/psyc_elmo_vocab_file
# elmo snt dict file used for utterance cache
ELMO_SNT_DICT_FILE=$DATA_DIR/prep_data/rmstop_0_rpt_no/psyc_MISC11_ML_17_padding/elmo_snt_dict_file.without
# elmo utterance cache file
ELMO_U_CACHE_FILE=$VOCAB_DIR/u_cache.hdf5
# elmo passage cache file
ELMO_P_CACHE_FILE=$VOCAB_DIR/p_cache.hdf5
# elmo question cache file
ELMO_Q_CACHE_FILE=$VOCAB_DIR/q_cache.hdf5
# input used to decode
DECODE_INPUTS=p_final
# MEMNET PARAMS
GATED_MEMNET=x
PASSAGE_HOPS=2
MEMNET_SHARE_WEIGHTS=x
# whether to use concat p
USE_CONCAT_P=
# decode_func to use for multiclass decoding
DECODE_FUC=FC
# Flat context-aware response attention
FLAT_C_R_ATT=
# Flat response-aware context attention
FLAT_R_C_ATT=
# Whether to use hierarchical response-to-passage-utterance attention (hierarchy_r_pu_att)
HIERARCHY_R_PU_ATT=
# Whether to add the r (response) encoding to the CNN decoder
DECODE_R=
# Memnet hops over the response (cf. PASSAGE_HOPS above)
RESPONSE_HOPS=2
# use response utterance or not
USE_RESPONSE_U=
# decode goal
DECODE_GOAL=T_LABEL
# Whether to use speaker embedding
USE_SPEAKER_EMBEDDING=x
# Whether to use label embedding
USE_LABEL_EMBEDDING=
# dim of label embedding
LABEL_EMBED_DIM=32
# dim of speaker embedding
SPEAKER_EMBED_DIM=8
# filter sizes for cnn
FILTER_SIZES=3,4,5
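# Kim (2014)-style TextCNN setup (an assumption): parallel convolutions of
# width 3, 4, and 5 tokens, each with NUM_FILTERS feature maps.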
# Whether to decode r with the flattened pu_labels embedding
DECODE_R_WITH_FLATTEN_PU_LABELS=
# whether to use response speaker info
USE_RESPONSE_S=x
# whether to train speaker emb
TRAIN_SPEAKER_EMBEDDING=x
# whether to train label emb
TRAIN_LABEL_EMBEDDING=x
# dropout keep rate for MLP
DROPOUT_KEEP_PROB_MLP=0.8
# num_attention_heads for snt seq attention
NUM_ATT_HEAD=4
# snt-level attention algorithm; leave empty to disable
SNT_ATT_ALGO=
# snt-level attention hops
SNT_SEQ_HOPS=2
# RNN type for the snt-level RNN
SNT_RNN_TYPE=gru
# Loss weights per label, comma-separated; values may be floats
# LOSS_WEIGHTS=1.0,1.0,0.25,
# LOSS_WEIGHTS=1.0,1.0,0.25,0.50,0.75,0.75,0.50,0.75,0.75,1.0,1.0
LOSS_WEIGHTS=0.50,1.0,1.0,1.0,0.75,0.75,1.0,1.0
# Focal loss gamma per label, comma-separated integers
# FOCAL_LOSS_GAMA=0,0,0,0,0,0,0,0,0,0,0
FOCAL_LOSS_GAMA=2,2,2,2,2,2,2,2
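# Focal loss sketch (assuming the Lin et al. 2017 form, per label i):
#   FL(p_i) = -LOSS_WEIGHTS[i] * (1 - p_i)^FOCAL_LOSS_GAMA[i] * log(p_i)
# gamma=2 focuses training on hard examples; gamma=0 recovers weighted cross
# entropy, matching the note next to POS_WEIGHT above.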
# Use the response in the context sequence, without its label
USE_R_IN_SEQ=
# how to combine the final input states
DECODE_COMBINE=additive
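# 'additive' presumably sums the final input states elementwise, as opposed
# to, e.g., concatenating them before the decode layer (an assumption from
# the name).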
#config file for P model
P_MODEL_CONFIG=
#config file for T model
T_MODEL_CONFIG=
# Whether to use r in the word-matching sequence
USE_R_IN_WM_SEQ=
# Whether to use batch normalization
USE_BATCH_NORM=
# word attention heads
WORD_ATT_HEAD=1
# word attention hops
WORD_ATT_HOPS=1
# id filter file path
ID_FILTER_FILE=