
Commit eed4326

Merge configs to one file
Adibian committed May 10, 2023
1 parent 6075dab commit eed4326
Showing 13 changed files with 203 additions and 174 deletions.
144 changes: 144 additions & 0 deletions config/Persian/config.yaml
@@ -0,0 +1,144 @@
############################################################################################
####################################### Main Config ########################################
############################################################################################
dataset: &Dataset "Persian"
multi_speaker: False



############################################################################################
####################################### Synthesizer ########################################
############################################################################################
synthesizer:
  ############### Main Parameters ###################
  main:
    device: "cuda" ## cpu or cuda

  ##################### Model #######################
  model:
    transformer:
      encoder_layer: 4
      encoder_head: 2
      encoder_hidden: 256
      decoder_layer: 6
      decoder_head: 2
      decoder_hidden: 256
      conv_filter_size: 1024
      conv_kernel_size: [9, 1]
      encoder_dropout: 0.2
      decoder_dropout: 0.2

    variance_predictor:
      filter_size: 256
      kernel_size: 3
      dropout: 0.5

    variance_embedding:
      pitch_quantization: "linear" # supports 'linear' or 'log'; 'log' is allowed only if the pitch values are not normalized during preprocessing
      energy_quantization: "linear" # supports 'linear' or 'log'; 'log' is allowed only if the energy values are not normalized during preprocessing
      n_bins: 256

    max_seq_len: 1000

  #################### Preprocess #####################
  preprocess:
    path:
      corpus_path: !join ["dataset/", *Dataset, "synthesizer_data/train_data"]
      raw_path: !join ["synthesizer/raw_data/", *Dataset]
      preprocessed_path: !join ["synthesizer/preprocessed_data/", *Dataset]
    preprocessing:
      val_size: 100
      text:
        text_cleaners: ["persian_cleaners"] ## ljspeech_cleaners or persian_cleaners
        language: "fa" ## fa or en
      audio:
        sampling_rate: 22050
        max_wav_value: 32768.0
      stft:
        filter_length: 1024
        hop_length: 256
        win_length: 1024
      mel:
        n_mel_channels: 80
        mel_fmin: 0
        mel_fmax: 8000
      pitch:
        feature: "phoneme_level" # supports 'phoneme_level' or 'frame_level'
        normalization: True
      energy:
        feature: "phoneme_level" # supports 'phoneme_level' or 'frame_level'
        normalization: True

  #################### Training #####################
  train:
    path:
      ckpt_path: !join ["output/", *Dataset, "synthesizer/ckpt"]
      log_path: !join ["output/", *Dataset, "synthesizer/log"]
      result_path: !join ["output/", *Dataset, "synthesizer/result"]
    optimizer:
      batch_size: 16
      betas: [0.9, 0.98]
      eps: 0.000000001
      weight_decay: 0.0
      grad_clip_thresh: 1.0
      grad_acc_step: 1
      warm_up_step: 4000
      anneal_steps: [300000, 400000, 500000]
      anneal_rate: 0.3
    step:
      total_step: 1010000
      log_step: 500
      synth_step: 1000
      val_step: 1000
      save_step: 100000



############################################################################################
########################################### ResGrad ########################################
############################################################################################
resgrad:

  #################### Data #####################
  data:
    batch_size: 32
    target_data_dir: !join ["dataset/", *Dataset, "resgrad_data/mel_target"]
    input_data_dir: !join ["dataset/", *Dataset, "resgrad_data/mel_prediction"]
    durations_dir: !join ["dataset/", *Dataset, "resgrad_data/durations"]
    val_size: 16
    preprocessed_path: "processed_data"
    normalized_method: "min-max"

    shuffle_data: True
    normallize_spectrum: True
    min_spec_value: -13
    max_spec_value: 3
    normallize_residual: True
    min_residual_value: -0.25
    max_residual_value: 0.25
    max_win_length: 100 ## maximum window size in the spectrogram

  ################## Training ###################
  train:
    lr: 1e-4
    epochs: 70
    save_model_path: !join ["output/", *Dataset, "resgrad/ckpt"]
    validate_every_n_step: 20
    log_dir: !join ["output/", *Dataset, "resgrad/log"]
    save_path: 'checkpoint'

  ############ Model Parameters #################
  model:
    model_type1: "spec2residual" ## "spec2spec" or "spec2residual"
    model_type2: "segment-based" ## "segment-based" or "sentence-based"
    n_feats: 80
    dim: 64
    n_spks: 1
    spk_emb_dim: 64
    beta_min: 0.05
    beta_max: 20.0
    pe_scale: 1000

  ############ Main Parameters #################
  main:
    device: "cuda"
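
A note on the tags used throughout this file: &Dataset / *Dataset is a standard YAML anchor, but !join is a custom tag that PyYAML only resolves once a constructor has been registered, presumably inside the repository's load_yaml_file helper (whose body is not shown in this commit). A minimal sketch of such a loader, assuming !join combines its items with os.path.join semantics:

import os
import yaml


def join_constructor(loader, node):
    # Build one path from the items of a !join sequence, e.g.
    # !join ["output/", *Dataset, "synthesizer/ckpt"] -> "output/Persian/synthesizer/ckpt"
    # (under the os.path.join assumption above).
    parts = [str(part) for part in loader.construct_sequence(node)]
    return os.path.join(*parts)


def load_yaml_file(path):
    # Register the custom tag before parsing; anchors need no extra handling.
    yaml.add_constructor("!join", join_constructor, Loader=yaml.FullLoader)
    with open(path, "r") as f:
        return yaml.load(f, Loader=yaml.FullLoader)


config = load_yaml_file("config/Persian/config.yaml")
print(config["synthesizer"]["train"]["path"]["ckpt_path"])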

32 changes: 15 additions & 17 deletions resgrad_data.py
@@ -1,11 +1,12 @@
-from utils import load_models, get_synthesizer_configs
+from utils import load_models, load_yaml_file
 from synthesizer.synthesize import infer as synthesizer_infer
 
 import argparse
 import os
 from tqdm import tqdm
 import numpy as np
 import torch
+import yaml
 
 
 def read_input_data(raw_data_path):
@@ -24,37 +25,34 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--synthesizer_restore_step", type=int, required=True)
     parser.add_argument("--raw_data_path", type=str, default="synthesizer/raw_data", required=False)
-    parser.add_argument("--synthesizer_preprocess_config", type=str, default="synthesizer/config/persian/preprocess.yaml", required=False)
-    parser.add_argument("--synthesizer_model_config", type=str, default="synthesizer/config/persian/model.yaml", required=False)
-    parser.add_argument("--synthesizer_train_config", type=str, default="synthesizer/config/persian/train.yaml", required=False)
+    parser.add_argument("-c", "--config", type=str, default='config/Persian/config.yaml', required=False, help="path to config.yaml")
     args = parser.parse_args()
 
-    synthesizer_configs = get_synthesizer_configs(args.synthesizer_preprocess_config, args.synthesizer_model_config, args.synthesizer_train_config)
+    # Read Config
+    config = load_yaml_file(args.config)
 
     restore_steps = {"synthesizer":args.synthesizer_restore_step, "resgrad":None, "vocoder":None}
-    synthesizer_model, _, _ = load_models(restore_steps, synthesizer_configs)
-    text_data = read_input_data(os.path.join(args.raw_data_path, synthesizer_configs['preprocess_config']['dataset']))
+    synthesizer_model, _, _ = load_models(restore_steps, config['synthesizer'])
+    text_data = read_input_data(args.raw_data_path)
 
     current_path = os.getcwd()
-    save_path = os.path.join(current_path, "dataset", synthesizer_configs['preprocess_config']['dataset'], "resgrad_data")
-    duration_dir = os.path.join(save_path, 'durations')
-    mel_pred_dir = os.path.join(save_path, 'mel_prediction')
-    mel_target_dir = os.path.join(save_path, 'mel_target')
+    duration_dir = os.path.join(current_path, config['resgrad']['data']['durations_dir'])
+    mel_pred_dir = os.path.join(current_path, config['resgrad']['data']['input_data_dir'])
+    mel_target_dir = os.path.join(current_path, config['resgrad']['data']['target_data_dir'])
 
-    os.makedirs(save_path, exist_ok=True)
     os.makedirs(mel_pred_dir, exist_ok=True)
     if not os.path.islink(mel_target_dir):
-        os.symlink(os.path.join(current_path, synthesizer_configs['preprocess_config']['path']['preprocessed_path'], 'mel'), mel_target_dir, target_is_directory=True)
+        os.symlink(os.path.join(current_path, config['synthesizer']['preprocess']['path']['preprocessed_path'], 'mel'), mel_target_dir, target_is_directory=True)
     if not os.path.islink(duration_dir):
-        os.symlink(os.path.join(current_path, synthesizer_configs['preprocess_config']['path']['preprocessed_path'], 'duration'), duration_dir, target_is_directory=True)
+        os.symlink(os.path.join(current_path, config['synthesizer']['preprocess']['path']['preprocessed_path'], 'duration'), duration_dir, target_is_directory=True)
 
-    device = synthesizer_configs['model_config']['device']
+    device = config['synthesizer']['main']['device']
     for (speaker, file_name), text in tqdm(text_data.items()):
         dur_file_name = speaker + "-duration-" + file_name + ".npy"
         dur_target = torch.from_numpy(np.load(os.path.join(duration_dir, dur_file_name))).to(device).unsqueeze(0)
 
         control_values = 1.0,1.0,1.0
-        mel_prediction, _, _, _ = synthesizer_infer(synthesizer_model, text, control_values, synthesizer_configs['preprocess_config'],
+        mel_prediction, _, _, _ = synthesizer_infer(synthesizer_model, text, control_values, config,
                                                     device, d_target=dur_target)
         file_path = os.path.join(mel_pred_dir, file_name)
         np.save(file_path, mel_prediction[0].cpu())
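
The body of read_input_data is collapsed in this diff; from the call sites above it must return a mapping from (speaker, file_name) pairs to input text, now keyed directly off args.raw_data_path. A hypothetical sketch consistent with that usage (the per-speaker directory layout and the pipe-separated metadata format are assumptions, not the repository's actual code):

import os


def read_input_data(raw_data_path):
    # Assumed layout: raw_data_path/<speaker>/train.txt with "file_name|text" lines.
    text_data = {}
    for speaker in os.listdir(raw_data_path):
        metadata_path = os.path.join(raw_data_path, speaker, "train.txt")
        with open(metadata_path, encoding="utf-8") as f:
            for line in f:
                file_name, text = line.strip().split("|", 1)
                text_data[(speaker, file_name)] = text
    return text_data
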
28 changes: 0 additions & 28 deletions synthesizer/config/persian/model.yaml

This file was deleted.

30 changes: 0 additions & 30 deletions synthesizer/config/persian/preprocess.yaml

This file was deleted.

20 changes: 0 additions & 20 deletions synthesizer/config/persian/train.yaml

This file was deleted.

41 changes: 3 additions & 38 deletions synthesizer/evaluate.py
@@ -1,17 +1,14 @@
-import argparse
 import torch
-import yaml
 from torch.utils.data import DataLoader
 
 from .utils.model import get_model
 from .utils.tools import to_device, log, synth_one_sample
 from .model import FastSpeech2Loss
 from .dataset import Dataset
 
-
-def evaluate(model, step, configs, logger=None, vocoder=None):
-    preprocess_config, model_config, train_config = configs
-    device = model_config['device']
+def evaluate(model, step, config, logger=None, vocoder=None):
+    preprocess_config, model_config, train_config = config['synthesizer']['preprocess'], config['synthesizer']['model'], config['synthesizer']['train']
+    device = config['synthesizer']['main']['device']
     # Get dataset
     dataset = Dataset(
         "val.txt", preprocess_config, train_config, sort=False, drop_last=False
@@ -80,35 +77,3 @@ def evaluate(model, step, configs, logger=None, vocoder=None):
     return message
 
 
-if __name__ == "__main__":
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--restore_step", type=int, default=30000)
-    parser.add_argument(
-        "-p",
-        "--preprocess_config",
-        type=str,
-        required=True,
-        help="path to preprocess.yaml",
-    )
-    parser.add_argument(
-        "-m", "--model_config", type=str, required=True, help="path to model.yaml"
-    )
-    parser.add_argument(
-        "-t", "--train_config", type=str, required=True, help="path to train.yaml"
-    )
-    args = parser.parse_args()
-
-    # Read Config
-    preprocess_config = yaml.load(
-        open(args.preprocess_config, "r"), Loader=yaml.FullLoader
-    )
-    model_config = yaml.load(open(args.model_config, "r"), Loader=yaml.FullLoader)
-    train_config = yaml.load(open(args.train_config, "r"), Loader=yaml.FullLoader)
-    configs = (preprocess_config, model_config, train_config)
-
-    # Get model
-    model = get_model(args, configs, device, train=False).to(device)
-
-    message = evaluate(model, args.restore_step, configs)
-    print(message)
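
With the standalone __main__ block above deleted, evaluation is now driven through the merged config. A minimal caller sketch, assuming load_models behaves as in resgrad_data.py and using an illustrative restore step (both are assumptions, not code from this commit):

from synthesizer.evaluate import evaluate
from utils import load_models, load_yaml_file

config = load_yaml_file("config/Persian/config.yaml")
restore_steps = {"synthesizer": 100000, "resgrad": None, "vocoder": None}
synthesizer_model, _, _ = load_models(restore_steps, config["synthesizer"])

# evaluate() now receives the whole merged config and pulls the
# synthesizer sub-sections out itself, as the new signature above shows.
message = evaluate(synthesizer_model, step=100000, config=config)
print(message)
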
11 changes: 5 additions & 6 deletions synthesizer/prepare_align.py
@@ -1,8 +1,7 @@
 import argparse
 
-import yaml
-
-from preprocessor import persian
+from .preprocessor import persian
+from ..utils import load_yaml_file
 
 
 def main(config):
@@ -12,8 +11,8 @@

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("config", type=str, help="path to preprocess.yaml")
+    parser.add_argument("config", type=str, help="path to config.yaml")
     args = parser.parse_args()
 
-    config = yaml.load(open(args.config, "r"), Loader=yaml.FullLoader)
-    main(config)
+    config = load_yaml_file(args.config)
+    main(config['synthesizer']['preprocess'])
11 changes: 5 additions & 6 deletions synthesizer/preprocess.py
@@ -1,15 +1,14 @@
 import argparse
 
-import yaml
-
-from preprocessor.preprocessor import Preprocessor
+from .preprocessor.preprocessor import Preprocessor
+from ..utils import load_yaml_file
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("config", type=str, help="path to preprocess.yaml")
+    parser.add_argument("config", type=str, help="path to config.yaml")
     args = parser.parse_args()
 
-    config = yaml.load(open(args.config, "r"), Loader=yaml.FullLoader)
-    preprocessor = Preprocessor(config)
+    config = load_yaml_file(args.config)
+    preprocessor = Preprocessor(config['synthesizer']['preprocess'])
     preprocessor.build_from_path()
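
Because both preprocessing entry points now use relative imports (from .preprocessor ..., from ..utils ...), they can no longer be executed as standalone scripts and must be imported or run as package modules from the repository root. A sketch of the equivalent programmatic call (module paths follow the file tree in this commit; anything beyond that is an assumption):

from utils import load_yaml_file
from synthesizer.preprocessor.preprocessor import Preprocessor

config = load_yaml_file("config/Persian/config.yaml")
preprocessor = Preprocessor(config["synthesizer"]["preprocess"])
preprocessor.build_from_path()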