Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions data_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,15 @@ def save_spectrogram_tisv():
intervals = librosa.effects.split(utter, top_db=30) # voice activity detection
# top_db=30 works well for TIMIT; if you get an array of shape 0 for any other audio, change the value of top_db.
# For the VCTK dataset, use top_db=100.
for interval in intervals:
if (interval[1]-interval[0]) > utter_min_len: # If partial utterance is sufficient long,
utter_part = utter[interval[0]:interval[1]] # save first and last 180 frames of spectrogram.
S = librosa.core.stft(y=utter_part, n_fft=hp.data.nfft,
win_length=int(hp.data.window * sr), hop_length=int(hp.data.hop * sr))
for (interval_0, interval_1, *interval_len) in intervals:
if interval_1 - interval_0 > utter_min_len: # If partial utterance is sufficient long,
utter_part = utter[interval_0:interval_1] # save first and last 180 frames of spectrogram.
S = librosa.core.stft(y=utter_part, n_fft=hp.data.nfft, win_length=int(hp.data.window * sr), hop_length=int(hp.data.hop * sr))
S = np.abs(S) ** 2
mel_basis = librosa.filters.mel(sr=hp.data.sr, n_fft=hp.data.nfft, n_mels=hp.data.nmels)
S = np.log10(np.dot(mel_basis, S) + 1e-6) # log mel spectrogram of utterances
utterances_spec.append(S[:, :hp.data.tisv_frame]) # first 180 frames of partial utterance
utterances_spec.append(S[:, -hp.data.tisv_frame:]) # last 180 frames of partial utterance

S = np.log10(np.dot(mel_basis, S) + 1e-06) # log mel spectrogram of utterances
utterances_spec.append(S[:, :hp.data.tisv_frame]) # first 180 frames of partial utterance
utterances_spec.append(S[:, -hp.data.tisv_frame:]) # last 180 frames of partial utterance
utterances_spec = np.array(utterances_spec)
print(utterances_spec.shape)
if i<train_speaker_num: # save spectrogram as numpy file
Expand Down