Update comp_spectrogram_scipy_nnAudio authored by Kentaro Mogushi
......@@ -37,9 +37,86 @@ The scales are very different. However, the spectrograms are normalized before b
ok.
### case 1
![Screen_Shot_2021-03-27_at_3.12.02_PM](uploads/ee123f4809e3698f37a4bf5953321c58/Screen_Shot_2021-03-27_at_3.12.02_PM.png)
![Screen_Shot_2021-03-27_at_3.13.49_PM](uploads/2db5e072c40202d8a9783e03442df8ec/Screen_Shot_2021-03-27_at_3.13.49_PM.png)
![Screen_Shot_2021-03-27_at_3.18.16_PM](uploads/5c1349bd0296da0f365fdf862e63b9ca/Screen_Shot_2021-03-27_at_3.18.16_PM.png)
\ No newline at end of file
![Screen_Shot_2021-03-27_at_3.18.16_PM](uploads/5c1349bd0296da0f365fdf862e63b9ca/Screen_Shot_2021-03-27_at_3.18.16_PM.png)
### case 2
![Screen_Shot_2021-03-27_at_3.56.44_PM](uploads/29a589b94dc331c2f32020f46f291cd2/Screen_Shot_2021-03-27_at_3.56.44_PM.png)
# code
```python
# --- nnAudio spectrogram of the downsampled strain ---------------------------
# Index of the sample to inspect; the trailing values look like indices tried
# earlier. The sections further down reuse `try_index`.
try_index = 954 # 2841 # 5519 # 2841

# Input selection: enable the commented line instead to run on the
# "extracted" series rather than the downsampled one.
#a = map_stft(y_tns[try_index: try_index + 1].float()) # extracted
a = map_stft(y_tns_ds[try_index: try_index + 1].float()) # downsampled

# Combine the two components stored on the last axis into one magnitude
# (presumably the real and imaginary STFT parts -- confirm against map_stft).
b = (a[:, :, :, 0] ** 2 + a[:, :, :, 1] ** 2) ** 0.5
d = b[0].numpy()  # first entry of the one-sample slice, as a NumPy array

# Grid spacing implied by the image shape: axis 0 is mapped onto
# 0 .. target_sample_rate / 2 and the last axis onto 0 .. chopped_duration.
time_resolution_step = np.linspace(0, chopped_duration, d.shape[-1], endpoint=False)
freq_resolution_step = np.linspace(0, target_sample_rate / 2, d.shape[0], endpoint=False)
print('Time resolution in the image: {:.4f} sec'.format(time_resolution_step[1] - time_resolution_step[0]))
print('Frequency resolution in the image: {:.4f} Hz'.format(freq_resolution_step[1] - freq_resolution_step[0]))

# Cap the colour scale at the 99th percentile so a few very bright pixels do
# not wash out the rest of the image.
im = plt.imshow(d, origin='lower', cmap='OrRd', aspect='auto', vmax=np.percentile(d, 99.))
cbar = plt.colorbar(im)
plt.title("strain, max = {}".format(np.amax(d)))  # title typo "strian" fixed

# Five evenly spaced ticks per axis, relabelled from pixel indices to
# seconds (x) and Hz (y).
plt.xticks(np.linspace(0, d.shape[-1], 5), np.linspace(0, chopped_duration, 5, endpoint=True))
plt.yticks(np.linspace(0, d.shape[0], 5), np.linspace(0, target_sample_rate / 2, 5, endpoint=True))

# NOTE: the next savefig in this snippet also writes 'test.png', so this
# figure is overwritten unless it is inspected or copied first.
plt.savefig('test.png')
plt.close()
print(d.shape)
# --- nnAudio spectrogram of sample `try_index` taken from X3_tns -------------
# Alternative input, left disabled:
#   map_stft(y_tns[try_index: try_index + 1].float())
stft_out = map_stft(X3_tns[try_index: try_index + 1].float())

# Magnitude from the two components on the last axis (presumably the real and
# imaginary STFT parts -- confirm against map_stft's output format).
component_0 = stft_out[:, :, :, 0]
component_1 = stft_out[:, :, :, 1]
magnitude = (component_0 ** 2 + component_1 ** 2) ** 0.5
d = magnitude[0].numpy()  # name kept: the pixel plot that follows reads `d`
n_freq_bins = d.shape[0]
n_time_bins = d.shape[-1]

# Report the grid spacing implied by the image shape.
time_grid = np.linspace(0, chopped_duration, n_time_bins, endpoint=False)
freq_grid = np.linspace(0, target_sample_rate / 2, n_freq_bins, endpoint=False)
print('Time resolution in the image: {:.4f} sec'.format(time_grid[1] - time_grid[0]))
print('Frequency resolution in the image: {:.4f} Hz'.format(freq_grid[1] - freq_grid[0]))

# Draw the image with the colour scale capped at the 99th percentile.
colour_cap = np.percentile(d, 99.)
image = plt.imshow(d, origin='lower', cmap='OrRd', aspect='auto', vmax=colour_cap)
plt.colorbar(image)
plt.title("X3, max = {}".format(np.amax(d)))

# Five evenly spaced ticks per axis, labelled in seconds (x) and Hz (y).
time_labels = np.linspace(0, chopped_duration, 5, endpoint=True)
freq_labels = np.linspace(0, target_sample_rate / 2, 5, endpoint=True)
plt.xticks(np.linspace(0, n_time_bins, 5), time_labels)
plt.yticks(np.linspace(0, n_freq_bins, 5), freq_labels)

# Same output file as the other sections of this snippet; each save replaces
# the previous one.
plt.savefig('test.png')
plt.close()
print(d.shape)
# Pixel-value profile of the current image `d`: every pixel in flattened
# order, with the 99th percentile (the colour-scale cap used for the image)
# drawn as a dashed horizontal line.
pixel_values = d.flatten()
percentile_99 = np.percentile(d, 99)
plt.plot(pixel_values)
plt.axhline(percentile_99, c='black', ls='--')
plt.xlabel('pixels')
plt.savefig('test.png')  # replaces the image saved just before
plt.close()
# --- SciPy STFT spectrogram of the same sample, for comparison ---------------
# Input selection: enable the commented line instead to run on the
# "extracted" series rather than the downsampled one.
#f, t, Zxx = signal.stft(y_chopped[try_index: try_index + 1], fs=target_sample_rate, nperseg=None) # extracted
# nperseg=None leaves the segment length to SciPy's own default handling.
f, t, Zxx = signal.stft(y_ds_chopped[try_index: try_index + 1], fs=target_sample_rate, nperseg=None) # downsampled

# Magnitude of the complex STFT for the single selected sample. `d` is
# already non-negative, so no further np.abs() calls are needed below.
d = np.abs(Zxx[0])

# Cap the colour scale at the 99th percentile so a few very bright pixels do
# not wash out the rest of the image.
im = plt.imshow(d, origin='lower', cmap='OrRd', aspect='auto', vmax=np.percentile(d, 99.))
cbar = plt.colorbar(im)
plt.title("strain, max = {}".format(np.amax(d)))  # title typo "strian" fixed

# Five evenly spaced ticks per axis, relabelled from pixel indices to
# seconds (x) and Hz (y).
plt.xticks(np.linspace(0, d.shape[-1], 5), np.linspace(0, chopped_duration, 5, endpoint=True))
plt.yticks(np.linspace(0, d.shape[0], 5), np.linspace(0, target_sample_rate / 2, 5, endpoint=True))

# NOTE: the pixel plot that follows also writes 'test.png' and overwrites
# this figure.
plt.savefig('test.png')
plt.close()
print(d.shape)
# Pixel-value profile of the SciPy magnitude image `d`: all pixels in
# flattened order plus a dashed line at the 99th percentile.
flat_magnitudes = d.flatten()
dashed_level = np.percentile(d, 99)
plt.plot(flat_magnitudes)
plt.axhline(dashed_level, c='black', ls='--')
plt.xlabel('pixels')
plt.savefig('test.png')  # final write to 'test.png' in this snippet
plt.close()
```
\ No newline at end of file