import cv2 as cv
import numpy as np
from scipy.io import wavfile
+import scipy.signal as sps
import matplotlib.pyplot as plt
from multiprocessing import Pool
self.lower_limit = -40
self.upper_limit = 100
+ self.amplitude_max = 180
+ self.amplitude_min = 0
+ self.angle_max = 255
+ self.angle_min = 100
+
+ self.amplitude_relative = self.amplitude_max - self.amplitude_min
+ self.angle_relative = self.angle_max - self.angle_min
+
+
def stft(
self,
data: np.ndarray
amplitude = 20*np.log10(amplitude)
amplitude = np.clip(amplitude, self.lower_limit, self.upper_limit)
amplitude -= self.lower_limit
- amplitude *= (255 / self.upper_limit)
+ # Scale first, then add the offset, so this is the exact inverse of the
+ # "subtract amplitude_min, then divide" steps in istft. The one-line form
+ # multiplied by (scale + amplitude_min) because + binds inside the operand.
+ # NOTE(review): after the shift above the values span
+ # 0..(upper_limit - lower_limit), so scaling by upper_limit alone can
+ # exceed amplitude_max - confirm the intended range.
+ amplitude *= (self.amplitude_relative / self.upper_limit)
+ amplitude += self.amplitude_min
angle = np.angle(spectrum)
- angle = (angle + np.pi) * (255 / (2 * np.pi))
+ angle = ((angle + np.pi) * (self.angle_relative / (2 * np.pi))) + self.angle_min
+
+ full = np.full(angle.shape, fill_value=255)
- image = np.stack((amplitude, angle, amplitude), axis=-1)
+ image = np.stack((amplitude, angle, full), axis=-1)
image = np.array([image], dtype=np.uint8)
+ image = cv.cvtColor(image, cv.COLOR_HSV2BGR)
+
return image
def istft(
image: np.ndarray
) -> np.ndarray:
+ image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
+
amplitude = image[0][...,0].astype(np.float64)
angle = image[0][...,1].astype(np.float64)
- amplitude /= (255 / self.upper_limit)
+ amplitude -= self.amplitude_min
+ amplitude /= (self.amplitude_relative / self.upper_limit)
amplitude += self.lower_limit
amplitude = np.power(10, amplitude / 20)
- angle = (angle / (255 / (2 * np.pi))) - np.pi
+ angle = ((angle - self.angle_min) / (self.angle_relative / (2 * np.pi))) - np.pi
real = np.cos(angle) * amplitude
imag = np.sin(angle) * amplitude
return data
sample_rate, data = wavfile.read("/home/will/Music/George Michael - Careless Whisper.wav")
+# Intermediate sample rate for the first resampling stage.
+new_rate = 22_050.
+
+# Number of samples the signal has at new_rate, rounded to a whole sample.
+sample_count = round(len(data) * new_rate / sample_rate)
+data = sps.resample(data, sample_count)
+sample_rate = int(new_rate)
+
+# Keep every second sample, halving the rate again. Slicing keeps `data` an
+# ndarray instead of rebuilding it as a Python list element by element.
+# NOTE(review): plain striding applies no anti-alias filter; consider
+# resampling straight to the final rate in the sps.resample call above.
+data = data[::2]
+sample_rate = sample_rate // 2
window_size = 1_000
window_height = 500
cv.imshow("display", spectrum)
cv.waitKey(1)
- rows = [[i] for i in spectrum]
+ rows = [np.array([i]) for i in spectrum]
with Pool() as p:
recovered = np.array(p.map(transform.istft, rows))
wavfile.write("out.wav", sample_rate, recovered_data.astype(np.int16))
-difference = data - recovered_data
+difference = (data - recovered_data)[1000:251000]
plt.style.use('dark_background')
fig, (ax1, ax2) = plt.subplots(nrows=2)