From: will Date: Fri, 28 Jun 2024 14:26:56 +0000 (+0100) Subject: changed to hsv, changed the test file to test the audio io system, added variable... X-Git-Url: https://git.ozva.co.uk/?a=commitdiff_plain;h=72324dec6790b6dd06ebff01f8584b4c9e12d75e;p=audio-over-stft changed to hsv, changed the test file to test the audio io system, added variable sample rate system --- diff --git a/error.png b/error.png new file mode 100644 index 0000000..1aeedb8 Binary files /dev/null and b/error.png differ diff --git a/main.py b/main.py index 83a73cd..229a797 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,7 @@ import cv2 as cv import numpy as np from scipy.io import wavfile +import scipy.signal as sps import matplotlib.pyplot as plt from multiprocessing import Pool @@ -98,6 +99,15 @@ class fft(): self.lower_limit = -40 self.upper_limit = 100 + self.amplitude_max = 180 + self.amplitude_min = 0 + self.angle_max = 255 + self.angle_min = 100 + + self.amplitude_relative = self.amplitude_max - self.amplitude_min + self.angle_relative = self.angle_max - self.angle_min + + def stft( self, data: np.ndarray @@ -110,14 +120,18 @@ class fft(): amplitude = 20*np.log10(amplitude) amplitude = np.clip(amplitude, self.lower_limit, self.upper_limit) amplitude -= self.lower_limit - amplitude *= (255 / self.upper_limit) + amplitude *= (self.amplitude_relative / self.upper_limit) + self.amplitude_min angle = np.angle(spectrum) - angle = (angle + np.pi) * (255 / (2 * np.pi)) + angle = ((angle + np.pi) * (self.angle_relative / (2 * np.pi))) + self.angle_min + + full = np.full(angle.shape, fill_value=255) - image = np.stack((amplitude, angle, amplitude), axis=-1) + image = np.stack((amplitude, angle, full), axis=-1) image = np.array([image], dtype=np.uint8) + image = cv.cvtColor(image, cv.COLOR_HSV2BGR) + return image def istft( @@ -125,14 +139,17 @@ class fft(): image: np.ndarray ) -> np.ndarray: + image = cv.cvtColor(image, cv.COLOR_BGR2HSV) + amplitude = image[0][...,0].astype(np.float64) angle = image[0][...,1].astype(np.float64) - amplitude /= (255 / self.upper_limit) + amplitude -= self.amplitude_min + amplitude /= (self.amplitude_relative / self.upper_limit) amplitude += self.lower_limit amplitude = np.power(10, amplitude / 20) - angle = (angle / (255 / (2 * np.pi))) - np.pi + angle = ((angle - self.angle_min) / (self.angle_relative / (2 * np.pi))) - np.pi real = np.cos(angle) * amplitude imag = np.sin(angle) * amplitude @@ -143,6 +160,14 @@ class fft(): return data sample_rate, data = wavfile.read("/home/will/Music/George Michael - Careless Whisper.wav") +new_rate = 22_050. + +sample_count = round(len(data) * new_rate / sample_rate) +data = sps.resample(data, sample_count) +sample_rate = int(new_rate) + +data = [data[i] for i in range(0, len(data), 2)] +sample_rate = sample_rate // 2 window_size = 1_000 window_height = 500 @@ -172,7 +197,7 @@ for segment_index in range(segment_count): cv.imshow("display", spectrum) cv.waitKey(1) - rows = [[i] for i in spectrum] + rows = [np.array([i]) for i in spectrum] with Pool() as p: recovered = np.array(p.map(transform.istft, rows)) @@ -182,7 +207,7 @@ for segment_index in range(segment_count): wavfile.write("out.wav", sample_rate, recovered_data.astype(np.int16)) -difference = data - recovered_data +difference = (data - recovered_data)[1000:251000] plt.style.use('dark_background') fig, (ax1, ax2) = plt.subplots(nrows=2) diff --git a/out.wav b/out.wav index 232ced3..baf075e 100644 Binary files a/out.wav and b/out.wav differ diff --git a/test.py b/test.py index bce9ae7..89e73be 100644 --- a/test.py +++ b/test.py @@ -1,5 +1,26 @@ -import cv2 as cv +import wave -image = cv.imread("calibration/calibration.jpg") +import pyaudio -cv.imwrite("calibration/calibration.jpg") +def handler (in_data, frame_count, time_info, status): + print(in_data) + print(frame_count) + print(time_info) + print(status) + + (in_data, pyaudio.paContinue) + +CHUNK = 1024 + +p = pyaudio.PyAudio() + +stream = p.open( + format=pyaudio.paInt8, + channels=1, + rate=22_050, + input=True, + stream_callback=handler +) + +stream.close() +p.terminate()