OzVa Git service - audio-over-stft/commitdiff
changed to hsv, changed the test file to test the audio io system, added variable...
author will <greenwoodw50@gmail.com>
Fri, 28 Jun 2024 14:26:56 +0000 (15:26 +0100)
committer will <greenwoodw50@gmail.com>
Fri, 28 Jun 2024 14:26:56 +0000 (15:26 +0100)
error.png [new file with mode: 0644]
main.py
out.wav
test.py

diff --git a/error.png b/error.png
new file mode 100644 (file)
index 0000000..1aeedb8
Binary files /dev/null and b/error.png differ
diff --git a/main.py b/main.py
index 83a73cd3411f59a235d42740c6e4f80a063f5297..229a797cf453494806970274e797ba1c059e6833 100644 (file)
--- a/main.py
+++ b/main.py
@@ -1,6 +1,7 @@
 import cv2 as cv
 import numpy as np
 from scipy.io import wavfile
+import scipy.signal as sps
 import matplotlib.pyplot as plt
 from multiprocessing import Pool
 
@@ -98,6 +99,15 @@ class fft():
                self.lower_limit = -40
                self.upper_limit = 100
 
+               self.amplitude_max = 180
+               self.amplitude_min = 0
+               self.angle_max = 255
+               self.angle_min = 100
+
+               self.amplitude_relative = self.amplitude_max - self.amplitude_min
+               self.angle_relative = self.angle_max - self.angle_min
+
+
        def stft(
                self,
                data: np.ndarray
@@ -110,14 +120,18 @@ class fft():
                amplitude = 20*np.log10(amplitude)
                amplitude = np.clip(amplitude, self.lower_limit, self.upper_limit)
                amplitude -= self.lower_limit
-               amplitude *= (255 / self.upper_limit)
+               amplitude *= (self.amplitude_relative / self.upper_limit) + self.amplitude_min
 
                angle = np.angle(spectrum)
-               angle = (angle + np.pi) * (255 / (2 * np.pi))
+               angle = ((angle + np.pi) * (self.angle_relative / (2 * np.pi))) + self.angle_min
+
+               full = np.full(angle.shape, fill_value=255)
 
-               image = np.stack((amplitude, angle, amplitude), axis=-1)
+               image = np.stack((amplitude, angle, full), axis=-1)
                image = np.array([image], dtype=np.uint8)
 
+               image = cv.cvtColor(image, cv.COLOR_HSV2BGR)
+
                return image
 
        def istft(
@@ -125,14 +139,17 @@ class fft():
                image: np.ndarray
        ) -> np.ndarray:
 
+               image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
+
                amplitude = image[0][...,0].astype(np.float64)
                angle = image[0][...,1].astype(np.float64)
 
-               amplitude /= (255 / self.upper_limit)
+               amplitude -= self.amplitude_min
+               amplitude /= (self.amplitude_relative / self.upper_limit)
                amplitude += self.lower_limit
                amplitude = np.power(10, amplitude / 20)
 
-               angle = (angle / (255 / (2 * np.pi))) - np.pi
+               angle = ((angle - self.angle_min) / (self.angle_relative / (2 * np.pi))) - np.pi
 
                real = np.cos(angle) * amplitude
                imag = np.sin(angle) * amplitude
@@ -143,6 +160,14 @@ class fft():
                return data
 
 sample_rate, data = wavfile.read("/home/will/Music/George Michael - Careless Whisper.wav")
+new_rate = 22_050.
+
+sample_count = round(len(data) * new_rate / sample_rate)
+data = sps.resample(data, sample_count)
+sample_rate = int(new_rate)
+
+data = [data[i] for i in range(0, len(data), 2)]
+sample_rate = sample_rate // 2
 
 window_size = 1_000
 window_height = 500
@@ -172,7 +197,7 @@ for segment_index in range(segment_count):
        cv.imshow("display", spectrum)
        cv.waitKey(1)
 
-       rows = [[i] for i in spectrum]
+       rows = [np.array([i]) for i in spectrum]
        with Pool() as p:
                recovered = np.array(p.map(transform.istft, rows))
 
@@ -182,7 +207,7 @@ for segment_index in range(segment_count):
 
 wavfile.write("out.wav", sample_rate, recovered_data.astype(np.int16))
 
-difference = data - recovered_data
+difference = (data - recovered_data)[1000:251000]
 
 plt.style.use('dark_background')
 fig, (ax1, ax2) = plt.subplots(nrows=2)
diff --git a/out.wav b/out.wav
index 232ced38360e20754e1feecac5788d45030e3fb6..baf075e524d1e50868d1215e43557b5c46fb1ae6 100644 (file)
Binary files a/out.wav and b/out.wav differ
diff --git a/test.py b/test.py
index bce9ae7117279f2e253ce3f4973b1ae2a154000e..89e73be7692e09441e4bc774a3b4783b38e751d6 100644 (file)
--- a/test.py
+++ b/test.py
@@ -1,5 +1,26 @@
-import cv2 as cv
+import wave
 
-image = cv.imread("calibration/calibration.jpg")
+import pyaudio
 
-cv.imwrite("calibration/calibration.jpg")
+def handler (in_data, frame_count, time_info, status):
+       print(in_data)
+       print(frame_count)
+       print(time_info)
+       print(status)
+
+       (in_data, pyaudio.paContinue)
+
+CHUNK = 1024
+
+p = pyaudio.PyAudio()
+
+stream = p.open(
+       format=pyaudio.paInt8,
+       channels=1,
+       rate=22_050,
+       input=True,
+       stream_callback=handler
+)
+
+stream.close()
+p.terminate()