OzVa Git service - audio-over-stft/commitdiff
tidying up main
author will <greenwoodw50@gmail.com>
Wed, 13 Nov 2024 20:33:45 +0000 (20:33 +0000)
committer will <greenwoodw50@gmail.com>
Wed, 13 Nov 2024 20:33:45 +0000 (20:33 +0000)
camera.py
fft.py
loop.py
test.py [new file with mode: 0755]

diff --git a/camera.py b/camera.py
index d1684d05ea371511e7be75b73082dad3d2b53b87..208c01c855a385a5434effb903054934ec5bb9ec 100755 (executable)
--- a/camera.py
+++ b/camera.py
@@ -118,8 +118,8 @@ class camera():
                debug: bool = False
        ) -> None:
 
-               self.last_display = image
                image = cv.cvtColor(image, cv.COLOR_HSV2BGR)
+               self.last_display = image
                image = cv.resize(image, self.display_size, interpolation=cv.INTER_NEAREST_EXACT)
 
                if debug:
@@ -143,10 +143,10 @@ class camera():
                                image = cv.warpPerspective(image, self.homography, self.display_size)
                                image = cv.resize(image, (self.window_size, self.window_height))
 
-                               image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
-
                self.last_recovered = image
 
+               image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
+
                if self.show_debug == True:
                        self.debug()
 
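Both camera.py hunks only reorder the colour-space handling around the cached frames: the displayed frame is now stored after the HSV-to-BGR conversion, and the recovered frame is cached in BGR before being converted back to HSV. A minimal standalone sketch of the resulting ordering, using a hypothetical CameraSketch stub rather than the real camera class (which also corrects perspective with a calibration homography, skipped here):

import cv2 as cv
import numpy as np

class CameraSketch:
    """Hypothetical stub showing only the ordering changed in this commit."""

    def __init__(self, display_size=(1920, 1080), window_size=192, window_height=100):
        self.display_size = display_size
        self.window_size = window_size
        self.window_height = window_height
        self.last_display = None
        self.last_recovered = None

    def display(self, image):
        # HSV spectrum -> BGR for the screen, cache the BGR frame, then upscale
        image = cv.cvtColor(image, cv.COLOR_HSV2BGR)
        self.last_display = image
        return cv.resize(image, self.display_size, interpolation=cv.INTER_NEAREST_EXACT)

    def capture(self, image):
        # downscale the captured BGR frame, cache it, and only then go back to HSV
        image = cv.resize(image, (self.window_size, self.window_height))
        self.last_recovered = image
        return cv.cvtColor(image, cv.COLOR_BGR2HSV)

if __name__ == "__main__":
    cam = CameraSketch()
    hsv = np.zeros((100, 192, 3), dtype=np.uint8)
    recovered = cam.capture(cam.display(hsv))
    print(recovered.shape)  # (100, 192, 3)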
diff --git a/fft.py b/fft.py
index fc14e258a6c7b0b542a62103bebaa72db73b71d2..348f3543c483e79b5dd5809fe613d21f71ddacfc 100755 (executable)
--- a/fft.py
+++ b/fft.py
@@ -3,6 +3,9 @@
 import numpy as np
 import cv2 as cv
 
+from scipy.io import wavfile
+import matplotlib.pyplot as plt
+
 """
 Notes:
        The data is receved in int16 format (limits -32768 to 32767). It is converted to a complex array from which we can work out power. This is from 0 to over 10,000. It is converted to decibels [1].
 """
 Notes:
        The data is receved in int16 format (limits -32768 to 32767). It is converted to a complex array from which we can work out power. This is from 0 to over 10,000. It is converted to decibels [1].
@@ -35,7 +38,7 @@ class fft():
                self.angle_max = 254
                self.angle_min = 0
 
-               self.volume_max = 100
+               self.volume_max = 65
                self.volume_min = -40
 
                # calulate the range of each amplitude and angle
@@ -63,8 +66,8 @@ class fft():
                        spectrum = np.fft.fft(segment) / self.window_size
 
                        # convert the vector length to decimals and confine
-                       amplitude = np.abs(spectrum)
-                       amplitude = 20*np.log10(amplitude)
+                       orig_amplitude = np.abs(spectrum)
+                       amplitude = 20*np.log10(orig_amplitude)
                        amplitude = np.clip(amplitude, self.volume_min, self.volume_max)
                        amplitude = ((amplitude - self.a) / self.b) + self.c
 
@@ -93,8 +96,8 @@ class fft():
                        angle = image[0][...,1].astype(np.float64)
 
                        # Use hue as seperate data point
-                       #hue = image[0][...,0].astype(np.float64) * (255/180)
-                       #amplitude = np.mean( np.array([ amplitude, hue ]), axis=0 ).astype(np.uint8)
+                       hue = image[0][...,0].astype(np.float64) * (255/180)
+                       amplitude = np.mean( np.array([ amplitude, hue ]), axis=0 ).astype(np.uint8)
 
                        # convert amplitude back into vector length
                        amplitude = self.log_lookup[amplitude]
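The fft.py changes tighten the encoded amplitude range (volume_max drops from 100 dB to 65 dB) and re-enable the hue channel as a second copy of the amplitude when decoding (OpenCV 8-bit hue runs 0-179, hence the 255/180 rescale). A rough standalone sketch of the forward mapping under the new limits; the constants a, b and c below are assumptions standing in for self.a/self.b/self.c, which are computed elsewhere in fft.py and are not shown in this diff:

import numpy as np

window_size = 192
volume_min, volume_max = -40, 65   # limits after this commit

# assumed: a/b/c map [volume_min, volume_max] onto the 8-bit pixel range [0, 255]
a = volume_min
b = (volume_max - volume_min) / 255
c = 0

rng = np.random.default_rng(0)
segment = rng.integers(-32768, 32767, window_size).astype(np.int16)

spectrum = np.fft.fft(segment) / window_size
orig_amplitude = np.abs(spectrum)               # vector length of each bin
amplitude = 20 * np.log10(orig_amplitude)       # to decibels
amplitude = np.clip(amplitude, volume_min, volume_max)
amplitude = ((amplitude - a) / b) + c           # into pixel range
print(amplitude.min(), amplitude.max())         # stays within [0, 255]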
diff --git a/loop.py b/loop.py
index 3665bc8789deb5b68472f3a93bf2eb24b67ad9b5..bfc1919cbdbff57674fa782ae38fef11b397d919 100755 (executable)
--- a/loop.py
+++ b/loop.py
@@ -39,9 +39,9 @@ def get_audio(
        data = sps.resample(data, sample_count)
 
        # make divisisible into screens
-       segment_samples = window_height * (window_height // 2)
+       segment_samples = window_height * (window_size // 2)
        overflow_samples = segment_samples - (len(data) % segment_samples) + window_size
-       data = np.concatenate((data, data[0:overflow_samples]))
+       data = np.concatenate((data, np.zeros((overflow_samples,))))
 
        return data
 
@@ -79,6 +79,9 @@ def process_loop(
        global audio
        global caching
 
+       print("caching data...")
+       caching = True
+
        hop_size = window_size // 2
        segment_samples = window_height * hop_size
 
@@ -87,8 +90,6 @@ def process_loop(
        error_array = np.zeros((5, window_size))
        error_spectrum = np.zeros((window_height, window_size, 3))
 
-       print("caching data...")
-       caching = True
 
        all_spectrums = np.zeros((segment_count, window_height, window_size, 3), dtype=np.uint8)
 
@@ -100,18 +101,19 @@ def process_loop(
                        mapping = p.map(transform.stft, segment_rows)
 
                spectrum = np.array(mapping)[:,0,...]
-               if correction_array is not None:
-                       spectrum -= correction_array
-                       spectrum = np.clip(spectrum, 0, 255)
+               if correction_array is not None:
+                       spectrum -= correction_array
+                       spectrum = np.clip(spectrum, 0, 255)
 
                spectrum = spectrum.astype(np.uint8)
 
                # print sample image
-               if segment_index == 10:
-                       cv.imwrite("spectrum_sample.jpg", spectrum)
+               # if segment_index == 0:
+               #       image = cv.cvtColor(spectrum, cv.COLOR_HSV2BGR)
+               #       image = cv.resize(image, (3508, 2480), interpolation=cv.INTER_NEAREST_EXACT)
+               #       cv.imwrite("spectrum_sample.jpg", image)
 
                all_spectrums[segment_index] = spectrum
-               #np.save(f"cache/frame{segment_index}.npy", spectrum)
 
        segment_index = 0
        print("cached!")
@@ -119,7 +121,7 @@ def process_loop(
 
        while segment_index < segment_count:
 
-               spectrum = all_spectrums[segment_index]#np.load(f"cache/frame{segment_index}.npy")
+               spectrum = all_spectrums[segment_index]
 
                # display and capture
                camera.display(spectrum)
@@ -175,9 +177,9 @@ notes:
 """
 
 # define parameters
 """
 
 # define parameters
-sample_rate = 22_050
-window_size = 144
-window_height = 80
+sample_rate = 16_000
+window_size = 192
+window_height = 100
 
 hop_size = window_size // 2
 
@@ -186,7 +188,7 @@ caching = False
 if __name__ == "__main__":
 
        # get audio data
-       data = get_audio("/home/will/Downloads/Adducci - Around the Horn.wav", window_size, window_height, sample_rate)
+       data = get_audio("/home/will/Downloads/The Killing Moon.wav", window_size, window_height, sample_rate)
 
        # setup fft
        transform = fft(window_size)
@@ -196,9 +198,9 @@ if __name__ == "__main__":
                window_size,
                window_height,
                (1920, 1080),
-               device_id = 2,
-               debug = False,
-               dummy = True
+               device_id = 0,
+               debug = True,
+               dummy = False
        )
        camera.calibrate()
 
@@ -226,6 +228,7 @@ if __name__ == "__main__":
        )
 
        try:
+
                print("performing error correction...")
                silence = np.full(((10 * window_size * window_height) + window_size,), fill_value=1, dtype=np.int16)
                correction_array = process_loop(silence, transform, camera, window_size, window_height, loop = False, correction = True)
                print("performing error correction...")
                silence = np.full(((10 * window_size * window_height) + window_size,), fill_value=1, dtype=np.int16)
                correction_array = process_loop(silence, transform, camera, window_size, window_height, loop = False, correction = True)
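The first loop.py hunk fixes the per-screen sample count (it previously multiplied window_height by itself instead of by the hop size) and pads the tail of the track with silence rather than wrapping audio from the start; the later hunks move the caching banner ahead of the setup code, drop the per-frame .npy cache in favour of the in-memory all_spectrums array, and switch to the new capture parameters. A standalone sketch of just the padding arithmetic with the new parameters (window_size = 192, window_height = 100); the track length is made up for the example:

import numpy as np

window_size = 192
window_height = 100

hop_size = window_size // 2
segment_samples = window_height * hop_size        # samples shown per screen

data = np.zeros(150_000)                          # stand-in for the resampled track

# pad with zeros so the data divides into whole screens, plus one extra window
overflow_samples = segment_samples - (len(data) % segment_samples) + window_size
data = np.concatenate((data, np.zeros((overflow_samples,))))

assert (len(data) - window_size) % segment_samples == 0
print((len(data) - window_size) // segment_samples, "whole screens plus one extra window")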
diff --git a/test.py b/test.py
new file mode 100755 (executable)
index 0000000..9e376ae
--- /dev/null
+++ b/test.py
@@ -0,0 +1,44 @@
+#!./.venv/bin/python
+
+import numpy as np
+import matplotlib.pyplot as plt
+import time
+import math
+import pyfftw
+
+def test(func, length):
+    results = []
+    for i in range(100):
+        a = np.random.randint(0, 255, (1, length, 3), dtype=np.uint8)
+        t = time.time()
+        r = func(a)
+        results += [time.time() - t]
+
+    return sum(results) / len(results)
+
+def f1(a):
+    a = np.fft.fft(a)
+    a = np.fft.ifft(a)
+
+def f2(a):
+    a = pyfftw.interfaces.numpy_fft.fft(a)
+    a = pyfftw.interfaces.numpy_fft.ifft(a)
+
+lengths = list(range(70, 1000000, 10000))
+
+print(len(lengths))
+f1_results = [test(f1, i) for i in lengths]
+f2_results = [test(f2, i) for i in lengths]
+fig, (ax0, ax1) = plt.subplots(2, 1, layout='constrained')
+
+ax0.plot(f1_results, lengths, "-b", label="numpy")
+ax0.plot(f2_results, lengths, "-r", label="ptfftw")
+ax0.set_xlabel('Time (ms)')
+ax0.set_ylabel('Array length')
+ax0.legend(loc="upper left")
+
+ax1.plot(100 - ((np.array(f2_results) / np.array(f1_results)) * 100), lengths, "-g")
+ax1.set_xlabel('Speed-up (%)')
+ax1.set_ylabel('Array length')
+
+plt.show()
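The new test.py benchmarks numpy's FFT against pyfftw's drop-in numpy_fft interface over growing array lengths. Two caveats if the numbers look off: time.time() measures seconds rather than the milliseconds the axis label suggests, and pyfftw's interfaces layer re-plans on every call unless its cache is enabled. A hedged variant of the timing helper reflecting both points; this is a sketch alongside the committed script, not a replacement for it:

import time
import numpy as np
import pyfftw

# reuse FFTW plans between calls; without this every interfaces call re-plans
pyfftw.interfaces.cache.enable()

def timed_mean_seconds(func, length, repeats=100):
    """Mean wall-clock time of func over random (1, length, 3) uint8 arrays, in seconds."""
    results = []
    for _ in range(repeats):
        a = np.random.randint(0, 255, (1, length, 3), dtype=np.uint8)
        t = time.perf_counter()            # higher resolution than time.time()
        func(a)
        results.append(time.perf_counter() - t)
    return sum(results) / len(results)

def numpy_roundtrip(a):
    return np.fft.ifft(np.fft.fft(a))

def pyfftw_roundtrip(a):
    return pyfftw.interfaces.numpy_fft.ifft(pyfftw.interfaces.numpy_fft.fft(a))

print("numpy :", timed_mean_seconds(numpy_roundtrip, 1024))
print("pyfftw:", timed_mean_seconds(pyfftw_roundtrip, 1024))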