From: will
Date: Wed, 13 Nov 2024 20:33:45 +0000 (+0000)
Subject: tidying up
X-Git-Url: https://git.ozva.co.uk/?a=commitdiff_plain;h=HEAD;p=audio-over-stft

tidying up
---

diff --git a/camera.py b/camera.py
index d1684d0..208c01c 100755
--- a/camera.py
+++ b/camera.py
@@ -118,8 +118,8 @@ class camera():
             debug: bool = False
             ) -> None:
 
-        self.last_display = image
         image = cv.cvtColor(image, cv.COLOR_HSV2BGR)
+        self.last_display = image
         image = cv.resize(image, self.display_size, interpolation=cv.INTER_NEAREST_EXACT)
 
         if debug:
@@ -143,10 +143,10 @@ class camera():
         image = cv.warpPerspective(image, self.homography, self.display_size)
         image = cv.resize(image, (self.window_size, self.window_height))
 
-        image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
-
         self.last_recovered = image
 
+        image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
+
         if self.show_debug == True:
             self.debug()
diff --git a/fft.py b/fft.py
index fc14e25..348f354 100755
--- a/fft.py
+++ b/fft.py
@@ -3,6 +3,9 @@
 import numpy as np
 import cv2 as cv
 
+from scipy.io import wavfile
+import matplotlib.pyplot as plt
+
 """
 Notes: The data is receved in int16 format (limits -32768 to 32767). It is converted to a complex array from
 which we can work out power. This is from 0 to over 10,000. It is converted to decibels [1].
@@ -35,7 +38,7 @@ class fft():
         self.angle_max = 254
         self.angle_min = 0
 
-        self.volume_max = 100
+        self.volume_max = 65
         self.volume_min = -40
 
         # calulate the range of each amplitude and angle
@@ -63,8 +66,8 @@ class fft():
         spectrum = np.fft.fft(segment) / self.window_size
 
         # convert the vector length to decimals and confine
-        amplitude = np.abs(spectrum)
-        amplitude = 20*np.log10(amplitude)
+        orig_amplitude = np.abs(spectrum)
+        amplitude = 20*np.log10(orig_amplitude)
         amplitude = np.clip(amplitude, self.volume_min, self.volume_max)
         amplitude = ((amplitude - self.a) / self.b) + self.c
 
@@ -93,8 +96,8 @@ class fft():
         angle = image[0][...,1].astype(np.float64)
 
         # Use hue as seperate data point
-        #hue = image[0][...,0].astype(np.float64) * (255/180)
-        #amplitude = np.mean( np.array([ amplitude, hue ]), axis=0 ).astype(np.uint8)
+        hue = image[0][...,0].astype(np.float64) * (255/180)
+        amplitude = np.mean( np.array([ amplitude, hue ]), axis=0 ).astype(np.uint8)
 
         # convert amplitude back into vector length
         amplitude = self.log_lookup[amplitude]
diff --git a/loop.py b/loop.py
index 3665bc8..bfc1919 100755
--- a/loop.py
+++ b/loop.py
@@ -39,9 +39,9 @@ def get_audio(
     data = sps.resample(data, sample_count)
 
     # make divisisible into screens
-    segment_samples = window_height * (window_height // 2)
+    segment_samples = window_height * (window_size // 2)
     overflow_samples = segment_samples - (len(data) % segment_samples) + window_size
-    data = np.concatenate((data, data[0:overflow_samples]))
+    data = np.concatenate((data, np.zeros((overflow_samples,))))
 
     return data
 
@@ -79,6 +79,9 @@ def process_loop(
     global audio
     global caching
 
+    print("caching data...")
+    caching = True
+
     hop_size = window_size // 2
     segment_samples = window_height * hop_size
 
@@ -87,8 +90,6 @@
     error_array = np.zeros((5, window_size))
     error_spectrum = np.zeros((window_height, window_size, 3))
 
-    print("caching data...")
-    caching = True
     all_spectrums = np.zeros((segment_count, window_height, window_size, 3), dtype=np.uint8)
 
@@ -100,18 +101,19 @@
         mapping = p.map(transform.stft, segment_rows)
         spectrum = np.array(mapping)[:,0,...]
 
-        if correction_array is not None:
-            spectrum -= correction_array
-            spectrum = np.clip(spectrum, 0, 255)
+        # if correction_array is not None:
+        #     spectrum -= correction_array
+        #     spectrum = np.clip(spectrum, 0, 255)
 
         spectrum = spectrum.astype(np.uint8)
 
         # print sample image
-        if segment_index == 10:
-            cv.imwrite("spectrum_sample.jpg", spectrum)
+        # if segment_index == 0:
+        #     image = cv.cvtColor(spectrum, cv.COLOR_HSV2BGR)
+        #     image = cv.resize(image, (3508, 2480), interpolation=cv.INTER_NEAREST_EXACT)
+        #     cv.imwrite("spectrum_sample.jpg", image)
 
         all_spectrums[segment_index] = spectrum
-        #np.save(f"cache/frame{segment_index}.npy", spectrum)
 
     segment_index = 0
     print("cached!")
@@ -119,7 +121,7 @@
 
     while segment_index < segment_count:
 
-        spectrum = all_spectrums[segment_index]#np.load(f"cache/frame{segment_index}.npy")
+        spectrum = all_spectrums[segment_index]
 
         # display and capture
         camera.display(spectrum)
@@ -175,9 +177,9 @@ notes:
 """
 
 # define parameters
-sample_rate = 22_050
-window_size = 144
-window_height = 80
+sample_rate = 16_000
+window_size = 192
+window_height = 100
 
 hop_size = window_size // 2
 
@@ -186,7 +188,7 @@ caching = False
 if __name__ == "__main__":
 
     # get audio data
-    data = get_audio("/home/will/Downloads/Adducci - Around the Horn.wav", window_size, window_height, sample_rate)
+    data = get_audio("/home/will/Downloads/The Killing Moon.wav", window_size, window_height, sample_rate)
 
     # setup fft
     transform = fft(window_size)
@@ -196,9 +198,9 @@ if __name__ == "__main__":
         window_size,
         window_height,
         (1920, 1080),
-        device_id = 2,
-        debug = False,
-        dummy = True
+        device_id = 0,
+        debug = True,
+        dummy = False
         )
 
     camera.calibrate()
@@ -226,6 +228,7 @@ if __name__ == "__main__":
         )
 
     try:
+        print("performing error correction...")
         silence = np.full(((10 * window_size * window_height) + window_size,), fill_value=1, dtype=np.int16)
         correction_array = process_loop(silence, transform, camera, window_size, window_height, loop = False, correction = True)
 
diff --git a/test.py b/test.py
new file mode 100755
index 0000000..9e376ae
--- /dev/null
+++ b/test.py
@@ -0,0 +1,44 @@
+#!./.venv/bin/python
+
+import numpy as np
+import matplotlib.pyplot as plt
+import time
+import math
+import pyfftw
+
+def test(func, length):
+    results = []
+    for i in range(100):
+        a = np.random.randint(0, 255, (1, length, 3), dtype=np.uint8)
+        t = time.time()
+        r = func(a)
+        results += [time.time() - t]
+
+    return sum(results) / len(results)
+
+def f1(a):
+    a = np.fft.fft(a)
+    a = np.fft.ifft(a)
+
+def f2(a):
+    a = pyfftw.interfaces.numpy_fft.fft(a)
+    a = pyfftw.interfaces.numpy_fft.ifft(a)
+
+lengths = list(range(70, 1000000, 10000))
+
+print(len(lengths))
+f1_results = [test(f1, i) for i in lengths]
+f2_results = [test(f2, i) for i in lengths]
+fig, (ax0, ax1) = plt.subplots(2, 1, layout='constrained')
+
+ax0.plot(f1_results, lengths, "-b", label="numpy")
+ax0.plot(f2_results, lengths, "-r", label="ptfftw")
+ax0.set_xlabel('Time (ms)')
+ax0.set_ylabel('Array length')
+ax0.legend(loc="upper left")
+
+ax1.plot(100 - ((np.array(f2_results) / np.array(f1_results)) * 100), lengths, "-g")
+ax1.set_xlabel('Speed-up (%)')
+ax1.set_ylabel('Array length')
+
+plt.show()
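
For reference, the volume_max change in fft.py tightens the decibel clip range that squeezes each FFT bin into an 8-bit pixel value. The sketch below is a minimal, standalone illustration of that amplitude-to-pixel mapping, not the repo's fft class: the actual a/b/c scaling constants in fft.py are not visible in this diff, so a plain linear rescale from the clip range onto 0-255 is assumed here.

    import numpy as np

    window_size = 192                    # matches the new value set in loop.py
    volume_min, volume_max = -40, 65     # dB clip range after this commit

    # one window of int16 audio samples (random stand-in data)
    segment = np.random.randint(-32768, 32767, window_size).astype(np.int16)

    # vector length of each FFT bin -> decibels (epsilon avoids log10(0))
    spectrum = np.fft.fft(segment) / window_size
    amplitude = 20 * np.log10(np.abs(spectrum) + 1e-12)

    # confine to the clip range, then rescale linearly onto 8-bit pixel values
    # (assumed mapping; fft.py does this via its a/b/c constants)
    amplitude = np.clip(amplitude, volume_min, volume_max)
    pixels = ((amplitude - volume_min) / (volume_max - volume_min) * 255).astype(np.uint8)

Narrowing volume_max from 100 dB to 65 dB spends the 256 pixel levels on the range real signals actually occupy, so quieter content gets finer quantisation steps when it is round-tripped through the camera.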