From: will
Date: Wed, 13 Nov 2024 20:33:45 +0000 (+0000)
Subject: tidying up
X-Git-Url: https://git.ozva.co.uk/?a=commitdiff_plain;h=HEAD;p=audio-over-stft

tidying up
---

diff --git a/camera.py b/camera.py
index d1684d0..208c01c 100755
--- a/camera.py
+++ b/camera.py
@@ -118,8 +118,8 @@ class camera():
             debug: bool = False
             ) -> None:
 
-        self.last_display = image
         image = cv.cvtColor(image, cv.COLOR_HSV2BGR)
+        self.last_display = image
         image = cv.resize(image, self.display_size, interpolation=cv.INTER_NEAREST_EXACT)
 
         if debug:
@@ -143,10 +143,10 @@ class camera():
         image = cv.warpPerspective(image, self.homography, self.display_size)
         image = cv.resize(image, (self.window_size, self.window_height))
 
-        image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
-
         self.last_recovered = image
 
+        image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
+
         if self.show_debug == True:
             self.debug()
diff --git a/fft.py b/fft.py
index fc14e25..348f354 100755
--- a/fft.py
+++ b/fft.py
@@ -3,6 +3,9 @@
 import numpy as np
 import cv2 as cv
 
+from scipy.io import wavfile
+import matplotlib.pyplot as plt
+
 """
 Notes: The data is receved in int16 format (limits -32768 to 32767). It is converted to a complex array from
 which we can work out power. This is from 0 to over 10,000. It is converted to decibels [1].
@@ -35,7 +38,7 @@ class fft():
         self.angle_max = 254
         self.angle_min = 0
 
-        self.volume_max = 100
+        self.volume_max = 65
         self.volume_min = -40
 
         # calulate the range of each amplitude and angle
@@ -63,8 +66,8 @@ class fft():
         spectrum = np.fft.fft(segment) / self.window_size
 
         # convert the vector length to decimals and confine
-        amplitude = np.abs(spectrum)
-        amplitude = 20*np.log10(amplitude)
+        orig_amplitude = np.abs(spectrum)
+        amplitude = 20*np.log10(orig_amplitude)
         amplitude = np.clip(amplitude, self.volume_min, self.volume_max)
         amplitude = ((amplitude - self.a) / self.b) + self.c
 
@@ -93,8 +96,8 @@ class fft():
         angle = image[0][...,1].astype(np.float64)
 
         # Use hue as seperate data point
-        #hue = image[0][...,0].astype(np.float64) * (255/180)
-        #amplitude = np.mean( np.array([ amplitude, hue ]), axis=0 ).astype(np.uint8)
+        hue = image[0][...,0].astype(np.float64) * (255/180)
+        amplitude = np.mean( np.array([ amplitude, hue ]), axis=0 ).astype(np.uint8)
 
         # convert amplitude back into vector length
         amplitude = self.log_lookup[amplitude]
diff --git a/loop.py b/loop.py
index 3665bc8..bfc1919 100755
--- a/loop.py
+++ b/loop.py
@@ -39,9 +39,9 @@ def get_audio(
     data = sps.resample(data, sample_count)
 
     # make divisisible into screens
-    segment_samples = window_height * (window_height // 2)
+    segment_samples = window_height * (window_size // 2)
     overflow_samples = segment_samples - (len(data) % segment_samples) + window_size
-    data = np.concatenate((data, data[0:overflow_samples]))
+    data = np.concatenate((data, np.zeros((overflow_samples,))))
 
     return data
 
@@ -79,6 +79,9 @@ def process_loop(
     global audio
     global caching
 
+    print("caching data...")
+    caching = True
+
     hop_size = window_size // 2
     segment_samples = window_height * hop_size
 
@@ -87,8 +90,6 @@
     error_array = np.zeros((5, window_size))
     error_spectrum = np.zeros((window_height, window_size, 3))
 
-    print("caching data...")
-    caching = True
     all_spectrums = np.zeros((segment_count, window_height, window_size, 3), dtype=np.uint8)
 
@@ -100,18 +101,19 @@
         mapping = p.map(transform.stft, segment_rows)
         spectrum = np.array(mapping)[:,0,...]
 
-        if correction_array is not None:
-            spectrum -= correction_array
-            spectrum = np.clip(spectrum, 0, 255)
+        # if correction_array is not None:
+        #     spectrum -= correction_array
+        #     spectrum = np.clip(spectrum, 0, 255)
 
         spectrum = spectrum.astype(np.uint8)
 
         # print sample image
-        if segment_index == 10:
-            cv.imwrite("spectrum_sample.jpg", spectrum)
+        # if segment_index == 0:
+        #     image = cv.cvtColor(spectrum, cv.COLOR_HSV2BGR)
+        #     image = cv.resize(image, (3508, 2480), interpolation=cv.INTER_NEAREST_EXACT)
+        #     cv.imwrite("spectrum_sample.jpg", image)
 
         all_spectrums[segment_index] = spectrum
-        #np.save(f"cache/frame{segment_index}.npy", spectrum)
 
     segment_index = 0
     print("cached!")
@@ -119,7 +121,7 @@
 
     while segment_index < segment_count:
 
-        spectrum = all_spectrums[segment_index]#np.load(f"cache/frame{segment_index}.npy")
+        spectrum = all_spectrums[segment_index]
 
         # display and capture
         camera.display(spectrum)
@@ -175,9 +177,9 @@ notes:
 """
 
 # define parameters
-sample_rate = 22_050
-window_size = 144
-window_height = 80
+sample_rate = 16_000
+window_size = 192
+window_height = 100
 
 hop_size = window_size // 2
 
@@ -186,7 +188,7 @@ caching = False
 if __name__ == "__main__":
 
     # get audio data
-    data = get_audio("/home/will/Downloads/Adducci - Around the Horn.wav", window_size, window_height, sample_rate)
+    data = get_audio("/home/will/Downloads/The Killing Moon.wav", window_size, window_height, sample_rate)
 
     # setup fft
     transform = fft(window_size)
@@ -196,9 +198,9 @@ if __name__ == "__main__":
         window_size,
         window_height,
         (1920, 1080),
-        device_id = 2,
-        debug = False,
-        dummy = True
+        device_id = 0,
+        debug = True,
+        dummy = False
         )
 
     camera.calibrate()
@@ -226,6 +228,7 @@ if __name__ == "__main__":
         )
 
     try:
+        print("performing error correction...")
         silence = np.full(((10 * window_size * window_height) + window_size,), fill_value=1, dtype=np.int16)
         correction_array = process_loop(silence, transform, camera, window_size, window_height, loop = False, correction = True)
 
diff --git a/test.py b/test.py
new file mode 100755
index 0000000..9e376ae
--- /dev/null
+++ b/test.py
@@ -0,0 +1,44 @@
+#!./.venv/bin/python
+
+import numpy as np
+import matplotlib.pyplot as plt
+import time
+import math
+import pyfftw
+
+def test(func, length):
+    results = []
+    for i in range(100):
+        a = np.random.randint(0, 255, (1, length, 3), dtype=np.uint8)
+        t = time.time()
+        r = func(a)
+        results += [time.time() - t]
+
+    return sum(results) / len(results)
+
+def f1(a):
+    a = np.fft.fft(a)
+    a = np.fft.ifft(a)
+
+def f2(a):
+    a = pyfftw.interfaces.numpy_fft.fft(a)
+    a = pyfftw.interfaces.numpy_fft.ifft(a)
+
+lengths = list(range(70, 1000000, 10000))
+
+print(len(lengths))
+f1_results = [test(f1, i) for i in lengths]
+f2_results = [test(f2, i) for i in lengths]
+fig, (ax0, ax1) = plt.subplots(2, 1, layout='constrained')
+
+ax0.plot(f1_results, lengths, "-b", label="numpy")
+ax0.plot(f2_results, lengths, "-r", label="ptfftw")
+ax0.set_xlabel('Time (ms)')
+ax0.set_ylabel('Array length')
+ax0.legend(loc="upper left")
+
+ax1.plot(100 - ((np.array(f2_results) / np.array(f1_results)) * 100), lengths, "-g")
+ax1.set_xlabel('Speed-up (%)')
+ax1.set_ylabel('Array length')
+
+plt.show()
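
For reference, the volume_max change in fft.py tightens the decibel clip range that squeezes each FFT bin into an 8-bit pixel value. The sketch below is a minimal, standalone illustration of that amplitude-to-pixel mapping, not the repo's fft class: the actual a/b/c scaling constants in fft.py are not visible in this diff, so a plain linear rescale from the clip range onto 0-255 is assumed here.

    import numpy as np

    window_size = 192                    # matches the new value set in loop.py
    volume_min, volume_max = -40, 65     # dB clip range after this commit

    # one window of int16 audio samples (random stand-in data)
    segment = np.random.randint(-32768, 32767, window_size).astype(np.int16)

    # vector length of each FFT bin -> decibels (epsilon avoids log10(0))
    spectrum = np.fft.fft(segment) / window_size
    amplitude = 20 * np.log10(np.abs(spectrum) + 1e-12)

    # confine to the clip range, then rescale linearly onto 8-bit pixel values
    # (assumed mapping; fft.py does this via its a/b/c constants)
    amplitude = np.clip(amplitude, volume_min, volume_max)
    pixels = ((amplitude - volume_min) / (volume_max - volume_min) * 255).astype(np.uint8)

Narrowing volume_max from 100 dB to 65 dB spends the 256 pixel levels on the range real signals actually occupy, so quieter content gets finer quantisation steps when it is round-tripped through the camera.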