From: will <greenwoodw50@gmail.com>
Date: Wed, 31 Jul 2024 18:46:20 +0000 (+0100)
Subject: uhhh
X-Git-Url: https://git.ozva.co.uk/?a=commitdiff_plain;h=5c7972dbdd0825835bed26d86ad06c4f58aac687;p=audio-over-stft

uhhh
---

diff --git a/__pycache__/camera.cpython-311.pyc b/__pycache__/camera.cpython-311.pyc
new file mode 100644
index 0000000..b81a726
Binary files /dev/null and b/__pycache__/camera.cpython-311.pyc differ
diff --git a/__pycache__/camera.cpython-312.pyc b/__pycache__/camera.cpython-312.pyc
deleted file mode 100644
index f47db0f..0000000
Binary files a/__pycache__/camera.cpython-312.pyc and /dev/null differ
diff --git a/__pycache__/fft.cpython-311.pyc b/__pycache__/fft.cpython-311.pyc
new file mode 100644
index 0000000..a73cf1d
Binary files /dev/null and b/__pycache__/fft.cpython-311.pyc differ
diff --git a/__pycache__/fft.cpython-312.pyc b/__pycache__/fft.cpython-312.pyc
deleted file mode 100644
index 75fb418..0000000
Binary files a/__pycache__/fft.cpython-312.pyc and /dev/null differ
diff --git a/calibration/calibration.jpg b/calibration/calibration.jpg
index 54d79ba..74e481f 100644
Binary files a/calibration/calibration.jpg and b/calibration/calibration.jpg differ
diff --git a/camera.py b/camera.py
index d222678..155cc80 100644
--- a/camera.py
+++ b/camera.py
@@ -1,5 +1,35 @@
 import cv2 as cv
 import numpy as np
+import queue
+import threading
+
+class VideoCapture:
+	"""cv.VideoCapture wrapper whose read() hands back only the newest frame."""
+	def __init__(self, device_id):
+		self.camera = cv.VideoCapture(device_id)
+		self.camera.set(cv.CAP_PROP_FRAME_WIDTH, 1920.0)
+		self.camera.set(cv.CAP_PROP_FRAME_HEIGHT, 1080.0)
+
+		self.queue = queue.Queue()   # the stdlib module is the lowercase `queue`
+		read_thread = threading.Thread(target=self.reader)
+		read_thread.daemon = True   # do not keep the process alive on exit
+		read_thread.start()
+
+	# read frames as soon as they are available, keeping only most recent one
+	def reader(self):
+		while True:
+			ret, frame = self.camera.read()
+			if not ret:
+				break   # the device stopped delivering frames; end the thread
+			if not self.queue.empty():
+				try:
+					self.queue.get_nowait()   # discard previous (unprocessed) frame
+				except queue.Empty:
+					pass   # a consumer took the frame between empty() and get_nowait()
+			self.queue.put(frame)
+
+	def read(self):
+		return self.queue.get()   # blocks until the reader thread has queued a frame
 
 class camera():
 	def __init__(
@@ -7,27 +37,58 @@ class camera():
 		window_size: int,
 		window_height: int,
 		display_size: tuple,
-		device_id: int = 0
+		device_id: int = 0,
+		debug: bool = True,
+		dummy: bool = False
 	):
 
 		self.window_size = window_size
 		self.window_height = window_height
 		self.display_size = display_size
+		self.match_histograms = False
+		self.show_debug = debug
+		self.dummy = dummy
+
+		cv.CAP_GSTREAMER
 
 		self.camera = cv.VideoCapture(device_id)
+
+		self.camera.set(cv.CAP_PROP_BUFFERSIZE, 1)
+		self.camera.set(cv.CAP_PROP_FRAME_WIDTH, 1920.0)
+		self.camera.set(cv.CAP_PROP_FRAME_HEIGHT, 1080.0)
+
 		self.homography = None
+		self.lookup_color = None
+		self.lookup_vingette = None
+		self.lookup_compression = None
+		self.last_display = None
+		self.last_capture = None
+		self.last_recovered = None
 
 		cv.namedWindow("display", cv.WINDOW_NORMAL)
+		if self.show_debug == True:
+			cv.namedWindow("debug", cv.WINDOW_NORMAL)
+
+	def capture_raw(
+		self
+	) -> np.ndarray:
+		"""Read one frame from self.camera and return it without any processing."""
+		_, capture = self.camera.read()   # the first element of the returned pair is discarded
+
+		return capture
 
 	def calibrate(
 		self
 	):
+		if self.dummy == True:
+			return
+
 		calibration_image = cv.imread("calibration/calibration.jpg")
-		calibration_image = cv.resize(calibration_image, self.display_size, cv.INTER_NEAREST)
+		calibration_image = cv.resize(calibration_image, self.display_size)
 
 		cv.imshow("display", calibration_image)
 		cv.waitKey(0)
-		_, capture = camera.read()
+		capture = self.camera.read()
 
 		# detect SIFT keypoints
 		sift = cv.SIFT_create()
@@ -55,24 +116,108 @@ class camera():
 		else:
 			print("calibration failed")
 
+	def get_lookup(
+		self
+	) -> None:
+		"""Measure brightness-by-position and colour offsets of the display/camera loop.
+
+		Shows flat test images, captures each one and stores captured - displayed in
+		lookup_vingette and lookup_color, which are also saved as .npy files.
+		"""
+		if self.dummy == True:
+			return
+
+		# drop old tables so capture() cannot apply them while measuring
+		self.lookup_vingette = self.lookup_color = None
+		vingette_compression = 50
+		lookup_vingette = np.zeros((
+			255 // vingette_compression + 1,
+			self.window_height // vingette_compression + 1,
+			self.window_size // vingette_compression + 1
+		), dtype=np.int16)	# signed: captured - displayed can be negative
+
+		for v in range(0, 255, vingette_compression):
+			pixel = np.array([[[0, 0, v]]], dtype=np.uint8)
+			pixel = cv.cvtColor(pixel, cv.COLOR_HSV2BGR)
+			self.display(pixel)
+			capture = cv.cvtColor(self.capture(), cv.COLOR_BGR2HSV)
+			for y in range(0, self.window_height, vingette_compression):
+				for x in range(0, self.window_size, vingette_compression):
+					cell = (v // vingette_compression, y // vingette_compression, x // vingette_compression)
+					lookup_vingette[cell] = int(capture[y, x, 2]) - v
+
+		color_compression = 90
+
+		lookup_color = np.zeros((
+			180 // color_compression + 1,
+			255 // color_compression + 1,
+			255 // color_compression + 1,
+			3
+		), dtype=np.int16)
+
+		for h in range(0, 180, color_compression):
+			for s in range(0, 255, color_compression):
+				for v in range(0, 255, color_compression):
+					pixel = np.array([[[h, s, v]]], dtype=np.uint8)
+					pixel = cv.cvtColor(pixel, cv.COLOR_HSV2BGR)
+					self.display(pixel)
+					capture = cv.cvtColor(self.capture(), cv.COLOR_BGR2HSV)
+					# the centre of the window stands in for the whole flat image
+					color = capture[self.window_height // 2, self.window_size // 2].astype(np.int16)
+					lookup_color[h // color_compression, s // color_compression, v // color_compression] = color - [h, s, v]
+
+		self.lookup_vingette = lookup_vingette
+		self.lookup_color = lookup_color
+		np.save("lookup_vingette", self.lookup_vingette)
+		np.save("lookup_color", self.lookup_color)
+
 	def display(
 		self,
 		image: np.ndarray
 	) -> None:
-        
-		image = cv.resize(image, self.display_size, cv.INTER_NEAREST)
+		# keep the unscaled image: debug() and the dummy branch of capture() read it back
+		self.last_display = image
+		image = cv.resize(image, self.display_size, interpolation=cv.INTER_NEAREST_EXACT)
 		cv.imshow("display", image)
 		cv.waitKey(1)
 
+	def debug(
+		self
+	) -> None:
+		"""Show capture, displayed and recovered images in the "debug" window once all three exist."""
+		if self.last_display is not None and self.last_capture is not None and self.last_recovered is not None:
+			height = round(self.last_capture.shape[0] / 2)   # each stacked image gets half the capture height
+			width = round((self.display_size[0] / self.display_size[1]) * height)   # keeps the display aspect ratio
+			last_display = cv.resize(self.last_display, (width, height))
+			last_recovered = cv.resize(self.last_recovered, (width, height))
+			comparison = np.concatenate((last_display, last_recovered), axis=0)   # displayed image above recovered image
+			debug_image = np.concatenate((self.last_capture, comparison), axis=1)   # raw capture left, comparison right
+			cv.imshow("debug", debug_image)
+			cv.waitKey(1)
+
 	def capture(
 		self
 	) -> np.ndarray:
 
-		image = self.camera.read()
-		if self.homography is not None:
-			image = cv.warpPerspective(image, self.homography, self.display_size)
-			image = cv.resize(image, (self.window_size, self.window_height), cv.INTER_NEAREST)
-			image = match_histograms(image, display, channel_axis=-1)
+		"""Return the frame to decode: the last displayed image in dummy mode,
+		otherwise a camera frame, warped onto the window once calibrated."""
+		if self.dummy == True:
+			image = self.last_display
+		else:
+			# cv.VideoCapture.read() returns (success, frame); keep only the frame
+			ok, image = self.camera.read()
+			if not ok:
+				raise RuntimeError("camera returned no frame")
+			self.last_capture = image
+			if self.homography is not None:
+				image = cv.warpPerspective(image, self.homography, self.display_size)
+				image = cv.resize(image, (self.window_size, self.window_height))
+
+		# NOTE(review): the per-pixel lookup loop was removed here: it read a
+		# non-existent self.lookup and only rebound a local, so it corrected nothing.
+		self.last_recovered = image
+		if self.show_debug == True:
+			self.debug()
 
 		return image
 
diff --git a/data/test.jpg b/data/test.jpg
old mode 100755
new mode 100644
diff --git a/data/test1.png b/data/test1.png
old mode 100755
new mode 100644
diff --git a/dummy.py b/dummy.py
new file mode 100644
index 0000000..128ca8d
--- /dev/null
+++ b/dummy.py
@@ -0,0 +1,102 @@
+from struct import unpack
+import numpy as np
+from scipy.io import wavfile
+import matplotlib.pyplot as plt
+from multiprocessing import Pool
+from camera import camera
+from fft import fft
+import pyaudio
+import sys
+import os
+import wave
+
+"""
+notes:
+- window size
+	the time to generate the spectrum is logarithmically related to the window size
+	bigger windows are exponentially better so you should prefer this if possible
+	obviously the biggest you can use is the size of your display unless you have
+	some way of arranging the pixels independently of the original spectrogram
+- read size (the window size)
+	this is the amount of data that is read from the audio device at one time
+	i believe the maximum for this specific device is 990? it's something to do with
+	the number of channels and the sample rate...
+
+every time the window width / 2 number of samples is available to read from the audio
+device, the program puts that chunk of audio into the buffer. each chunk is then
+appended to the last chunk. the last chunk (with no later chunk to append onto it) is
+left in the buffer to provide a smooth transition between the images
+"""
+
+window_width = 100
+window_height = 300
+sample_rate = 22_050
+channels = 1
+# consecutive analysis windows overlap by half: each read supplies hop_size new samples
+hop_size = window_width // 2
+camera = camera(window_width, window_height, (1000, 1000), device_id=2)   # rebinds the imported class name to the instance
+transform = fft(window_width, hop_size)
+
+pyaudio_object = pyaudio.PyAudio()
+stream = pyaudio_object.open(
+	format = pyaudio.paInt16,
+	channels = channels,
+	rate = sample_rate,
+	input = True
+)
+# buffer holds the most recent hop-sized chunks; spectrum is filled one row per window
+buffer = []
+spectrum = np.zeros((window_height, window_width, 3), dtype=np.uint8)
+spectrum_index = 0
+audio = np.zeros((hop_size,), dtype=np.int16)   # overlap-add tail carried from row to row
+
+try:
+	file = wave.open("out.wav", "wb")
+	file.setparams((
+		channels,
+		2,		# sample width in bytes (matches paInt16)
+		sample_rate,
+		0,
+		"NONE",		# compression type
+		"NONE"		# compression name
+	))
+
+	while stream.is_active():
+		data = stream.read(hop_size, exception_on_overflow = False)
+		data = unpack(f"<{hop_size}h", data)   # little-endian signed 16-bit samples
+		buffer.append(list(data))
+
+		if len(buffer) == 2:
+			spectrum[spectrum_index] = transform.stft(buffer[0] + buffer[1])   # list concatenation: two chunks form one window
+			spectrum_index += 1
+			del buffer[0]   # the newer chunk stays to start the next window
+
+			camera.display(spectrum)
+
+		if spectrum_index == window_height:
+			spectrum_index = 0
+			# image is full: decode every row back to audio in worker processes
+			rows = [np.array([i]) for i in spectrum]
+			with Pool(3) as p:
+				recovered = np.array(p.map(transform.istft, rows), dtype=np.int16)
+
+			for row in recovered:
+				audio[-hop_size:] += row[:hop_size]   # add the row's first half onto the retained tail
+				audio = np.append(audio, row[hop_size:])
+				# write everything except the new tail, then keep only that tail
+				file.writeframes(audio[:-hop_size])
+				audio = np.delete(audio, np.s_[:-hop_size])
+
+except KeyboardInterrupt:
+	# Ctrl-C is the only handled exit: release the audio device and close the file
+	stream.stop_stream()
+	stream.close()
+	pyaudio_object.terminate()
+	file.close()
+
+	try:
+		sys.exit()
+	except SystemExit:
+		os._exit(130)   # exit immediately with status 130
+
+
diff --git a/error.png b/error.png
deleted file mode 100644
index 1aeedb8..0000000
Binary files a/error.png and /dev/null differ
diff --git a/examples/hsv-_ah.jpg b/examples/hsv-_ah.jpg
new file mode 100644
index 0000000..4db6e33
Binary files /dev/null and b/examples/hsv-_ah.jpg differ
diff --git a/examples/hsv-_ah.wav b/examples/hsv-_ah.wav
new file mode 100644
index 0000000..f9e0b50
Binary files /dev/null and b/examples/hsv-_ah.wav differ
diff --git a/examples/hsv-a[m]ha.jpg b/examples/hsv-a[m]ha.jpg
new file mode 100644
index 0000000..0dcd3a1
Binary files /dev/null and b/examples/hsv-a[m]ha.jpg differ
diff --git a/examples/hsv-a[m]ha.wav b/examples/hsv-a[m]ha.wav
new file mode 100644
index 0000000..d1737cb
Binary files /dev/null and b/examples/hsv-a[m]ha.wav differ
diff --git a/examples/hsv-h_a.jpg b/examples/hsv-h_a.jpg
new file mode 100644
index 0000000..2a309e6
Binary files /dev/null and b/examples/hsv-h_a.jpg differ
diff --git a/examples/hsv-h_a.wav b/examples/hsv-h_a.wav
new file mode 100644
index 0000000..4f19eb5
Binary files /dev/null and b/examples/hsv-h_a.wav differ
diff --git a/examples/out.wav b/examples/out.wav
new file mode 100644
index 0000000..7c7e523
Binary files /dev/null and b/examples/out.wav differ
diff --git a/examples/sample.jpg b/examples/sample.jpg
new file mode 100644
index 0000000..550afc9
Binary files /dev/null and b/examples/sample.jpg differ
diff --git a/examples/xyv-h[m]ha.jpg b/examples/xyv-h[m]ha.jpg
new file mode 100644
index 0000000..0740f86
Binary files /dev/null and b/examples/xyv-h[m]ha.jpg differ
diff --git a/examples/xyv-h[m]ha.wav b/examples/xyv-h[m]ha.wav
new file mode 100644
index 0000000..1de820d
Binary files /dev/null and b/examples/xyv-h[m]ha.wav differ
diff --git a/fft.py b/fft.py
index 2950929..876c1db 100644
--- a/fft.py
+++ b/fft.py
@@ -14,10 +14,10 @@ class fft():
 		self.lower_limit = -40
 		self.upper_limit = 100
 
-		self.amplitude_max = 180
+		self.amplitude_max = 254
 		self.amplitude_min = 0
-		self.angle_max = 255
-		self.angle_min = 100
+		self.angle_max = 179
+		self.angle_min = 0
 
 		self.amplitude_relative = self.amplitude_max - self.amplitude_min
 		self.angle_relative = self.angle_max - self.angle_min
@@ -36,13 +36,15 @@ class fft():
 		amplitude = np.clip(amplitude, self.lower_limit, self.upper_limit)
 		amplitude -= self.lower_limit
 		amplitude *= (self.amplitude_relative / self.upper_limit) + self.amplitude_min
+		amplitude = np.clip(amplitude, self.amplitude_min, self.amplitude_max)
 
 		angle = np.angle(spectrum)
 		angle = ((angle + np.pi) * (self.angle_relative / (2 * np.pi))) + self.angle_min
+		angle = np.clip(angle, self.angle_min, self.angle_max)
 
 		full = np.full(angle.shape, fill_value=255)
 
-		image = np.stack((amplitude, angle, full), axis=-1)
+		image = np.stack((angle, full, amplitude), axis=-1)
 		image = np.array([image], dtype=np.uint8)
 
 		image = cv.cvtColor(image, cv.COLOR_HSV2BGR)
@@ -56,8 +58,8 @@ class fft():
 
 		image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
 
-		amplitude = image[0][...,0].astype(np.float64)
-		angle = image[0][...,1].astype(np.float64)
+		amplitude = image[0][...,2].astype(np.float64)
+		angle = image[0][...,0].astype(np.float64)
 
 		amplitude -= self.amplitude_min
 		amplitude /= (self.amplitude_relative / self.upper_limit)
diff --git a/file.py b/file.py
index 1ca680c..eb427a6 100644
--- a/file.py
+++ b/file.py
@@ -6,6 +6,7 @@ import matplotlib.pyplot as plt
 from multiprocessing import Pool
 from camera import camera
 from fft import fft
+import time
 
 """
 notes:
@@ -16,8 +17,10 @@ notes:
 	some way of arranging the pixles independant of the orrigional spectrogram
 """
 
-sample_rate, data = wavfile.read("/home/will/Downloads/number-station.wav")
-new_rate = 22_050.
+sample_rate, data = wavfile.read("/home/will/Downloads/Adducci - Around the Horn.wav")
+
+
+new_rate = 22050.
 
 sample_count = round(len(data) * new_rate / sample_rate)
 data = sps.resample(data, sample_count)
@@ -26,11 +29,12 @@ sample_rate = int(new_rate)
 data = [data[i] for i in range(0, len(data), 2)]
 sample_rate = sample_rate // 2
 
-window_size = 250
-window_height = 125
+window_size = 80
+window_height = 45
 
 hop_size = window_size // 2
-camera = camera(window_size, window_height, (1000, 1000))
+camera = camera(window_size, window_height, (1840, 1000), device_id=2, debug=False, dummy=True)
+
 transform = fft(window_size, hop_size)
 
 segment_samples = window_height * hop_size
@@ -42,6 +46,8 @@ recovered_data = np.zeros(data.shape)
 
 segment_count = round(len(data) / segment_samples)
 
+camera.calibrate()
+
 for segment_index in range(segment_count):
 	segment_start = segment_index * segment_samples
 	rows = [data[segment_start + i:segment_start + i + window_size] for i in range(0, segment_samples, hop_size)]
@@ -50,9 +56,15 @@ for segment_index in range(segment_count):
 
 	spectrum = np.array(mapping)[:,0,...]
 
+	if segment_index == 10: cv.imwrite("sample.jpg", spectrum)
+
 	camera.display(spectrum)
 
-	rows = [np.array([i]) for i in spectrum]
+	time.sleep(0.5)
+
+	capture = camera.capture()
+
+	rows = [np.array([i]) for i in capture]
 	with Pool() as p:
 		recovered = np.array(p.map(transform.istft, rows))
 
diff --git a/loop.py b/loop.py
new file mode 100644
index 0000000..34bc909
--- /dev/null
+++ b/loop.py
@@ -0,0 +1,129 @@
+import cv2 as cv
+import numpy as np
+from scipy.io import wavfile
+import scipy.signal as sps
+import matplotlib.pyplot as plt
+from multiprocessing import Pool
+from camera import camera
+from struct import pack
+from fft import fft
+import time
+import pyaudio
+import os
+import sys
+import matplotlib.pyplot as plt
+
+"""
+notes:
+- window size
+	the time to generate the spectrum is logarithmically related to the window size
+	bigger windows are exponentially better so you should prefer this if possible
+	obviously the biggest you can use is the size of your display unless you have
+	some way of arranging the pixels independently of the original spectrogram
+"""
+
+sample_rate, data = wavfile.read("/home/will/Downloads/Adducci - Around the Horn.wav")
+# data = data[...,0]
+# resample the whole file to new_rate before any framing
+new_rate = 22050.
+sample_count = round(len(data) * new_rate / sample_rate)
+data = sps.resample(data, sample_count)
+sample_rate = int(new_rate)
+
+window_size = 176
+window_height = 99
+
+hop_size = window_size // 2
+camera = camera(
+	window_size,
+	window_height,
+	(1840, 1000),
+	device_id=2,
+	debug=False,
+	dummy=True
+)
+camera.calibrate()   # returns immediately when dummy=True
+camera.get_lookup()   # returns immediately when dummy=True
+# with dummy=True both tables keep their initial value of None
+print(camera.lookup_vingette)
+print(camera.lookup_color)
+
+transform = fft(window_size, hop_size)
+
+segment_samples = window_height * hop_size   # new samples consumed per displayed image
+overflow_samples = segment_samples - (len(data) % segment_samples) + window_size
+data = np.concatenate((data, data[0:overflow_samples]))   # pad by wrapping around to the start of the audio
+
+segment_count = round(len(data) / segment_samples)
+segment_index = 0
+audio = np.zeros((hop_size,), dtype=np.int16)   # playback buffer shared with callback()
+
+def callback(in_data, frame_count, time_info, status):
+	"""Stream callback: hand frame_count samples from the global audio buffer to PyAudio."""
+	global audio
+
+	data = audio[:frame_count]
+	if len(data) < frame_count:
+		data = np.pad(data, [(0, frame_count - len(data))], mode='constant')   # not enough audio: zero-fill the rest
+		audio = np.zeros((hop_size,), dtype=np.int16)   # reset to an empty overlap tail
+	else:
+		audio = np.delete(audio, np.s_[:frame_count])   # drop the samples just handed out
+
+	return (data, pyaudio.paContinue)
+
+pyaudio_object = pyaudio.PyAudio()
+stream = pyaudio_object.open(
+	format = pyaudio.paInt16,
+	channels = 1,
+	rate = sample_rate,
+	frames_per_buffer = 2048,
+	output = True,
+	stream_callback = callback
+)
+
+try:
+	while stream.is_active():
+		segment_start = segment_index * segment_samples
+		rows = [data[segment_start + i:segment_start + i + window_size] for i in range(0, segment_samples, hop_size)]   # half-overlapping windows
+		with Pool() as p:
+			mapping = p.map(transform.stft, rows)
+
+		spectrum = np.array(mapping)[:,0,...]   # index 0 on axis 1 of each stft result
+
+		if segment_index == 10: cv.imwrite("sample.jpg", spectrum)
+
+		camera.display(spectrum)
+		time.sleep(0.1)
+		capture = camera.capture()
+
+		rows = [np.array([i]) for i in capture]
+		with Pool() as p:
+			recovered = np.array(p.map(transform.istft, rows))
+		# callback() may have consumed the buffer; restore an empty overlap tail
+		if len(audio) < hop_size:
+			audio = np.zeros((hop_size,), dtype=np.int16)
+
+		for row in recovered:
+			row = row.astype(np.int16)
+			# overlap-add: first half onto the current tail, second half becomes the new tail
+			audio[-hop_size:] += row[:hop_size]
+			audio = np.append(audio, row[hop_size:])
+
+		segment_index += 1
+		if segment_index == segment_count: segment_index = 0   # wrap around to the first segment
+
+		slept = 0
+		while len(audio) > 2 * segment_samples:   # wait while more than two segments are still queued
+			time.sleep(0.01)
+			slept += 1
+		print(f"slept {slept} times")
+
+except KeyboardInterrupt:
+	stream.stop_stream()
+	stream.close()
+	pyaudio_object.terminate()
+
+	try:
+		sys.exit()
+	except SystemExit:
+		os._exit(130)   # exit immediately with status 130
diff --git a/mum.wav b/mum.wav
new file mode 100644
index 0000000..0140a2b
Binary files /dev/null and b/mum.wav differ
diff --git a/out.wav b/out.wav
index b56a8b7..46a365a 100644
Binary files a/out.wav and b/out.wav differ
diff --git a/rec.wav b/rec.wav
deleted file mode 100644
index d365d58..0000000
Binary files a/rec.wav and /dev/null differ
diff --git a/sample.jpg b/sample.jpg
new file mode 100644
index 0000000..b9e3d34
Binary files /dev/null and b/sample.jpg differ
diff --git a/stream.py b/stream.py
index 18daf7b..4a2c360 100644
--- a/stream.py
+++ b/stream.py
@@ -28,13 +28,14 @@ appended to the last chunk. the last chunk (with no later chunk to append onto i
 left in the buffer to provide a smooth transition between the images
 """
 
-window_width = 750 
-window_height = 500
+window_width = 100
+window_height = 300
 sample_rate = 22_050
 channels = 1
 
 hop_size = window_width // 2
-camera = camera(window_width, window_height, (1000, 1000))
+camera = camera(window_width, window_height, (1000, 1000), device_id=0, debug=True)
+camera.calibrate()
 transform = fft(window_width, hop_size)
 
 pyaudio_object = pyaudio.PyAudio()
@@ -76,7 +77,9 @@ try:
 		if spectrum_index == window_height:
 			spectrum_index = 0
 
-			rows = [np.array([i]) for i in spectrum]
+			capture = camera.capture() 
+
+			rows = [np.array([i]) for i in capture]
 			with Pool(3) as p:
 				recovered = np.array(p.map(transform.istft, rows), dtype=np.int16)
 
diff --git a/test.wav b/test.wav
deleted file mode 100644
index 3dcd159..0000000
Binary files a/test.wav and /dev/null differ
diff --git a/test2.wav b/test2.wav
deleted file mode 100644
index ea303ee..0000000
Binary files a/test2.wav and /dev/null differ