From 9302a800fa56ae2590f1e1425798e3341aa7e3b7 Mon Sep 17 00:00:00 2001 From: will Date: Sun, 1 Sep 2024 14:49:26 +0100 Subject: [PATCH] Fixed buzzing + added brightness, contrast, temperature and tint bindings --- camera.py | 58 ++++++++++++++++++++++++++++++++----------------------- fft.py | 29 ++++++++++++++-------------- loop.py | 46 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 85 insertions(+), 48 deletions(-) diff --git a/camera.py b/camera.py index f4259f9..325a2d2 100644 --- a/camera.py +++ b/camera.py @@ -45,10 +45,14 @@ class camera(): window_height: int, display_size: tuple, device_id: int = 0, + brightness: float = 1., + contrast: float = 0., + temperature: float = 0., + tint: float = 0., debug: bool = True, dummy: bool = False, use_lookup: bool = False, - use_files: bool = False + use_files: bool = False, ): self.window_size = window_size @@ -118,49 +122,49 @@ class camera(): print("calibration failed") quit() - if self.use_files == True: + if self.use_lookup == True: self.get_lookup() def get_lookup( self ) -> None: - if self.use_lookup == True: - return - if self.use_files == True: self.lookup = np.load("lookup.npy") return lookup = None - for r in range(0, 255, self.lookup_compression): - for g in range(0, 255, self.lookup_compression): - for b in range(0, 255, self.lookup_compression): - pixel = np.array([[[b, g, r]]], dtype=np.int8) - pixel = cv.resize(pixel, self.display_size, interpolation=cv.INTER_NEAREST_EXACT) - self.display(pixel) + for (r, g, b) in [(0,0,0), (255,255,255)]: + pixel = np.array([[[b, g, r]]], dtype=np.int8) + pixel = cv.resize(pixel, self.display_size, interpolation=cv.INTER_NEAREST_EXACT) + self.display(pixel) - for i in range(10): # silly hack - time.sleep(0.2) - recovered = self.capture() + for i in range(100): # silly hack + cv.waitKey(1) + recovered = self.capture() - error = 
np.copy(recovered.astype(np.int16)) + error[..., 0] -= b + error[..., 1] -= g + error[..., 2] -= r - error = np.clip(error, -50, 255) + error = np.clip(error, -50, 255) - if lookup is None: - lookup = error - else: - lookup += error - lookup = lookup // 2 + if lookup is None: + lookup = error + else: + lookup += error + lookup = lookup // 2 self.lookup = lookup np.save("lookup.npy", lookup) + lookup += np.min(lookup) + lookup = np.round(lookup * (255 / np.min(lookup))) + cv.imwrite("lookup.jpg", lookup) + + def display( self, image: np.ndarray @@ -168,6 +172,11 @@ class camera(): self.last_display = image image = cv.resize(image, self.display_size, interpolation=cv.INTER_NEAREST_EXACT) + image = cv.convertScaleAbs(image, alpha=self.contrast, beta=self.brightness) # contrast / brightness correction + image[...,2] + self.temperature # color correction + image[...,1] + self.tint + image[...,0] - self.temperature + cv.imshow("display", image) cv.waitKey(1) @@ -194,6 +203,7 @@ class camera(): else: _, image = self.camera.read() + self.last_capture = image if self.homography is not None: image = cv.warpPerspective(image, self.homography, self.display_size) diff --git a/fft.py b/fft.py index 45af6c4..bd0d1df 100644 --- a/fft.py +++ b/fft.py @@ -1,16 +1,18 @@ import numpy as np -import math -import time import cv2 as cv import matplotlib.pyplot as plt -import cProfile """ Notes: The data is receved in int16 format (limits -32768 to 32767). It is converted to a complex array from which we can work out power. This is from 0 to over 10,000. It is converted to decibels [1]. This decibels can be bounded to some upper and lower limit of both volume and noise floor. This also makes our calculations more predictable. + For some reason, when the lookup table scales via the relative volume insted of the volume, it all fucks up. I cant seem to work out why even though i belive this is mathematically wrong. 
Either way, the program has been changed to allow this while I work out why this is happening. [2] + [1] We convert to decibels via the function 20*log10(power) followed by some scaling to the required limits. We covert BACK to power via a lookup table. This is not efficient for the first conversion as the lookup table would have to be huge and have incies in the floating points. + +[2] The normal FFT has been changed to mirror the IFFT; this does seem to produce some peaking. If this becomes a problem it can be changed back at the expense of some volume. + """ class fft(): @@ -41,7 +43,7 @@ class fft(): # generate lookup table for the converstion from decibels to power a = self.volume_min - b = self.volume_relative / self.amplitude_relative + b = self.volume_max / self.amplitude_relative # this is the parameterized inverted function of y = (20 * log10(x) - 40) * (255/140) log_lookup = [10 ** (((x * b) + a) / 20) for x in range(0, 256)] @@ -64,7 +66,7 @@ class fft(): # confine the amplitude within the limits specified a = self.volume_min - b = self.amplitude_relative / self.volume_relative + b = self.amplitude_relative / self.volume_relative # possibly change the vol_max to vol_rel ?? 
see [2] c = self.amplitude_min amplitude = ((amplitude - a) * b) + c @@ -77,7 +79,7 @@ class fft(): # rearrange to image format full = np.full(angle.shape, fill_value=60) - image = np.stack((full, angle, amplitude), axis=-1) + image = np.stack((amplitude * (180/255), angle, amplitude), axis=-1) image = np.array([image], dtype=np.uint8) image = cv.cvtColor(image, cv.COLOR_HSV2BGR) @@ -90,8 +92,11 @@ class fft(): # split the image into constituant parts image = cv.cvtColor(image, cv.COLOR_BGR2HSV) - amplitude = image[0][...,2].astype(np.uint8) + amplitude = image[0][...,2] angle = image[0][...,1].astype(np.float64) + hue = image[0][...,0].astype(np.float64) * (255/180) + + amplitude = np.mean( np.array([ amplitude, hue ]), axis=0 ).astype(np.uint8) # convert amplitude back into vector length amplitude = self.log_lookup[amplitude] @@ -104,12 +109,6 @@ class fft(): imag = np.sin(angle) * amplitude segment = real + (1j * imag) - data = np.fft.ifft(segment * self.window_size).real.astype(np.int16) - - return data + data = np.fft.ifft(segment * self.window_size).real -if __name__ == "__main__": - with cProfile.Profile() as pr: - fft = fft(130, 65) - fft.istft(fft.stft(np.random.randint(-32768, 32767, (130,)))) - pr.print_stats() + return data.astype(np.int16) diff --git a/loop.py b/loop.py index 0e6acda..ed974b8 100644 --- a/loop.py +++ b/loop.py @@ -11,7 +11,10 @@ import time import pyaudio import os import sys +import wave import matplotlib.pyplot as plt +import gi +from gi.repository import Gtk """ notes: @@ -25,13 +28,13 @@ notes: sample_rate, data = wavfile.read("/home/will/Downloads/birdsong.wav") #data = data[...,0] -new_rate = 10000. +new_rate = 11025. 
sample_count = round(len(data) * new_rate / sample_rate) data = sps.resample(data, sample_count) sample_rate = int(new_rate) -window_size = 130 -window_height = 70 +window_size = 170 +window_height = 80 hop_size = window_size // 2 camera = camera( @@ -39,12 +42,26 @@ camera = camera( window_height, (1920, 1080), device_id = 2, - debug = False, - dummy = True, + brightness = 1., + contrast = 0., + temperature = 0., + tint = 0., + debug = True, + dummy = False, use_lookup = False, - use_files = True + use_files = False ) +file = wave.open("out.wav", "wb") +file.setparams(( + 1, # channels + 2, # sample width + sample_rate, + 0, + "NONE", # compression type + "NONE" # compression name +)) + camera.calibrate() transform = fft(window_size, hop_size) @@ -94,6 +111,9 @@ try: camera.display(spectrum) capture = camera.capture() + # plt.clf() + # plt.plot(rows[0]) + rows = [np.array([i]) for i in capture] with Pool() as p: recovered = np.array(p.map(transform.istft, rows)) @@ -101,24 +121,32 @@ try: if len(audio) < hop_size: audio = np.zeros((hop_size,), dtype=np.int16) + # plt.plot(recovered[0]) + # plt.pause(0.05) + for row in recovered: audio[-hop_size:] += row[:hop_size] audio = np.append(audio, row[hop_size:]) + #file.writeframes(row[hop_size:]) + segment_index += 1 if segment_index == segment_count: segment_index = 0 - slept = 0 + slept = False time.sleep(0.1) while len(audio) > 1 * segment_samples: + slept = True cv.waitKey(1) - slept += 1 - print(f"slept {slept} times") + + if not slept: + print("Dropped frames!") except KeyboardInterrupt: stream.stop_stream() stream.close() pyaudio_object.terminate() + file.close() try: sys.exit() -- 2.39.2