Optimization and notation

author will <greenwoodw50@gmail.com>

Sun, 25 Aug 2024 03:48:42 +0000 (04:48 +0100)

committer will <greenwoodw50@gmail.com>

Sun, 25 Aug 2024 03:48:42 +0000 (04:48 +0100)
author will <greenwoodw50@gmail.com>
Sun, 25 Aug 2024 03:48:42 +0000 (04:48 +0100)
committer will <greenwoodw50@gmail.com>
Sun, 25 Aug 2024 03:48:42 +0000 (04:48 +0100)
diff --git a/.gitignore b/.gitignore

index db7f2aca5a0c57327eef22c1a3804b6837df57f9..2bdcf07c978f079cc1058799bb9ac95449ae9812 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
  *.npy
  *.jpg
  *.wav
+__pycache__*
diff --git a/camera.py b/camera.py

index 2539baf88e5e053282b4b77663e75fabb6367c6d..64c77d2f761f7ed661b9a47b9bd54d52e5edcaee 100644 (file)
--- a/camera.py
+++ b/camera.py
@@ -21,7 +21,7 @@ class VideoCapture:
  
         # read frames as soon as they are available, keeping only most recent one
         def reader(self):
-               while True:
+               while 1:
                         ret, frame = self.camera.read()
                         if not ret:
                                 break
diff --git a/fft.py b/fft.py

index c74c4dcdd3e6719c80a68b8201aff986de419560..168513441a432e444d3b5b8893ea0343b2f8e64b 100644 (file)
--- a/fft.py
+++ b/fft.py
@@ -1,5 +1,16 @@
  import numpy as np
+import math
+import time
  import cv2 as cv
+import matplotlib.pyplot as plt
+
+"""
+Notes:
+       The data is received in int16 format (limits -32768 to 32767). It is converted to a complex array from which we can work out power. This is from 0 to over 10,000. It is converted to decibels [1].
+       These decibel values can be bounded to some upper and lower limit of both volume and noise floor. This also makes our calculations more predictable.
+
+[1] We convert to decibels via the function 20*log10(power) followed by some scaling to the required limits. We convert BACK to power via a lookup table. A lookup table is not efficient for the first conversion, as it would have to be huge and be indexed by floating-point values.
+"""
  
  class fft():
         def __init__(
@@ -7,46 +18,66 @@ class fft():
                 window_size: int,
                 hop_size: int
         ):
+               # store the window and hop size; the window size is used to calculate the Hann (cosine) window
                 self.window_size = window_size
                 self.hop_size = hop_size
                 self.window = np.hanning(window_size)
  
-               self.lower_limit = -40
-               self.upper_limit = 100
  
+               # set the max and min numerical values for amplitude and angle to allow for easier combinations of them both
                 self.amplitude_max = 254
                 self.amplitude_min = 0
                 self.angle_max = 254
                 self.angle_min = 0
+               # set the upper and lower limits (in dB) that are to be displayed on the screen
+               self.volume_max = 100
+               self.volume_min = -40
  
+               # calculate the range of the amplitude, angle and volume
                 self.amplitude_relative = self.amplitude_max - self.amplitude_min
                 self.angle_relative = self.angle_max - self.angle_min
+               self.volume_relative = self.volume_max - self.volume_min
+
+               # generate lookup table for the conversion from decibels to power
+               a = self.volume_min
+               b = self.volume_relative / self.amplitude_relative
  
+               # this is the parameterized inverse of the function y = (20 * log10(x) + 40) * (254/140)
+               log_lookup = [10 ** (((x * b) + a) / 20) for x in range(0, 256)]
+               self.log_lookup = np.array(log_lookup)
  
         def stft(
                 self,
                 data: np.ndarray
         ) -> np.ndarray:
  
+               # apply window and perform the fft
                 segment = data * self.window
                 spectrum = np.fft.fft(segment) / self.window_size
  
+               # convert the vector length to decibels and confine
                 amplitude = np.abs(spectrum)
+
                 amplitude = 20*np.log10(amplitude)
-               amplitude = np.clip(amplitude, self.lower_limit, self.upper_limit)
-               amplitude -= self.lower_limit
-               amplitude *= (self.amplitude_relative / self.upper_limit) + self.amplitude_min
-               amplitude = np.clip(amplitude, self.amplitude_min, self.amplitude_max)
+               amplitude = np.clip(amplitude, self.volume_min, self.volume_max)
+
+               # confine the amplitude within the limits specified
+               a = self.volume_min
+               b = self.amplitude_relative / self.volume_relative
+               c = self.amplitude_min
+               amplitude = ((amplitude - a) * b) + c
  
+               # convert x and y to the angle and confine
                 angle = np.angle(spectrum)
+
+               # confine the angle within the limits specified
                 angle = ((angle + np.pi) * (self.angle_relative / (2 * np.pi))) + self.angle_min
                 angle = np.clip(angle, self.angle_min, self.angle_max)
  
+               # rearrange to image format
                 full = np.full(angle.shape, fill_value=60)
-
                 image = np.stack((full, angle, amplitude), axis=-1)
                 image = np.array([image], dtype=np.uint8)
-
                 image = cv.cvtColor(image, cv.COLOR_HSV2BGR)
  
                 return image
@@ -56,24 +87,23 @@ class fft():
                 image: np.ndarray
         ) -> np.ndarray:
  
+               # split the image into constituent parts
                 image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
-
-               amplitude = image[0][...,2].astype(np.float64)
+               amplitude = image[0][...,2].astype(np.uint8)
                 angle = image[0][...,1].astype(np.float64)
  
-               amplitude -= self.amplitude_min
-               amplitude /= (self.amplitude_relative / self.upper_limit)
-               amplitude += self.lower_limit
-               amplitude = np.power(10, amplitude / 20)
+               # convert amplitude back into vector length
+               amplitude = self.log_lookup[amplitude]
  
+               # convert angle back into x and y
                 angle = ((angle - self.angle_min) / (self.angle_relative / (2 * np.pi))) - np.pi
  
+               # rearrange back into fft result
                 real = np.cos(angle) * amplitude
                 imag = np.sin(angle) * amplitude
                 segment = real + (1j * imag)
  
-               data = np.fft.ifft(segment * self.window_size).real
+               data = np.fft.ifft(segment * self.window_size).real.astype(np.int16)
  
                 return data
  
-
diff --git a/loop.py b/loop.py

index 90c5e276630be27ddedcfab19d7578a28e75da2f..865078ca09d46f74d34aeb24c5ca208c4ca4c851 100644 (file)
--- a/loop.py
+++ b/loop.py
@@ -40,15 +40,13 @@ camera = camera(
         (1840, 1000),
         device_id = 2,
         debug = False,
-       dummy = False,
+       dummy = True,
         use_lookup = False,
         use_files = True
  )
  
  camera.calibrate()
  
-camera.get_lookup()
-
  transform = fft(window_size, hop_size)
  
  segment_samples = window_height * hop_size
@@ -94,7 +92,6 @@ try:
                 if segment_index == 10: cv.imwrite("sample.jpg", spectrum)
  
                 camera.display(spectrum)
-               time.sleep(0.1)
                 capture = camera.capture()
  
                 rows = [np.array([i]) for i in capture]
@@ -105,8 +102,6 @@ try:
                         audio = np.zeros((hop_size,), dtype=np.int16)
  
                 for row in recovered:
-                       row = row.astype(np.int16)
-
                         audio[-hop_size:] += row[:hop_size]
                         audio = np.append(audio, row[hop_size:])
  
@@ -114,6 +109,7 @@ try:
                 if segment_index == segment_count: segment_index = 0
  
                 slept = 0
+               time.sleep(0.1)
                 while len(audio) > 5 * segment_samples:
                         time.sleep(0.01)
                         slept += 1
author	will <greenwoodw50@gmail.com>
	Sun, 25 Aug 2024 03:48:42 +0000 (04:48 +0100)
committer	will <greenwoodw50@gmail.com>
	Sun, 25 Aug 2024 03:48:42 +0000 (04:48 +0100)
.gitignore		patch \| blob \| history
camera.py		patch \| blob \| history
fft.py		patch \| blob \| history
loop.py		patch \| blob \| history