window_height: int,
display_size: tuple,
device_id: int = 0,
+ brightness: float = 1.,
+ contrast: float = 0.,
+ temperature: float = 0.,
+ tint: float = 0.,
debug: bool = True,
dummy: bool = False,
use_lookup: bool = False,
- use_files: bool = False
+ use_files: bool = False,
):
self.window_size = window_size
print("calibration failed")
quit()
- if self.use_files == True:
+ if self.use_lookup == True:
self.get_lookup()
def get_lookup(
self
) -> None:
- if self.use_lookup == True:
- return
-
if self.use_files == True:
self.lookup = np.load("lookup.npy")
return
lookup = None
- for r in range(0, 255, self.lookup_compression):
- for g in range(0, 255, self.lookup_compression):
- for b in range(0, 255, self.lookup_compression):
- pixel = np.array([[[b, g, r]]], dtype=np.int8)
- pixel = cv.resize(pixel, self.display_size, interpolation=cv.INTER_NEAREST_EXACT)
- self.display(pixel)
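+ # build the lookup as a running average of the signed per-channel capture error
+ # (recovered value minus displayed value) for each test colour shown below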
+ for (r, g, b) in [(0,0,0), (255,255,255)]:
+ pixel = np.array([[[b, g, r]]], dtype=np.uint8) # uint8 so 255 does not overflow
+ pixel = cv.resize(pixel, self.display_size, interpolation=cv.INTER_NEAREST_EXACT)
+ self.display(pixel)
- for i in range(10): # silly hack
- time.sleep(0.2)
- recovered = self.capture()
+ for i in range(100): # hack: pump the HighGUI event loop so the frame is actually drawn before capture
+ cv.waitKey(1)
+ recovered = self.capture()
- error = np.copy(recovered.astype(np.int16))
- error[..., 0] -= b
- error[..., 1] -= g
- error[..., 2] -= r
+ error = np.copy(recovered.astype(np.int16))
+ error[..., 0] -= b
+ error[..., 1] -= g
+ error[..., 2] -= r
- error = np.clip(error, -50, 255)
+ error = np.clip(error, -50, 255)
- if lookup is None:
- lookup = error
- else:
- lookup += error
- lookup = lookup // 2
+ if lookup is None:
+ lookup = error
+ else:
+ lookup += error
+ lookup = lookup // 2
self.lookup = lookup
np.save("lookup.npy", lookup)
+ lookup -= np.min(lookup) # shift so the smallest error maps to zero
+ lookup = np.round(lookup * (255 / np.max(lookup))).astype(np.uint8) # scale to 0-255 for inspection
+ cv.imwrite("lookup.jpg", lookup)
+
+
def display(
self,
image: np.ndarray
self.last_display = image
image = cv.resize(image, self.display_size, interpolation=cv.INTER_NEAREST_EXACT)
+ image = cv.convertScaleAbs(image, alpha=self.brightness, beta=self.contrast) # gain / offset correction; the defaults (1., 0.) are a no-op
+ image[...,2] = np.clip(image[...,2].astype(np.int16) + self.temperature, 0, 255).astype(np.uint8) # colour correction
+ image[...,1] = np.clip(image[...,1].astype(np.int16) + self.tint, 0, 255).astype(np.uint8)
+ image[...,0] = np.clip(image[...,0].astype(np.int16) - self.temperature, 0, 255).astype(np.uint8)
+
cv.imshow("display", image)
cv.waitKey(1)
else:
_, image = self.camera.read()
+
self.last_capture = image
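# warp the captured frame back into display coordinates using the calibration homography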
if self.homography is not None:
image = cv.warpPerspective(image, self.homography, self.display_size)
import numpy as np
-import math
-import time
import cv2 as cv
import matplotlib.pyplot as plt
-import cProfile
"""
Notes:
The data is received in int16 format (limits -32768 to 32767). It is converted to a complex array from which we can work out the power, which ranges from 0 to over 10,000. This is then converted to decibels [1].
The decibel values can be bounded between an upper volume limit and a lower noise floor, which also makes our calculations more predictable.
+ For some reason, when the lookup table scales via the relative volume instead of the volume, it all breaks. I can't seem to work out why, even though I believe this is mathematically wrong. Either way, the program has been changed to allow this while I work out why it is happening. [2]
+
[1] We convert to decibels via the function 20*log10(power), followed by some scaling to the required limits. We convert BACK to power via a lookup table. A lookup table is not practical for the forward conversion, as it would have to be huge and indexed by floating-point values.
+
+[2] The forward FFT has been changed to mirror the IFFT. This does seem to produce some peaking; if it becomes a problem it can be changed back at the expense of some volume.
+
"""
class fft():
# generate lookup table for the conversion from decibels to power
a = self.volume_min
- b = self.volume_relative / self.amplitude_relative
+ b = self.volume_max / self.amplitude_relative
# this is the parameterized inverse of y = (20 * log10(x) - 40) * (255/140)
log_lookup = [10 ** (((x * b) + a) / 20) for x in range(0, 256)]
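# sanity check on the inversion: y = (20 * log10(x) - 40) * (255/140) rearranges to
# x = 10 ** ((y * (140/255) + 40) / 20), i.e. a = volume_min and, mathematically,
# b = volume_relative / amplitude_relative; using volume_max instead is the empirical
# workaround described in note [2] above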
# confine the amplitude within the limits specified
a = self.volume_min
- b = self.amplitude_relative / self.volume_relative
+ b = self.amplitude_relative / self.volume_relative # possibly change the vol_max to vol_rel ?? see [2]
c = self.amplitude_min
amplitude = ((amplitude - a) * b) + c
# rearrange to image format
full = np.full(angle.shape, fill_value=60)
- image = np.stack((full, angle, amplitude), axis=-1)
+ image = np.stack((amplitude * (180/255), angle, amplitude), axis=-1)
image = np.array([image], dtype=np.uint8)
image = cv.cvtColor(image, cv.COLOR_HSV2BGR)
# split the image into its constituent parts
image = cv.cvtColor(image, cv.COLOR_BGR2HSV)
- amplitude = image[0][...,2].astype(np.uint8)
+ amplitude = image[0][...,2]
angle = image[0][...,1].astype(np.float64)
+ hue = image[0][...,0].astype(np.float64) * (255/180)
+
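+ # amplitude was encoded twice on the display side: in the value channel and, scaled to
+ # OpenCV's 0-179 hue range, in the hue channel; average the two recovered copies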
+ amplitude = np.mean( np.array([ amplitude, hue ]), axis=0 ).astype(np.uint8)
# convert amplitude back into vector length
amplitude = self.log_lookup[amplitude]
imag = np.sin(angle) * amplitude
segment = real + (1j * imag)
- data = np.fft.ifft(segment * self.window_size).real.astype(np.int16)
-
- return data
+ data = np.fft.ifft(segment * self.window_size).real
-if __name__ == "__main__":
- with cProfile.Profile() as pr:
- fft = fft(130, 65)
- fft.istft(fft.stft(np.random.randint(-32768, 32767, (130,))))
- pr.print_stats()
+ return data.astype(np.int16)
import pyaudio
import os
import sys
+import wave
import matplotlib.pyplot as plt
+import gi
+gi.require_version("Gtk", "3.0") # assuming GTK 3
+from gi.repository import Gtk
"""
notes:
sample_rate, data = wavfile.read("/home/will/Downloads/birdsong.wav")
#data = data[...,0]
-new_rate = 10000.
+new_rate = 11025.
sample_count = round(len(data) * new_rate / sample_rate)
data = sps.resample(data, sample_count)
sample_rate = int(new_rate)
-window_size = 130
-window_height = 70
+window_size = 170
+window_height = 80
hop_size = window_size // 2
camera = camera(
window_height,
(1920, 1080),
device_id = 2,
- debug = False,
- dummy = True,
+ brightness = 1.,
+ contrast = 0.,
+ temperature = 0.,
+ tint = 0.,
+ debug = True,
+ dummy = False,
use_lookup = False,
- use_files = True
+ use_files = False
)
+file = wave.open("out.wav", "wb")
+file.setparams((
+ 1, # channels
+ 2, # sample width
+ sample_rate,
+ 0,
+ "NONE", # compression type
+ "NONE" # compression name
+))
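+# nframes is left as 0 above; the wave module patches the frame count in the header
+# when the file is closed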
+
camera.calibrate()
transform = fft(window_size, hop_size)
camera.display(spectrum)
capture = camera.capture()
+ # plt.clf()
+ # plt.plot(rows[0])
+
rows = [np.array([i]) for i in capture]
with Pool() as p:
recovered = np.array(p.map(transform.istft, rows))
if len(audio) < hop_size:
audio = np.zeros((hop_size,), dtype=np.int16)
+ # plt.plot(recovered[0])
+ # plt.pause(0.05)
+
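+ # overlap-add: sum the first half of each recovered window into the tail of the
+ # audio buffer, then append the second half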
for row in recovered:
audio[-hop_size:] += row[:hop_size]
audio = np.append(audio, row[hop_size:])
+ #file.writeframes(row[hop_size:])
+
segment_index += 1
if segment_index == segment_count: segment_index = 0
- slept = 0
+ slept = False
time.sleep(0.1)
while len(audio) > 1 * segment_samples:
+ slept = True
cv.waitKey(1)
- slept += 1
- print(f"slept {slept} times")
+
+ if not slept:
+ print("Dropped frames!")
except KeyboardInterrupt:
stream.stop_stream()
stream.close()
pyaudio_object.terminate()
+ file.close()
try:
sys.exit()