# From: will
# Date: Sat, 30 Mar 2024 23:04:28 +0000 (+0000)
# Subject: init
# X-Git-Url: https://git.ozva.co.uk/?a=commitdiff_plain;h=3be78b944a8ac1ee071b61a1cb142bdd5789660a;p=audio-over-stft
#
# AOSTFT.py as added by commit 3be78b944a8ac1ee071b61a1cb142bdd5789660a
# (new file, mode 100755, index 0000000..a954048).
#
# Binary files added by the same commit (contents are not part of the patch):
#   calibration/calibration.png    (index 0000000..40576b8)
#   calibration/calibration1.png   (index 0000000..83bb076)
#   data/audio1.wav                (index 0000000..e53bc92)
#   data/audio2.wav                (index 0000000..70ac56b)
#   data/test.jpg                  (index 0000000..c70a992)
#   data/test1.png                 (index 0000000..24cb6bf)
#   out.wav                        (index 0000000..60f58a3)
"""Audio-over-STFT experiment.

Audio is cut into fixed-size chunks, each chunk is turned into an 8-bit
image of its short-time Fourier transform, the image is shown in an OpenCV
window, and the image is decoded back into audio samples.  Capturing the
displayed image with a camera is not implemented yet, so the "received"
image is currently just the displayed image resized.
"""

import os
import random
import time
import traceback
import wave

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import wavfile
from scipy.signal import ShortTimeFFT
from scipy.signal.windows import cosine

# STFT configuration shared by the encoder and the decoder.
_STFT_WINDOW_WIDTH = 719
_STFT_SAMPLE_RATE = 16_000

# Spectrum values are stored as ``value + _PIXEL_OFFSET`` in a uint8 pixel.
_PIXEL_OFFSET = 128
# Magnitudes below this would land below pixel value 0 anyway; flooring here
# also keeps log10 away from zero (which would produce -inf).
_MIN_MAGNITUDE = 10.0 ** (-_PIXEL_OFFSET / 20)

# OpenCV sizes are (width, height).
_DISPLAY_SIZE = (1080, 720)
_RECEIVED_SIZE = (1080, 360)

_FLANN_INDEX_KDTREE = 1
_MIN_GOOD_MATCHES = 10
_RATIO_TEST = 0.7


def calibrate(windowsize, max_attempts=None):
    """Estimate the homography mapping the camera view onto the display.

    The calibration pattern is shown in the "display" window, SIFT features
    of the pattern and of the camera image are matched, and a homography is
    fitted with RANSAC once more than ten good matches are found.

    Args:
        windowsize: ``(width, height)`` passed to ``cv.warpPerspective`` for
            the preview of the rectified camera image.
        max_attempts: Give up after this many failed attempts.  ``None``
            (the default) retries forever, as the original code did.

    Returns:
        The 3x3 homography matrix returned by ``cv.findHomography``.

    Raises:
        FileNotFoundError: If ``calibration.png`` or ``test.jpg`` cannot be
            read.
        ValueError: If the calibration pattern yields no SIFT descriptors.
        RuntimeError: If ``max_attempts`` is exhausted.
    """
    print("Attempting calibration")

    # Everything that does not depend on the camera frame is set up once.
    calibrationimage = cv.imread("calibration.png")
    if calibrationimage is None:
        raise FileNotFoundError("could not read calibration.png")

    sift = cv.SIFT_create()
    index_params = dict(algorithm=_FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv.FlannBasedMatcher(index_params, search_params)

    kp1, des1 = sift.detectAndCompute(calibrationimage, None)
    if des1 is None:
        raise ValueError("calibration.png has no usable SIFT features")

    attempt = 0
    while max_attempts is None or attempt < max_attempts:
        attempt += 1

        cv.imshow("display", calibrationimage)
        cv.waitKey(1)

        # TODO: replace with taking a picture from the camera.
        cameraimage = cv.imread("test.jpg")
        if cameraimage is None:
            raise FileNotFoundError("could not read test.jpg")

        kp2, des2 = sift.detectAndCompute(cameraimage, None)

        # Keep only matches that pass the ratio test.  knnMatch can return
        # fewer than two neighbours for a query, so check before unpacking.
        good = []
        if des2 is not None:
            for pair in flann.knnMatch(des1, des2, k=2):
                if len(pair) < 2:
                    continue
                best, second = pair
                if best.distance < _RATIO_TEST * second.distance:
                    good.append(best)

        M = None
        if len(good) > _MIN_GOOD_MATCHES:
            src_pts = np.float32(
                [kp1[m.queryIdx].pt for m in good]
            ).reshape(-1, 1, 2)
            dst_pts = np.float32(
                [kp2[m.trainIdx].pt for m in good]
            ).reshape(-1, 1, 2)
            # Camera points -> pattern points; may be None if fitting fails.
            M, _mask = cv.findHomography(dst_pts, src_pts, cv.RANSAC, 5.0)

        if M is not None:
            img3 = cv.warpPerspective(cameraimage, M, windowsize)
            print("Calibration sucessfull")
            cv.imshow("display", img3)
            cv.waitKey(1)
            return M

        print("calibration unsucessfull - retrying...")

    raise RuntimeError(f"calibration failed after {max_attempts} attempts")


def getSFT():
    """Return the ShortTimeFFT object used for both encoding and decoding."""
    w = cosine(_STFT_WINDOW_WIDTH, sym=True)
    # hop was originally width * 2 but ShortTimeFFT rejected that
    # (presumably because hop may not exceed the window length).
    SFT = ShortTimeFFT(
        w, hop=1, fs=_STFT_SAMPLE_RATE, scale_to='magnitude'
    )

    return SFT


def getSTFT(data, start, stop):
    """Encode ``data[start:stop]`` as an 8-bit, three-channel STFT image.

    Channel 0 holds ``20 * log10(|Sx|) + 128`` and channel 1 holds the
    imaginary part of ``20 * log10(Sx)`` (the scaled phase) ``+ 128``.
    Channel 2 is a randomly chosen copy of channel 0 or channel 1.

    Args:
        data: Audio samples.
        start: First STFT slice index (equals the sample index, as hop is 1).
        stop: One past the last STFT slice index.

    Returns:
        ``uint8`` array with the STFT's shape plus a trailing axis of 3.
    """
    # Zero-pad when the requested range runs past the end of the signal.
    missing = (stop - start) - len(data[start:stop])
    if missing > 0:
        data = np.pad(data, (0, missing), 'constant')

    Sx = getSFT().stft(data, p0=start, p1=stop)

    # Same as the real and imaginary parts of 20 * log10(Sx), except that
    # the magnitude is floored so an all-zero bin no longer yields -inf.
    real = 20 * np.log10(np.maximum(np.abs(Sx), _MIN_MAGNITUDE))
    imag = 20 * np.angle(Sx) / np.log(10)

    filler = [real, imag][random.randint(0, 1)]
    img = np.stack((real, imag, filler), axis=-1)

    # Round and clip before the cast: casting out-of-range floats to uint8
    # wraps around or is undefined rather than saturating.
    img = np.clip(np.rint(img + _PIXEL_OFFSET), 0, 255).astype(np.uint8)

    return img


def getISIFT(img, predata, step):
    """Decode an image produced by ``getSTFT`` back into audio samples.

    Args:
        img: Image whose channel 0 / channel 1 hold the real / imaginary
            parts of ``20 * log10(Sx) + 128``.
        predata: Unused; kept so existing callers keep working.
        step: Number of samples to reconstruct (passed to ``istft`` as
            ``k1``).

    Returns:
        ``int16`` array of reconstructed samples.
    """
    levels = np.asarray(img, dtype=np.float64) - _PIXEL_OFFSET
    log_spectrum = levels[..., 0] + 1j * levels[..., 1]

    # 10 ** (w / 20) is the exact inverse of w = 20 * log10(Sx); the loss in
    # the round trip comes from the 8-bit quantisation, not from this step.
    Sx = np.power(10, log_spectrum / 20)

    # NOTE(review): getSTFT takes slices p0..p1 while istft is called with
    # its default k0 - confirm the slice/sample alignment is as intended.
    samples = getSFT().istft(Sx, k1=step)

    # Saturate instead of wrapping when a sample exceeds the int16 range.
    limits = np.iinfo(np.int16)
    return np.clip(np.rint(samples), limits.min, limits.max).astype(np.int16)


def transmit(img, homo, windowsize):
    """Show ``img`` in the "display" window and return the "received" image.

    Args:
        img: Image to transmit.
        homo: Homography for rectifying the camera image.  Currently unused.
        windowsize: Display size.  Currently unused.

    Returns:
        The displayed image resized to 1080 x 360 (width x height).
    """
    # interpolation must be passed by keyword: the third positional
    # parameter of cv.resize is dst, not the interpolation flag.
    shown = cv.resize(img, _DISPLAY_SIZE, interpolation=cv.INTER_NEAREST)

    cv.imshow("display", shown)
    cv.waitKey(1)

    # TODO: capture the display with the camera and rectify it with
    # cv.warpPerspective(cameraimage, homo, windowsize).

    return cv.resize(shown, _RECEIVED_SIZE, interpolation=cv.INTER_NEAREST)


def _ratio(numerator, denominator):
    """Floor-divide, returning 0 when the denominator is 0."""
    return numerator // denominator if denominator else 0


def main():
    """Round-trip ``audio2.wav`` through the image channel into ``out.wav``."""
    windowsize = (1080, 720)

    cv.namedWindow("display")
    # Calibration is disabled for now; enable with calibrate(windowsize).
    homo = None

    sr, data = wavfile.read("audio2.wav")
    data = np.array(data, dtype=np.int16)

    step = 360
    total = data.shape[0]
    data_max = int(np.max(data)) if data.size else 0
    data_min = int(np.min(data)) if data.size else 0

    # Chunks are collected and joined once at the end instead of growing an
    # array inside the loop.
    chunks = []
    new_max = new_min = 0
    clear_command = 'cls' if os.name == 'nt' else 'clear'

    try:
        for i in range(0, total, step):
            original = data[i:i + step]

            img = getSTFT(data, i, i + step)
            img = transmit(img, homo, windowsize)
            # The final chunk is shorter than step; drop the padded tail.
            recovered = getISIFT(img, original, step)[:len(original)]
            chunks.append(recovered)

            new_max = max(new_max, int(recovered.max()))
            new_min = min(new_min, int(recovered.min()))
            # Widen before subtracting so the difference cannot overflow.
            difference = np.sum(
                np.abs(recovered.astype(np.int64) - original.astype(np.int64))
            )

            os.system(clear_command)
            print(f"total difference: {difference}")
            print(f"origional data limits: {data_max, data_min}")
            print(f"current data limits: {new_max, new_min}")
            print(
                "current data factor: "
                f"{_ratio(data_max, new_max), _ratio(data_min, new_min)}"
            )
            print(f"{i / total:.0%} done")
    except KeyboardInterrupt:
        # Still fall through and save whatever has been recovered so far.
        print("interrupted - saving what was recovered so far")
    except Exception:
        traceback.print_exc()
        print("errored out!")

    if chunks:
        newdata = np.concatenate(chunks, axis=0)
    else:
        newdata = np.zeros((0,), dtype=np.int16)
    wavfile.write("out.wav", sr, newdata)


if __name__ == "__main__":
    main()