]> OzVa Git service - audio-over-stft/commitdiff
init
author will <greenwoodw50@gmail.com>
Sat, 30 Mar 2024 23:04:28 +0000 (23:04 +0000)
committer will <greenwoodw50@gmail.com>
Sat, 30 Mar 2024 23:04:28 +0000 (23:04 +0000)
AOSTFT.py [new file with mode: 0755]
calibration/calibration.png [new file with mode: 0755]
calibration/calibration1.png [new file with mode: 0755]
data/audio1.wav [new file with mode: 0755]
data/audio2.wav [new file with mode: 0755]
data/test.jpg [new file with mode: 0755]
data/test1.png [new file with mode: 0755]
out.wav [new file with mode: 0755]

diff --git a/AOSTFT.py b/AOSTFT.py
new file mode 100755 (executable)
index 0000000..a954048
--- /dev/null
+++ b/AOSTFT.py
@@ -0,0 +1,141 @@
+import wave\r
+import random\r
+import time\r
+import cv2 as cv\r
+import numpy as np\r
+from scipy.signal import ShortTimeFFT\r
+from scipy.signal.windows import cosine\r
+from scipy.io import wavfile\r
+import os\r
+\r
+\r
+import matplotlib.pyplot as plt\r
+\r
+def calibrate(windowsize, max_attempts=10):\r
+    """Estimate the homography that maps the camera view onto the display.\r
+\r
+    Shows "calibration.png" in the "display" window, matches SIFT keypoints\r
+    between it and the camera image with a FLANN matcher, and fits a\r
+    homography to the good matches with RANSAC. On success the rectified\r
+    camera image is shown in the same window.\r
+\r
+    Args:\r
+        windowsize: (width, height) given to cv.warpPerspective for the\r
+            rectified preview.\r
+        max_attempts: how many times to try before giving up.\r
+\r
+    Returns:\r
+        The homography matrix from cv.findHomography, mapping camera image\r
+        points onto calibration image points.\r
+\r
+    Raises:\r
+        FileNotFoundError: if either image cannot be read.\r
+        RuntimeError: if no attempt produced a homography.\r
+    """\r
+    print("Attempting calibration")\r
+\r
+    calibrationimage = cv.imread("calibration.png")\r
+    if calibrationimage is None:\r
+        # cv.imread signals failure by returning None instead of raising\r
+        raise FileNotFoundError("could not read calibration.png")\r
+\r
+    # the detector, the matcher and the reference keypoints do not change\r
+    # between attempts, so build them once\r
+    sift = cv.SIFT_create()\r
+    FLANN_INDEX_KDTREE = 1\r
+    index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)\r
+    search_params = dict(checks = 50)\r
+    flann = cv.FlannBasedMatcher(index_params, search_params)\r
+    kp1, des1 = sift.detectAndCompute(calibrationimage,None)\r
+\r
+    for _ in range(max_attempts):\r
+\r
+        cv.imshow("display", calibrationimage)\r
+        cv.waitKey(1)\r
+\r
+        cameraimage = cv.imread("test.jpg") #replace with taking a picture from the camera\r
+        if cameraimage is None:\r
+            raise FileNotFoundError("could not read test.jpg")\r
+\r
+        #detect SIFT keypoints\r
+        kp2, des2 = sift.detectAndCompute(cameraimage,None)\r
+\r
+        #get good matches via ratio test\r
+        good = []\r
+        # k=2 matching needs descriptors on both sides and at least two\r
+        # candidates to compare against\r
+        if des1 is not None and des2 is not None and len(des2) >= 2:\r
+            matches = flann.knnMatch(des1,des2,k=2)\r
+            good = [\r
+                pair[0] for pair in matches\r
+                if len(pair) == 2 and pair[0].distance < 0.7*pair[1].distance\r
+            ]\r
+\r
+        #if theres enough matches\r
+        M = None\r
+        if len(good)>10:\r
+            src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)\r
+            dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)\r
+            M, mask = cv.findHomography(dst_pts, src_pts, cv.RANSAC, 5.0)\r
+\r
+        # findHomography can also return None when no model fits\r
+        if M is not None:\r
+            img3 = cv.warpPerspective(cameraimage, M, windowsize)\r
+\r
+            print("Calibration sucessfull")\r
+\r
+            cv.imshow("display", img3)\r
+            cv.waitKey(1)\r
+            return M\r
+\r
+        print("calibration unsucessfull - retrying...")\r
+\r
+    raise RuntimeError(f"calibration failed after {max_attempts} attempts")\r
+\r
+def getSFT():\r
+    """Build the ShortTimeFFT object shared by getSTFT and getISIFT.\r
+\r
+    Uses a symmetric 719-sample cosine window, a hop of one sample, a\r
+    16 kHz sampling rate and magnitude scaling.\r
+    """\r
+    # hop was originally width*2, but that setting reportedly raised errors\r
+    return ShortTimeFFT(\r
+        cosine(719, sym=True),\r
+        hop=1,\r
+        fs=16_000,\r
+        scale_to='magnitude',\r
+    )\r
+\r
+def getSTFT(data, start, stop):\r
+    """Encode STFT slices start..stop of `data` as a uint8 image.\r
+\r
+    The complex STFT is converted with 20*log10; for a complex value this\r
+    puts the level in dB in the real part and a scaled phase angle in the\r
+    imaginary part. Channel 0 holds the real part and channel 1 the\r
+    imaginary part, both offset by 128. Channel 2 is a randomly chosen copy\r
+    of one of the two.\r
+\r
+    Args:\r
+        data: 1-D sample array.\r
+        start: first STFT slice index (passed to stft as p0).\r
+        stop: end STFT slice index (passed to stft as p1).\r
+\r
+    Returns:\r
+        uint8 array with the three channels on the last axis.\r
+    """\r
+\r
+    # zero-pad the signal when the requested range runs past its end\r
+    missing = (stop-start)-len(data[start:stop])\r
+    if missing > 0:\r
+        data = np.pad(data, (0, missing), 'constant')\r
+\r
+    SFT = getSFT()\r
+    Sx = SFT.stft(data, p0=start, p1=stop)\r
+\r
+    # exact-zero bins give -inf here; they are clamped below, so the\r
+    # warning carries no information\r
+    with np.errstate(divide='ignore', invalid='ignore'):\r
+        Sx = 20*np.log10(Sx)\r
+    real, imag = Sx.real, Sx.imag\r
+\r
+    img = np.stack((real, imag, [real, imag][random.randint(0,1)]), axis=-1)\r
+\r
+    # clamp into the uint8 range before casting: casting out-of-range or\r
+    # non-finite floats to uint8 wraps unpredictably and corrupts pixels\r
+    img = np.nan_to_num(img+128, nan=128.0, posinf=255.0, neginf=0.0)\r
+    img = np.clip(img, 0, 255).astype(np.uint8)\r
+\r
+    return img\r
+\r
+def getISIFT(img, predata, step):\r
+    """Decode an image produced by getSTFT back into int16 samples.\r
+\r
+    Args:\r
+        img: array whose last axis holds the channels written by getSTFT;\r
+            only channels 0 and 1 are read.\r
+        predata: unused; kept so existing callers keep working.\r
+        step: end sample index of the reconstruction (passed to istft\r
+            as k1).\r
+\r
+    Returns:\r
+        int16 array with the reconstructed samples.\r
+    """\r
+\r
+    img = np.array(img, dtype=np.float64)-128\r
+\r
+    real, imag = img[...,0], img[...,1]\r
+\r
+    # plain array arithmetic instead of np.vectorize(complex): same values,\r
+    # no per-element Python call, and it also works on empty input\r
+    Sx = real + 1j*imag\r
+    # inverse of the 20*log10 applied in getSTFT: 10**(log10(z)) == z\r
+    Sx = np.power(10, Sx/20)\r
+\r
+    SFT = getSFT()\r
+    data = SFT.istft(Sx, k1=step)\r
+\r
+    # clamp before the cast so loud samples saturate instead of wrapping\r
+    limits = np.iinfo(np.int16)\r
+    data = np.clip(data, limits.min, limits.max).astype(np.int16)\r
+\r
+    return data\r
+\r
+def transmit(img, homo, windowsize):\r
+    """Show `img` in the "display" window and return the image as received.\r
+\r
+    The image is scaled to 1080x720 for display and then scaled to 1080x360\r
+    for the return value. No camera capture happens yet, so `homo` and\r
+    `windowsize` are currently unused.\r
+\r
+    Args:\r
+        img: image to transmit.\r
+        homo: homography for rectifying the camera frame (unused for now).\r
+        windowsize: (width, height) of the rectified frame (unused for now).\r
+\r
+    Returns:\r
+        The image resized to 1080x360.\r
+    """\r
+\r
+    # interpolation must be given by keyword: the third positional argument\r
+    # of cv.resize is dst, so a positional flag is silently ignored and the\r
+    # default bilinear filter would blend neighbouring pixels\r
+    img = cv.resize(img, (1080, 720), interpolation=cv.INTER_NEAREST)\r
+\r
+    cv.imshow("display", img)\r
+    cv.waitKey(1)\r
+\r
+    # TODO: capture a camera frame here and rectify it with\r
+    # cv.warpPerspective(cameraimage, homo, windowsize)\r
+\r
+    img = cv.resize(img, (1080, 360), interpolation=cv.INTER_NEAREST)\r
+\r
+    return img\r
+\r
+if __name__ == "__main__":\r
+    # Round-trip "audio2.wav" through the image encoding in chunks of `step`\r
+    # samples and write whatever was recovered to "out.wav".\r
+\r
+    windowsize = (1080, 720)\r
+\r
+    cv.namedWindow("display")\r
+    # calibration is disabled for now; transmit() does not use homo yet\r
+    homo = None\r
+    #homo = calibrate(windowsize)\r
+\r
+    sr, data = wavfile.read("audio2.wav")\r
+    data = np.array(data, dtype=np.int16)\r
+\r
+    step = 360\r
+    # the output starts with one zero sample, as it always has; chunks are\r
+    # collected in a list and joined once instead of re-copying per chunk\r
+    chunks = [np.zeros((1,), dtype=np.int16)]\r
+    data_max = int(np.max(data)) if data.size else 0\r
+    data_min = int(np.min(data)) if data.size else 0\r
+    new_max, new_min = 0, 0\r
+    try:\r
+        for i in range(0, len(data), step):\r
+            img = getSTFT(data, i, i+step)\r
+            img = transmit(img, homo, windowsize)\r
+            recovered = getISIFT(img, data[i:i+step], step)\r
+            chunks.append(recovered)\r
+            if recovered.size:\r
+                new_max = max(new_max, int(recovered.max()))\r
+                new_min = min(new_min, int(recovered.min()))\r
+\r
+            # compare in int64 over the overlapping part only: int16\r
+            # subtraction overflows and the last chunk can be shorter\r
+            original = data[i:i+step].astype(np.int64)\r
+            overlap = min(len(original), len(recovered))\r
+            difference = np.sum(np.abs(recovered[:overlap].astype(np.int64)-original[:overlap]))\r
+\r
+            os.system('cls' if os.name == 'nt' else 'clear')\r
+            print(f"total difference: {difference}")\r
+            print(f"origional data limits: {data_max, data_min}")\r
+            print(f"current data limits: {new_max, new_min}")\r
+            # report 0 while a recovered limit is still zero, avoiding a\r
+            # division by zero\r
+            max_factor = data_max//new_max if new_max else 0\r
+            min_factor = data_min//new_min if new_min else 0\r
+            print(f"current data factor: {max_factor, min_factor}")\r
+            print(f"{round(100*i/data.shape[0])}% done")\r
+    except KeyboardInterrupt:\r
+        # stopping early still writes what was recovered so far\r
+        print("interrupted")\r
+    except Exception as err:\r
+        # keep the best-effort behaviour, but say what actually went wrong\r
+        print("errored out!")\r
+        print(repr(err))\r
+\r
+    newdata = np.concatenate(chunks, axis=0)\r
+    wavfile.write("out.wav", sr, newdata)
\ No newline at end of file
diff --git a/calibration/calibration.png b/calibration/calibration.png
new file mode 100755 (executable)
index 0000000..40576b8
Binary files /dev/null and b/calibration/calibration.png differ
diff --git a/calibration/calibration1.png b/calibration/calibration1.png
new file mode 100755 (executable)
index 0000000..83bb076
Binary files /dev/null and b/calibration/calibration1.png differ
diff --git a/data/audio1.wav b/data/audio1.wav
new file mode 100755 (executable)
index 0000000..e53bc92
Binary files /dev/null and b/data/audio1.wav differ
diff --git a/data/audio2.wav b/data/audio2.wav
new file mode 100755 (executable)
index 0000000..70ac56b
Binary files /dev/null and b/data/audio2.wav differ
diff --git a/data/test.jpg b/data/test.jpg
new file mode 100755 (executable)
index 0000000..c70a992
Binary files /dev/null and b/data/test.jpg differ
diff --git a/data/test1.png b/data/test1.png
new file mode 100755 (executable)
index 0000000..24cb6bf
Binary files /dev/null and b/data/test1.png differ
diff --git a/out.wav b/out.wav
new file mode 100755 (executable)
index 0000000..60f58a3
Binary files /dev/null and b/out.wav differ