The intensity function is decomposed into the weighted sum of periodic exponentials:
$$f[m,n]=\frac{1}{\sqrt{MN}}\sum_{l=0}^{N-1}\sum_{k=0}^{M-1}F[k,l]\, e^{j2\pi\left(\frac{mk}{M}+\frac{nl}{N}\right)}$$
where
$$F[k,l]=\frac{1}{\sqrt{MN}}\sum_{n=0}^{N-1}\sum_{m=0}^{M-1}f[m,n]\, e^{-j2\pi\left(\frac{mk}{M}+\frac{nl}{N}\right)}$$
Anti-aliasing
Aliasing occurs when the sampling rate is too low, which allows the spectral bands to overlap and distorts the characteristics of the original image. It can be alleviated (anti-aliasing) by removing the high-frequency components with some sort of low-pass filter (blurring) before downsampling.
Downsampling without and with filtering
import numpy as np
from matplotlib import pyplot as plt
import cv2
plt.figure(figsize=(12, 8))

# Load the image. cv2.imread returns None instead of raising when the file
# cannot be read, so fail early with a clear message.
image = cv2.imread('bicycle.png')
if image is None:
    raise FileNotFoundError("could not read image file 'bicycle.png'")

# OpenCV loads colour images in BGR channel order while matplotlib's imshow
# expects RGB; convert once so every panel is displayed with correct colours.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.subplot(131)
plt.imshow(image)

# Downsample by a factor of 4 along each axis without any pre-filtering
no_gaussian = image[::4, ::4]
plt.subplot(132)
plt.imshow(no_gaussian)

# Blur with an 11x11 Gaussian kernel (sigma = 8) first, then downsample by
# the same factor of 4
with_gaussian = cv2.GaussianBlur(image, (11, 11), 8)[::4, ::4]
plt.subplot(133)
plt.imshow(with_gaussian)

plt.show()
Image Compression
The human eye is not equally sensitive to all spatial frequencies. The JPEG algorithm exploits this fact to compress images: the coefficients of the frequencies to which the eye is more sensitive are stored with more precision, and the remaining ones with less, so that the size is reduced without degrading the perceived image too much.
JPEG compression algorithm
import numpy as np
from matplotlib import pyplot as plt
import cv2
# Create the 10 x 10 figure that the plots below are drawn into
plt.figure(figsize=(10, 10))
# 8x8 luminance quantization table (the example table from Annex K of the
# JPEG standard). Entry [u][v] is the step size used for DCT coefficient
# (u, v): the larger the entry, the more coarsely that coefficient is stored.
std_luminance_quant_tbl = [
    [16, 11, 10, 16,  24,  40,  51,  61],
    [12, 12, 14, 19,  26,  58,  60,  55],
    [14, 13, 16, 24,  40,  57,  69,  56],
    [14, 17, 22, 29,  51,  87,  80,  62],
    [18, 22, 37, 56,  68, 109, 103,  77],
    [24, 35, 55, 64,  81, 104, 113,  92],
    [49, 64, 78, 87, 103, 121, 120, 101],
    [72, 92, 95, 98, 112, 100, 103,  99],
]
def jpeg_compresss(image, quant_table=None):
    """Simulate the lossy quantization stage of JPEG on an image.

    Every 8x8 block of every channel is transformed with the DCT, each
    coefficient is divided by the matching quantization-table entry, rounded
    to the nearest integer, multiplied back, and the block is then inverse
    transformed. The result is clipped to [0, 255] and returned as uint8.

    The same table is applied to all channels, and no colour-space
    conversion, level shift or entropy coding is performed, so this only
    reproduces the loss of precision, not an actual JPEG bitstream.

    The misspelled function name is kept so that existing callers keep
    working.

    Args:
        image: 2-D (grayscale) or 3-D (height x width x channels) array of
            pixel values. Height and width need not be multiples of 8; the
            image is padded internally by replicating its border pixels and
            cropped back afterwards.
        quant_table: optional 8x8 table of quantization step sizes. Defaults
            to ``std_luminance_quant_tbl``.

    Returns:
        A uint8 array with the same shape as ``image``.

    Raises:
        ValueError: if ``image`` is not 2-D or 3-D, or ``quant_table`` is not
            8x8.
    """
    block_size = 8

    if quant_table is None:
        quant_table = std_luminance_quant_tbl
    table = np.asarray(quant_table, dtype=np.float64)
    if table.shape != (block_size, block_size):
        raise ValueError("quant_table must be an 8x8 table")

    pixels = np.asarray(image)
    if pixels.ndim not in (2, 3):
        raise ValueError("image must be a 2-D or 3-D array")

    # Treat a grayscale image as a single-channel colour image internally.
    grayscale = pixels.ndim == 2
    if grayscale:
        pixels = pixels[:, :, np.newaxis]

    height, width = pixels.shape[:2]
    if height == 0 or width == 0:
        # Nothing to transform; np.pad cannot edge-extend an empty axis.
        return np.zeros(np.shape(image), dtype=np.uint8)

    # Pad up to the next multiple of 8 so that every block is a full 8x8
    # tile: partial edge blocks cannot be divided by the 8x8 table, and
    # cv2.dct only accepts even-sized arrays.
    pad_rows = -height % block_size
    pad_cols = -width % block_size
    padded = np.pad(
        pixels.astype(np.float64),
        ((0, pad_rows), (0, pad_cols), (0, 0)),
        mode="edge",
    )

    restored = np.empty_like(padded)
    for channel in range(padded.shape[2]):
        for row in range(0, padded.shape[0], block_size):
            for col in range(0, padded.shape[1], block_size):
                rows = slice(row, row + block_size)
                cols = slice(col, col + block_size)
                # Slicing one channel out of a 3-D array is not contiguous;
                # hand OpenCV a contiguous float64 copy of the block.
                block = np.ascontiguousarray(padded[rows, cols, channel])
                coefficients = cv2.dct(block)
                # Quantize and immediately dequantize: this rounding is
                # where the information is lost.
                dequantized = np.around(coefficients / table) * table
                restored[rows, cols, channel] = cv2.idct(dequantized)

    # Drop the padding, saturate to the valid pixel range, then round.
    restored = restored[:height, :width]
    result = np.around(np.clip(restored, 0.0, 255.0)).astype(np.uint8)
    return result[:, :, 0] if grayscale else result
# cv2.imread returns None instead of raising when the file cannot be read,
# so fail early with a clear message.
original_image = cv2.imread('pens_n_coins.png')
if original_image is None:
    raise FileNotFoundError("could not read image file 'pens_n_coins.png'")

# OpenCV loads colour images in BGR channel order while matplotlib's imshow
# expects RGB; convert before compressing so both panels show true colours.
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
compressed_image = jpeg_compresss(original_image)

# Original on the left, compressed result on the right
plt.subplot(221)
plt.imshow(original_image)
plt.subplot(222)
plt.imshow(compressed_image)
plt.show()
comments powered by Disqus