Pytorch 圖像變換函數集合小結
一、必要的 python 模塊
PyTorch 的 Vision 模塊提供了圖像變換的很多函數.
torchvision/transforms/functional.py
from __future__ import division import torch import sys import math from PIL import Image, ImageOps, ImageEnhance, PILLOW_VERSION try: import accimage except ImportError: accimage = None import numpy as np import numbers import collections import warnings import matplotlib as plt if sys.version_info < (3, 3): Sequence = collections.Sequence Iterable = collections.Iterable else: Sequence = collections.abc.Sequence Iterable = collections.abc.Iterable
以下圖為例:
# Load the sample image that every example below operates on.
img_file = "test.jpe"
img = Image.open(img_file)
# img.size unpacks as (width, height); for the sample image this is (750, 815).
width, height = img.size  #(750, 815)
img.show()
二、PyTorch 圖像變換函數
2.1 判斷圖像數據類型
# 圖像格式檢查,如,pil, tensor, numpy def _is_pil_image(img): if accimage is not None: return isinstance(img, (Image.Image, accimage.Image)) else: return isinstance(img, Image.Image) def _is_tensor_image(img): return torch.is_tensor(img) and img.ndimension() == 3 def _is_numpy_image(img): return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
# example: _is_pil_image(img) # True _is_tensor_image(img) # False _is_numpy_image(img) # False _is_numpy_image(np.array(img)) # True
2.2 to_tensor(pic)
將 PIL Image
或 numpy.ndarray
轉換為 tensor
def to_tensor(pic):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to a tensor.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
            An ndarray may be H x W x C or a single-channel H x W array.

    Returns:
        Tensor: Converted image in C x H x W layout. uint8 data is converted
        to float and divided by 255; any other data is returned as-is.

    Raises:
        TypeError: If ``pic`` is neither a PIL Image nor a 2-D/3-D ndarray.
    """
    if not(_is_pil_image(pic) or _is_numpy_image(pic)):
        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

    if isinstance(pic, np.ndarray):
        # handle numpy array
        if pic.ndim == 2:
            # _is_numpy_image accepts 2-D arrays, but the HWC -> CHW transpose
            # below needs three axes, so add a trailing channel axis first.
            pic = pic[:, :, None]
        img = torch.from_numpy(pic.transpose((2, 0, 1)))
        # backward compatibility
        if isinstance(img, torch.ByteTensor):
            return img.float().div(255)
        else:
            return img

    if accimage is not None and isinstance(pic, accimage.Image):
        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(nppic)
        return torch.from_numpy(nppic)

    # handle PIL Image
    # NOTE: np.array is called without copy=False; NumPy 2 raises ValueError
    # for copy=False whenever the conversion cannot avoid a copy.
    if pic.mode == 'I':
        img = torch.from_numpy(np.array(pic, np.int32))
    elif pic.mode == 'I;16':
        img = torch.from_numpy(np.array(pic, np.int16))
    elif pic.mode == 'F':
        img = torch.from_numpy(np.array(pic, np.float32))
    elif pic.mode == '1':
        img = 255 * torch.from_numpy(np.array(pic, np.uint8))
    else:
        img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
    # PIL image mode: L, P, I, F, RGB, YCbCr, RGBA, CMYK
    if pic.mode == 'YCbCr':
        nchannel = 3
    elif pic.mode == 'I;16':
        nchannel = 1
    else:
        nchannel = len(pic.mode)
    # pic.size is (width, height), so this views the data as H x W x C.
    img = img.view(pic.size[1], pic.size[0], nchannel)
    # put it from HWC to CHW format
    # yikes, this transpose takes 80% of the loading time/CPU
    img = img.transpose(0, 1).transpose(0, 2).contiguous()
    if isinstance(img, torch.ByteTensor):
        return img.float().div(255)
    else:
        return img
2.3 to_pil_image(pic, mode=None)
將 tensor
或 ndarray
轉換為 PIL Image
def to_pil_image(pic, mode=None):
    """Convert a tensor or an ndarray to a PIL Image.

    Args:
        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
            Tensors are read as C x H x W (or H x W), ndarrays as H x W x C
            (or H x W). The input is never modified.
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).

    .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes

    Returns:
        PIL Image: Image converted to PIL Image.

    Raises:
        TypeError: If ``pic`` is not a tensor/ndarray, or no mode can be
            inferred for its dtype.
        ValueError: If ``pic`` is not 2-D/3-D, or ``mode`` does not match
            the number of channels / dtype of ``pic``.
    """
    if not(isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)):
        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))

    elif isinstance(pic, torch.Tensor):
        if pic.ndimension() not in {2, 3}:
            raise ValueError('pic should be 2/3 dimensional. Got {} '
                             'dimensions.'.format(pic.ndimension()))

        elif pic.ndimension() == 2:
            # if 2D image, add channel dimension (CHW).
            # Out-of-place on purpose: unsqueeze_ would reshape the caller's tensor.
            pic = pic.unsqueeze(0)

    elif isinstance(pic, np.ndarray):
        if pic.ndim not in {2, 3}:
            raise ValueError('pic should be 2/3 dimensional. Got {} '
                             'dimensions.'.format(pic.ndim))

        elif pic.ndim == 2:
            # if 2D image, add channel dimension (HWC)
            pic = np.expand_dims(pic, 2)

    npimg = pic
    if isinstance(pic, torch.FloatTensor):
        # Float tensors are scaled by 255 and cast to bytes.
        pic = pic.mul(255).byte()
    if isinstance(pic, torch.Tensor):
        # CHW -> HWC
        npimg = np.transpose(pic.numpy(), (1, 2, 0))

    if not isinstance(npimg, np.ndarray):
        raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
                        'not {}'.format(type(npimg)))

    if npimg.shape[2] == 1:
        # Single channel: the mode is dictated by the dtype.
        expected_mode = None
        npimg = npimg[:, :, 0]
        if npimg.dtype == np.uint8:
            expected_mode = 'L'
        elif npimg.dtype == np.int16:
            expected_mode = 'I;16'
        elif npimg.dtype == np.int32:
            expected_mode = 'I'
        elif npimg.dtype == np.float32:
            expected_mode = 'F'
        if mode is not None and mode != expected_mode:
            # Report the dtype of the actual input, not the np.dtype class.
            raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
                             .format(mode, npimg.dtype, expected_mode))
        mode = expected_mode

    elif npimg.shape[2] == 4:
        permitted_4_channel_modes = ['RGBA', 'CMYK']
        if mode is not None and mode not in permitted_4_channel_modes:
            raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))

        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGBA'
    else:
        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
        if mode is not None and mode not in permitted_3_channel_modes:
            raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGB'

    if mode is None:
        raise TypeError('Input type {} is not supported'.format(npimg.dtype))

    return Image.fromarray(npimg, mode=mode)
2.4 normalize(tensor, mean, std)
歸一化 tensor
的圖像. in-place
計算.
def normalize(tensor, mean, std):
    """Normalize a tensor image channel by channel, in place.

    Args:
        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.

    Returns:
        Tensor: The same tensor object, normalized.

    Raises:
        TypeError: If ``tensor`` is not a 3-dimensional torch tensor.
    """
    if not _is_tensor_image(tensor):
        raise TypeError('tensor is not a torch image.')

    # This is faster than using broadcasting, don't change without benchmarking
    for channel, channel_mean, channel_std in zip(tensor, mean, std):
        channel.sub_(channel_mean)
        channel.div_(channel_std)
    return tensor
# example
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# normalize() only accepts a (C, H, W) tensor, so convert the PIL image first.
img_tensor = to_tensor(img)
# normalize() works in place: img_normalize and img_tensor are the same tensor.
img_normalize = normalize(img_tensor, mean, std)

# vis
ax1 = plt.subplot(1, 2, 1)
ax1.imshow(img)
ax1.axis("off")
ax1.set_title("orig img")
ax2 = plt.subplot(1, 2, 2)
ax2.imshow(to_pil_image(img_normalize))
ax2.axis("off")
ax2.set_title("normalize img")
plt.show()
2.5 resize(img, size, interpolation=Image.BILINEAR)
對輸入的 PIL Image 進行 resize 到給定尺寸.
參數 size 為調整后的尺寸.
如果 size 是數組(h, w),則直接調整到該 (h, w) 尺寸.
如果 size 是一個 int 值,則調整后圖像的最短邊是該值,且保持固定的長寬比.
def resize(img, size, interpolation=Image.BILINEAR):
    """Resize a PIL Image to the given size.

    Args:
        img (PIL Image): Image to be resized.
        size (sequence or int): Desired output size. A pair is read as
            (h, w). An int sets the shorter side to that value and scales
            the other side to keep the aspect ratio.
        interpolation (int, optional): Desired interpolation. Default is
            ``PIL.Image.BILINEAR``

    Returns:
        PIL Image: Resized image (the input itself when its shorter side
        already equals an int ``size``).

    Raises:
        TypeError: If ``img`` is not a PIL Image or ``size`` is neither an
            int nor a two-element iterable.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    size_is_int = isinstance(size, int)
    size_is_pair = isinstance(size, Iterable) and len(size) == 2
    if not (size_is_int or size_is_pair):
        raise TypeError('Got inappropriate size arg: {}'.format(size))

    if not size_is_int:
        # size is (h, w) while PIL expects (w, h).
        return img.resize(size[::-1], interpolation)

    w, h = img.size
    if min(w, h) == size:
        # Shorter side already matches: nothing to do.
        return img

    if w < h:
        new_w = size
        new_h = int(size * h / w)
    else:
        new_h = size
        new_w = int(size * w / h)
    return img.resize((new_w, new_h), interpolation)
# example:
img_resize_256x256 = resize(img, (256, 256))  # (256, 256)
img_resize_256 = resize(img, 256)  # (256, 278)

# vis: original and both resized variants side by side
panels = [
    (img, "orig img"),
    (img_resize_256x256, "resize_256x256 img"),
    (img_resize_256, "resize_256 img"),
]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.6 pad(img, padding, fill=0, padding_mode='constant')
根據指定的 padding
模式和填充值,對給定的 PIL Image
的所有邊進行 pad
處理.
參數 padding - int 或 tuple 形式.
padding:
- 如果是 int 值,則對所有的邊都 padding 該 int 值.
- 如果是長度為 2 的 tuple,則對 left/right 和 top/bottom 分別進行 padding.
- 如果是長度為 4 的 tuple,則對 left, top, right, bottom 邊分別進行 padding.
參數 fill - 像素填充值,默認為 0. 如果值是長度為 3 的 tuple,則分別對 R,G,B 通道進行填充. 僅用于當 padding_mode='constant'
的情況.
參數 padding_mode - 填充的類型,可選:constant,edge,reflect,symmetric. 默認為 constant. 填充常數值.
constant - padding 填充常數值 fill.
edge - padding 圖像邊緣的最后一個值.
reflect - padding 圖像的反射(reflection)值,(不對圖像邊緣的最后一個像素值進行重復)
如,[1, 2, 3, 4] 在 reflect 模式下在兩邊 padding 2 個元素值,會得到:
[3, 2, 1, 2, 3, 4, 3, 2]
symmetric - padding 圖像的反射(reflection)值,(對圖像邊緣的最后一個像素值進行重復).
如,[1, 2, 3, 4] 在 symmetric 模式下在兩邊 padding 2 個元素值,會得到:
[2, 1, 1, 2, 3, 4, 4, 3]
def pad(img, padding, fill=0, padding_mode='constant'):
    """Pad every border of a PIL Image.

    Args:
        img (PIL Image): Image to be padded.
        padding (int or tuple): Padding on each border. An int pads all four
            borders; a 2-tuple is (left/right, top/bottom); a 4-tuple is
            (left, top, right, bottom).
        fill: Pixel fill value for constant fill. Default is 0. Only used
            when ``padding_mode`` is ``'constant'``.
        padding_mode: Type of padding. Should be: constant, edge, reflect or
            symmetric. Default is constant.

    Returns:
        PIL Image: Padded image.

    Raises:
        TypeError: If an argument has an unsupported type.
        ValueError: If ``padding`` is a sequence whose length is not 2 or 4.
        AssertionError: If ``padding_mode`` is not one of the four modes.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if not isinstance(padding, (numbers.Number, tuple)):
        raise TypeError('Got inappropriate padding arg')
    if not isinstance(fill, (numbers.Number, str, tuple)):
        raise TypeError('Got inappropriate fill arg')
    if not isinstance(padding_mode, str):
        raise TypeError('Got inappropriate padding_mode arg')

    if isinstance(padding, Sequence) and len(padding) not in [2, 4]:
        raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " +
                         "{} element tuple".format(len(padding)))

    assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'], \
        'Padding mode should be either constant, edge, reflect or symmetric'

    if padding_mode == 'constant':
        if img.mode != 'P':
            return ImageOps.expand(img, border=padding, fill=fill)
        # Palette images: carry the palette over to the expanded image.
        palette = img.getpalette()
        expanded = ImageOps.expand(img, border=padding, fill=fill)
        expanded.putpalette(palette)
        return expanded

    # Non-constant modes are delegated to np.pad, which needs per-side widths.
    if isinstance(padding, int):
        pad_left = pad_right = pad_top = pad_bottom = padding
    elif isinstance(padding, Sequence) and len(padding) == 2:
        pad_left = pad_right = padding[0]
        pad_top = pad_bottom = padding[1]
    elif isinstance(padding, Sequence) and len(padding) == 4:
        pad_left, pad_top, pad_right, pad_bottom = padding

    if img.mode == 'P':
        palette = img.getpalette()
        indexed = np.asarray(img)
        indexed = np.pad(indexed, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode)
        padded = Image.fromarray(indexed)
        padded.putpalette(palette)
        return padded

    pixels = np.asarray(img)
    if pixels.ndim == 3:
        # RGB image: leave the channel axis unpadded.
        pixels = np.pad(pixels, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), padding_mode)
    elif pixels.ndim == 2:
        # Grayscale image
        pixels = np.pad(pixels, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode)

    return Image.fromarray(pixels)
# example: pad left/top/right/bottom by 10/20/30/40 pixels with gray fill
img_padding = pad(img, (10, 20, 30, 40), fill=128)  # (750, 815) -> (790, 875)

# vis: original next to the padded result
panels = [(img, "orig img"), (img_padding, "padding img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.7 crop(img, i, j, h, w)
裁剪給定的 PIL Image.
def crop(img, i, j, h, w):
    """Crop a rectangular region out of a PIL Image.

    Args:
        img (PIL Image): Image to be cropped.
        i: Upper pixel coordinate.
        j: Left pixel coordinate.
        h: Height of the cropped image.
        w: Width of the cropped image.

    Returns:
        PIL Image: Cropped image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    # PIL takes the box as (left, upper, right, lower).
    left, upper = j, i
    box = (left, upper, left + w, upper + h)
    return img.crop(box)
# example: 500 x 500 crop whose top-left corner is at (100, 100)
img_crop = crop(img, 100, 100, 500, 500)  # (750, 815) -> (500, 500)

# vis: original next to the crop
panels = [(img, "orig img"), (img_crop, "crop img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.8 center_crop(img, output_size)
def center_crop(img, output_size):
    """Crop the central region of a PIL Image.

    Args:
        img (PIL Image): Image to be cropped.
        output_size (number or sequence): Desired (h, w) of the crop. A
            single number produces a square crop.

    Returns:
        PIL Image: Cropped image.
    """
    if isinstance(output_size, numbers.Number):
        side = int(output_size)
        output_size = (side, side)

    img_w, img_h = img.size
    crop_h, crop_w = output_size
    # Offsets of the crop's top-left corner so the margins are balanced.
    top = int(round((img_h - crop_h) / 2.))
    left = int(round((img_w - crop_w) / 2.))
    return crop(img, top, left, crop_h, crop_w)
# example
img_centercrop = center_crop(img, (256, 256))  # (750, 815) -> (256, 256)

# vis: original next to the center crop
panels = [(img, "orig img"), (img_centercrop, "centercrop img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.9 resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR)
對給定 PIL Image 進行裁剪,并 resize 到特定尺寸.
def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR):
    """Crop a PIL Image and resize the crop to the desired size.

    Args:
        img (PIL Image): Image to be cropped.
        i: Upper pixel coordinate.
        j: Left pixel coordinate.
        h: Height of the cropped image.
        w: Width of the cropped image.
        size (sequence or int): Desired output size. Same semantics as ``resize``.
        interpolation (int, optional): Desired interpolation. Default is
            ``PIL.Image.BILINEAR``.

    Returns:
        PIL Image: Cropped and resized image.
    """
    assert _is_pil_image(img), 'img should be PIL Image'
    cropped = crop(img, i, j, h, w)
    return resize(cropped, size, interpolation)
# example
img_resizedcrop = resized_crop(img, 100, 100, 500, 500, (256, 256))
# (750, 815) -> (500, 500) -> (256, 256)

# vis: original next to the resized crop
panels = [(img, "orig img"), (img_resizedcrop, "resizedcrop img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.10 hflip(img)
水平翻轉 (Horizontally flip) 給定的 PIL Image.
def hflip(img):
    """Flip a PIL Image horizontally.

    Args:
        img (PIL Image): Image to be flipped.

    Returns:
        PIL Image: Horizontally flipped image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        return img.transpose(Image.FLIP_LEFT_RIGHT)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
2.11 vflip(img)
垂直翻轉 (Vertically flip) 給定的 PIL Image.
def vflip(img):
    """Flip a PIL Image vertically.

    Args:
        img (PIL Image): Image to be flipped.

    Returns:
        PIL Image: Vertically flipped image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        return img.transpose(Image.FLIP_TOP_BOTTOM)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
# example:
img_hflip = hflip(img)
img_vflip = vflip(img)

# vis: original, horizontal flip, vertical flip
panels = [(img, "orig img"), (img_hflip, "hflip img"), (img_vflip, "vflip img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.12 five_crop(img, size)
Crop the given PIL Image into four corners and the central crop.
從給定 PIL Image 的四個角和中間裁剪出五個子圖像.
def five_crop(img, size):
    """Crop a PIL Image into its four corners and the central crop.

    Args:
        img (PIL Image): Image to be cropped.
        size (sequence or int): Desired output size of the crop. If size is
            an int instead of sequence like (h, w), a square crop
            (size, size) is made.

    Returns:
        tuple: tuple (tl, tr, bl, br, center) - the top left, top right,
        bottom left, bottom right and center crops.

    Raises:
        ValueError: If the requested crop is larger than the image.
    """
    if isinstance(size, numbers.Number):
        size = (int(size), int(size))
    else:
        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."

    w, h = img.size
    crop_h, crop_w = size
    if crop_w > w or crop_h > h:
        raise ValueError("Requested crop size {} is bigger than input size {}".format(size,
                                                                                      (h, w)))

    # Corner boxes in PIL's (left, upper, right, lower) convention.
    corner_boxes = (
        (0, 0, crop_w, crop_h),
        (w - crop_w, 0, w, crop_h),
        (0, h - crop_h, crop_w, h),
        (w - crop_w, h - crop_h, w, h),
    )
    tl, tr, bl, br = [img.crop(box) for box in corner_boxes]
    center = center_crop(img, (crop_h, crop_w))
    return (tl, tr, bl, br, center)
# example:
img_tl, img_tr, img_bl, img_br, img_center = five_crop(img, (400, 400))

# vis: original plus the five crops on a 2 x 3 grid
panels = [
    (img, "orig img"),
    (img_tl, "tl img"),
    (img_tr, "tr img"),
    (img_bl, "bl img"),
    (img_br, "br img"),
    (img_center, "center img"),
]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(2, 3, position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.13 ten_crop(img, size, vertical_flip=False)
將給定 PIL Image 裁剪出的四個角和中間部分的五個子圖像,每個子圖像進行翻轉處理. 默認時水平翻轉.
def ten_crop(img, size, vertical_flip=False):
    """Five-crop a PIL Image and its flipped copy.

    Args:
        img (PIL Image): Image to be cropped.
        size (sequence or int): Desired output size of the crop. If size is
            an int instead of sequence like (h, w), a square crop
            (size, size) is made.
        vertical_flip (bool): Use vertical flipping instead of horizontal

    Returns:
        tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip,
        br_flip, center_flip) - the five crops of the image followed by
        the five crops of the flipped image.
    """
    if isinstance(size, numbers.Number):
        size = (int(size), int(size))
    else:
        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."

    crops = five_crop(img, size)
    flipped = vflip(img) if vertical_flip else hflip(img)
    flipped_crops = five_crop(flipped, size)
    return crops + flipped_crops
2.14 adjust_brightness(img, brightness_factor)
def adjust_brightness(img, brightness_factor):
    """Adjust the brightness of a PIL Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        brightness_factor (float): How much to adjust the brightness. Can be
            any non negative number. 0 gives a black image, 1 gives the
            original image, 2 increases the brightness by a factor of 2.

    Returns:
        PIL Image: Brightness adjusted image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return ImageEnhance.Brightness(img).enhance(brightness_factor)
# example:
img_adjust_brightness = adjust_brightness(img, 2.5)

# vis: original next to the brightened image
panels = [(img, "orig img"), (img_adjust_brightness, "adjust_brightness img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.15 adjust_contrast(img, contrast_factor)
調整對比度.
def adjust_contrast(img, contrast_factor):
    """Adjust the contrast of a PIL Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        contrast_factor (float): How much to adjust the contrast. Can be any
            non negative number. 0 gives a solid gray image, 1 gives the
            original image, 2 increases the contrast by a factor of 2.

    Returns:
        PIL Image: Contrast adjusted image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return ImageEnhance.Contrast(img).enhance(contrast_factor)
# example:
img_adjust_contrast = adjust_contrast(img, 2.5)

# vis: original next to the contrast-adjusted image
panels = [(img, "orig img"), (img_adjust_contrast, "adjust_contrast img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.16 adjust_saturation(img, saturation_factor)
調整顏色飽和度.
def adjust_saturation(img, saturation_factor):
    """Adjust the color saturation of a PIL Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        saturation_factor (float): How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image
            while 2 will enhance the saturation by a factor of 2.

    Returns:
        PIL Image: Saturation adjusted image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return ImageEnhance.Color(img).enhance(saturation_factor)
# example
img_adjust_saturation = adjust_saturation(img, 2.5)

# vis: original next to the saturation-adjusted image
panels = [(img, "orig img"), (img_adjust_saturation, "adjust_saturation img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.17 adjust_hue(img, hue_factor)
調整圖像 HUE.
通過將圖像轉換為 HSV 空間,并周期地移動在 hue 通道(H) 的強度,以實現圖像 hue 的調整.
最后,再將結果轉換回原始的圖像模式. 參數 hue_factor - H 通道平移的因子,其值必須在區間 [-0.5, 0.5].
def adjust_hue(img, hue_factor):
    """Adjust the hue of a PIL Image.

    The image is converted to HSV, the hue channel (H) is shifted cyclically,
    and the result is converted back to the original image mode.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        hue_factor (float): How much to shift the hue channel. Should be in
            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
            HSV space in positive and negative direction respectively.
            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
            with complementary colors while 0 gives the original image.

    Returns:
        PIL Image: Hue adjusted image. Images in mode 'L', '1', 'I' or 'F'
        are returned unchanged.

    Raises:
        ValueError: If ``hue_factor`` is outside [-0.5, 0.5].
        TypeError: If ``img`` is not a PIL Image.
    """
    if not(-0.5 <= hue_factor <= 0.5):
        # Include the offending value; the message used to call .format()
        # on a string without a placeholder, so the value was dropped.
        raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor))

    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    input_mode = img.mode
    if input_mode in {'L', '1', 'I', 'F'}:
        # Single-channel modes: returned as-is.
        return img

    h, s, v = img.convert('HSV').split()

    np_h = np.array(h, dtype=np.uint8)
    # Reduce the shift modulo 256 in Python before building the uint8:
    # casting a negative float straight to uint8 is platform-dependent.
    hue_shift = np.uint8(int(hue_factor * 255) % 256)
    # uint8 addition takes care of rotation across boundaries
    with np.errstate(over='ignore'):
        np_h += hue_shift
    h = Image.fromarray(np_h, 'L')

    img = Image.merge('HSV', (h, s, v)).convert(input_mode)
    return img
# example:
img_adjust_hue = adjust_hue(img, 0.5)

# vis: original next to the hue-shifted image
panels = [(img, "orig img"), (img_adjust_hue, "adjust_hue img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.18 adjust_gamma(img, gamma, gain=1)
對圖像進行伽馬校正(gamma correction). 也被叫作 Power Law Transform. 對應的公式為 $I_{out} = 255 \times gain \times \left(\frac{I_{in}}{255}\right)^{\gamma}$.
def adjust_gamma(img, gamma, gain=1):
    """Apply gamma correction to a PIL Image.

    Each intensity ``v`` in 0..255 is mapped to
    ``255 * gain * (v / 255) ** gamma`` through a lookup table.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        gamma (float): Non negative real number, the exponent in the
            formula above. gamma larger than 1 make the shadows darker,
            while gamma smaller than 1 make dark regions lighter.
        gain (float): The constant multiplier.

    Returns:
        PIL Image: Gamma adjusted image, in the mode of the input.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
        ValueError: If ``gamma`` is negative.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if gamma < 0:
        raise ValueError('Gamma should be a non-negative real number')

    input_mode = img.mode
    rgb = img.convert('RGB')

    # One 256-entry table, repeated for each of the three RGB bands.
    channel_table = [255 * gain * pow(level / 255., gamma) for level in range(256)]
    gamma_map = channel_table * 3
    # use PIL's point-function to accelerate this part
    corrected = rgb.point(gamma_map)

    return corrected.convert(input_mode)
# example:
img_adjust_gamma = adjust_gamma(img, 0.5)

# vis: original next to the gamma-corrected image
panels = [(img, "orig img"), (img_adjust_gamma, "adjust_gamma img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.19 rotate(img, angle, resample=False, expand=False, center=None)
旋轉圖像.
參數 resample
可選值:PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC.
如果參數 resample
被忽略,或圖像的模式是 1 或 P,則 resample=PIL.Image.NEAREST.
參數 expand
如果 expand=True,則延展輸出圖像,以能包含旋轉后的全部圖像.
如果 expand=False 或被忽略,則保持輸出圖像與輸入圖像的尺寸一致.
expand 假設旋轉是以中心進行旋轉,且沒有平移.
def rotate(img, angle, resample=False, expand=False, center=None):
    """Rotate a PIL Image by ``angle`` degrees.

    Args:
        img (PIL Image): PIL Image to be rotated.
        angle (float or int): Rotation angle in degrees, counter clockwise.
        resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or
            ``PIL.Image.BICUBIC``, optional): Resampling filter, forwarded
            to ``img.rotate``.
        expand (bool, optional): Optional expansion flag, forwarded to
            ``img.rotate``.
        center (2-tuple, optional): Optional center of rotation. Origin is
            the upper left corner. Default is the center of the image.

    Returns:
        PIL Image: Rotated image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        return img.rotate(angle, resample, expand, center)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
# example:
img_rotate = rotate(img, 60)

# vis: original next to the rotated image
panels = [(img, "orig img"), (img_rotate, "rotate img")]
for position, (picture, title) in enumerate(panels, start=1):
    axis = plt.subplot(1, len(panels), position)
    axis.imshow(picture)
    axis.axis("off")
    axis.set_title(title)
plt.show()
2.20 affine(img, angle, translate, scale, shear, resample=0, fillcolor=None)
保持圖像中心不變,進行仿射變換.
def _get_inverse_affine_matrix(center, angle, translate, scale, shear): # Helper method to compute inverse matrix for affine transformation # As it is explained in PIL.Image.rotate # We need compute INVERSE of affine transformation matrix: M = T * C * RSS * C^-1 # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] # RSS is rotation with scale and shear matrix # RSS(a, scale, shear) = [ cos(a)*scale -sin(a + shear)*scale 0] # [ sin(a)*scale cos(a + shear)*scale 0] # [ 0 0 1] # Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1 angle = math.radians(angle) shear = math.radians(shear) scale = 1.0 / scale # Inverted rotation matrix with scale and shear d = math.cos(angle + shear) * math.cos(angle) + math.sin(angle + shear) * math.sin(angle) matrix = [ math.cos(angle + shear), math.sin(angle + shear), 0, -math.sin(angle), math.cos(angle), 0 ] matrix = [scale / d * m for m in matrix] # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 matrix[2] += matrix[0] * (-center[0] - translate[0]) + matrix[1] * (-center[1] - translate[1]) matrix[5] += matrix[3] * (-center[0] - translate[0]) + matrix[4] * (-center[1] - translate[1]) # Apply center translation: C * RSS^-1 * C^-1 * T^-1 matrix[2] += center[0] matrix[5] += center[1] return matrix def affine(img, angle, translate, scale, shear, resample=0, fillcolor=None): """ Args: img (PIL Image): PIL Image to be rotated. angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction. translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation) scale (float): overall scale shear (float): shear angle value in degrees between -180 to 180, clockwise direction. resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional): fillcolor (int): Optional fill color for the area outside the transform in the output image. 
(Pillow>=5.0.0) """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) assert isinstance(translate, (tuple, list)) and len(translate) == 2, \ "Argument translate should be a list or tuple of length 2" assert scale > 0.0, "Argument scale should be positive" output_size = img.size center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5) matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear) kwargs = {"fillcolor": fillcolor} if PILLOW_VERSION[0] == '5' else {} return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)
2.21 to_grayscale(img, num_output_channels=1)
將圖像轉換為灰度圖.
def to_grayscale(img, num_output_channels=1):
    """Convert a PIL Image to its grayscale version.

    Args:
        img (PIL Image): Image to be converted to grayscale.
        num_output_channels (int): 1 or 3. Default is 1.

    Returns:
        PIL Image: Grayscale version of the image.
            if num_output_channels = 1 : returned image is single channel
            if num_output_channels = 3 : returned image is 3 channel with r = g = b

    Raises:
        TypeError: If ``img`` is not a PIL Image.
        ValueError: If ``num_output_channels`` is neither 1 nor 3.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if num_output_channels == 1:
        return img.convert('L')

    if num_output_channels == 3:
        # Replicate the single luminance plane into the R, G and B bands.
        gray_plane = np.array(img.convert('L'), dtype=np.uint8)
        stacked = np.dstack([gray_plane, gray_plane, gray_plane])
        return Image.fromarray(stacked, 'RGB')

    raise ValueError('num_output_channels should be either 1 or 3')
參考鏈接
https://www.aiuai.cn/aifarm759.html
到此這篇關于 Pytorch 圖像變換函數集合小結的文章就介紹到這了,更多相關 Pytorch 圖像變換函數內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持腳本之家!
- pytorch中的nn.ZeroPad2d()零填充函數(shù)實(shí)例詳解
- Pytorch上下采樣函數(shù)--interpolate用法
- pytorch 常用函數(shù) max ,eq說明
- Pytorch十九種損失函數(shù)的使用詳解
- pytorch之Resize()函數(shù)具體使用詳解
- 使用 pytorch 創(chuàng)建神經(jīng)網(wǎng)絡(luò)擬合sin函數(shù)的實(shí)現(xiàn)
- Pytorch mask_select 函數(shù)的用法詳解
- PyTorch筆記之scatter()函數(shù)的使用
- pytorch方法測(cè)試——激活函數(shù)(ReLU)詳解
- pytorch 常用線性函數(shù)詳解
- 使用Pytorch來擬合函數(shù)方式
- 如何利用Pytorch計(jì)算三角函數(shù)
相關(guān)文章
python編程學(xué)習(xí)使用管道Pipe編寫優(yōu)化代碼
大家好,今天這篇文章我將詳細(xì)講解 Pipe 如何讓你的代碼更加簡(jiǎn)潔的方法,有需要的朋友可以借鑒參考下,希望能夠有所幫助,祝大家多多進(jìn)步2021-11-11一篇文章帶你了解python標(biāo)準(zhǔn)庫(kù)--datetime模塊
這篇文章主要為大家介紹了python中的datetime模塊,datetime模塊的接口則更直觀、更容易調(diào)用,想要了解datetime模塊的朋友可以參考一下2021-08-08完美解決TensorFlow和Keras大數(shù)據(jù)量?jī)?nèi)存溢出的問題
這篇文章主要介紹了完美解決TensorFlow和Keras大數(shù)據(jù)量?jī)?nèi)存溢出的問題,具有很好的參考價(jià)值,希望對(duì)大家有所幫助。一起跟隨小編過來看看吧2020-07-07一文教你將Visual Studio Code變成Python開發(fā)神器
Visual Studio Code 是一款功能強(qiáng)大、可擴(kuò)展且輕量級(jí)的代碼編輯器,經(jīng)過多年的發(fā)展,已經(jīng)成為 Python 社區(qū)的首選代碼編輯器之一。本文將為大家介紹一下如何將Visual Studio Code變成Python開發(fā)神器,需要的可以參考一下2022-07-07Python 實(shí)現(xiàn)一行輸入多個(gè)值的方法
下面小編就為大家分享一篇Python 實(shí)現(xiàn)一行輸入多個(gè)值的方法,具有很好的參考價(jià)值,希望對(duì)大家有所幫助。一起跟隨小編過來看看吧2018-04-04如何關(guān)掉pycharm中的python console(圖解)
本文通過圖文并茂的形式給大家介紹了如何關(guān)掉pycharm中的python console,非常不錯(cuò),具有一定的參考借鑒價(jià)值,需要的朋友可以參考下2019-10-10