Python實現GPU加速圖像處理的代碼詳解
更新時間:2025年04月08日 09:14:50 作者:ak啊
這篇文章主要為大家詳細介紹了Python實現GPU加速圖像處理的相關知識,文中的示例代碼講解詳細,感興趣的小伙伴可以跟隨小編一起學習一下
1. 使用 PyTorch 實現 GPU 加速的卷積濾波(如邊緣檢測)
import torch
import torch.nn as nn
import cv2
import numpy as np

# Sobel edge detection on an RGB image, run on the GPU when one is available.

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread("input.jpg")  # BGR channel order
if image is None:
    raise FileNotFoundError("input.jpg could not be read")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_tensor = torch.from_numpy(image).float().permute(2, 0, 1)  # HWC -> CHW
image_tensor = image_tensor.unsqueeze(0).to(device)  # add batch dim, move to device

# Depthwise convolution: groups=3 gives every input channel its own 3x3
# filter, so the weight has shape (out_channels, in_channels / groups, 3, 3)
# = (3, 1, 3, 3).  The original built a (9, 1, 3, 3) weight for a groups=1
# layer, which cannot be applied to a 3-channel input.
conv_layer = nn.Conv2d(
    in_channels=3,
    out_channels=3,
    kernel_size=3,
    bias=False,
    padding=1,
    groups=3,
).to(device)

# Sobel kernel for the horizontal gradient (it responds to vertical edges).
sobel_x = torch.tensor(
    [[-1, 0, 1],
     [-2, 0, 2],
     [-1, 0, 1]],
    dtype=torch.float32,
    device=device,
)
# One identical (1, 3, 3) filter per colour channel -> (3, 1, 3, 3).
sobel_kernel = sobel_x.view(1, 1, 3, 3).repeat(3, 1, 1, 1)

with torch.no_grad():
    conv_layer.weight.copy_(sobel_kernel)
    output_tensor = conv_layer(image_tensor)
    # Negative responses are edges of the opposite polarity; take the
    # magnitude so they are not all clipped to 0 below.
    output_tensor = output_tensor.abs()

# Back to an HWC uint8 image on the CPU, then save in OpenCV's BGR order.
output = output_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()
output = np.clip(output, 0, 255).astype(np.uint8)
cv2.imwrite("edge_detection_gpu.jpg", cv2.cvtColor(output, cv2.COLOR_RGB2BGR))
2. 使用 OpenCV 的 CUDA 模塊加速高斯模糊
import cv2
import time

# Gaussian blur using OpenCV's CUDA module, timed over repeated runs.

# getCudaEnabledDeviceCount() returns 0 when OpenCV was built without CUDA
# support or no usable device is present; fail early with a clear message.
cuda_device_count = cv2.cuda.getCudaEnabledDeviceCount()
print("CUDA devices:", cuda_device_count)
if cuda_device_count < 1:
    raise RuntimeError("OpenCV reports no CUDA-enabled device")

# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread("input.jpg")
if image is None:
    raise FileNotFoundError("input.jpg could not be read")

# Upload the image to device memory.
gpu_image = cv2.cuda_GpuMat()
gpu_image.upload(image)

# Build the filter once and reuse it for every run.
gaussian_filter = cv2.cuda.createGaussianFilter(
    cv2.CV_8UC3,  # source type: 8-bit unsigned, 3 channels
    cv2.CV_8UC3,  # destination type
    (15, 15),     # kernel size
    0,            # sigma; 0 lets OpenCV derive it from the kernel size
)

# Apply the filter repeatedly to get a measurable duration.
start_time = time.perf_counter()
for _ in range(100):
    gpu_blur = gaussian_filter.apply(gpu_image)
end_time = time.perf_counter()

# Download the last result to host memory and save it.
result = gpu_blur.download()
print(f"GPU Time: {end_time - start_time:.4f} seconds")
cv2.imwrite("blur_gpu.jpg", result)
3. 使用 CuPy 加速圖像傅里葉變換
import cupy as cp
import cv2
import numpy as np
import time

# Log-magnitude spectrum of a grayscale image, computed with CuPy's FFT.

# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread("input.jpg", cv2.IMREAD_GRAYSCALE)
if image is None:
    raise FileNotFoundError("input.jpg could not be read")

# Copy the NumPy array to GPU memory.
image_gpu = cp.asarray(image)

# Forward 2-D FFT, shifted so the zero-frequency bin is at the centre.
start_time = time.perf_counter()
fft_gpu = cp.fft.fft2(image_gpu)
fft_shift = cp.fft.fftshift(fft_gpu)
# log1p(|F|) instead of log(|F|): a zero-magnitude bin would give -inf and
# break the min-max normalisation below.
magnitude_spectrum = cp.log1p(cp.abs(fft_shift))
# CuPy launches GPU work asynchronously; wait for it to finish so the timer
# measures the computation rather than just the kernel launches.
cp.cuda.Stream.null.synchronize()
end_time = time.perf_counter()

# Copy the result back to host memory.
magnitude_cpu = cp.asnumpy(magnitude_spectrum)
print(f"GPU FFT Time: {end_time - start_time:.4f} seconds")

# Rescale to 0..255 and save the spectrum as an 8-bit image.
magnitude_cpu = cv2.normalize(magnitude_cpu, None, 0, 255, cv2.NORM_MINMAX)
cv2.imwrite("fft_spectrum_gpu.jpg", magnitude_cpu.astype(np.uint8))
4. 使用 Numba 編寫自定義 GPU 核函數(圖像反色)
from numba import cuda
import numpy as np
import cv2
import time

# In-place colour inversion of an image with a custom Numba CUDA kernel.

# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread("input.jpg")
if image is None:
    raise FileNotFoundError("input.jpg could not be read")
height, width, channels = image.shape


@cuda.jit
def invert_colors_kernel(image):
    """Invert every channel of one pixel of ``image`` in place.

    Each thread handles the pixel at ``(x, y) = cuda.grid(2)``; threads that
    fall outside the image bounds do nothing.
    """
    x, y = cuda.grid(2)
    if x < image.shape[0] and y < image.shape[1]:
        # Use the array's own channel count rather than a hard-coded 3.
        for c in range(image.shape[2]):
            image[x, y, c] = 255 - image[x, y, c]


# Launch configuration: 16x16 threads per block, enough blocks to cover the
# image (ceiling division on each axis).
threads_per_block = (16, 16)
blocks_per_grid_x = (height + threads_per_block[0] - 1) // threads_per_block[0]
blocks_per_grid_y = (width + threads_per_block[1] - 1) // threads_per_block[1]
blocks_per_grid = (blocks_per_grid_x, blocks_per_grid_y)

# Warm-up launch on a throwaway array with the same dtype and rank: the first
# launch triggers JIT compilation, which would otherwise be included in the
# timing below.  A separate array is used because the kernel works in place.
warmup_gpu = cuda.to_device(np.zeros((1, 1, channels), dtype=image.dtype))
invert_colors_kernel[(1, 1), threads_per_block](warmup_gpu)
cuda.synchronize()

# Copy the image to device memory.
image_gpu = cuda.to_device(image)

# Timed launch; synchronize so the timer covers the kernel's execution.
start_time = time.perf_counter()
invert_colors_kernel[blocks_per_grid, threads_per_block](image_gpu)
cuda.synchronize()
end_time = time.perf_counter()

# Copy the result back to host memory and save it.
image_cpu = image_gpu.copy_to_host()
print(f"GPU Invert Time: {end_time - start_time:.6f} seconds")
cv2.imwrite("inverted_gpu.jpg", image_cpu)
5. 使用 PyTorch 實現實時風格遷移(GPU加速)
import torch
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms
from PIL import Image

# Optimisation-based neural style transfer using VGG19 features.

# Channel statistics used by the preprocessing below; reused to undo the
# normalisation before the result is saved.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Indices into torchvision's ``vgg19().features`` Sequential.  ReLU outputs
# are captured (relu4_2 for content; relu1_1 .. relu5_1 for style).
CONTENT_LAYERS = (22,)
STYLE_LAYERS = (1, 6, 11, 20, 29)

# Load the pretrained feature extractor on the GPU when one is available.
# NOTE: ``pretrained=True`` is kept from the original; newer torchvision
# releases prefer the ``weights=`` argument.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.vgg19(pretrained=True).features.to(device).eval()
# Only the image is optimised, so the network weights need no gradients.
for param in model.parameters():
    param.requires_grad_(False)

# Preprocessing: resize, convert to a tensor, normalise per channel.
preprocess = transforms.Compose([
    transforms.Resize(512),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Force three channels: Normalize above is configured for exactly 3, so a
# grayscale or RGBA file would otherwise fail.
content_image = Image.open("content.jpg").convert("RGB")
style_image = Image.open("style.jpg").convert("RGB")

# Add a batch dimension and move both images to the device.
content_tensor = preprocess(content_image).unsqueeze(0).to(device)
style_tensor = preprocess(style_image).unsqueeze(0).to(device)


def _extract_features(model, x, layers):
    """Run ``x`` through ``model`` and return ``{index: activation}`` for ``layers``."""
    features = {}
    last_needed = max(layers)
    for index, layer in enumerate(model):
        x = layer(x)
        if index in layers:
            features[index] = x
        if index >= last_needed:
            break  # deeper layers are not needed
    return features


def _gram_matrix(feature):
    """Return the per-sample Gram matrix of a (B, C, H, W) activation, scaled by C*H*W."""
    batch, channels, height, width = feature.shape
    flat = feature.reshape(batch, channels, height * width)
    return torch.bmm(flat, flat.transpose(1, 2)) / (channels * height * width)


def style_transfer(model, content_input, style_input, iterations=500,
                   content_weight=1.0, style_weight=1e6):
    """Optimise a copy of ``content_input`` to take on the style of ``style_input``.

    Args:
        model: Sequential feature extractor indexed by ``CONTENT_LAYERS`` and
            ``STYLE_LAYERS``.
        content_input: Preprocessed content image tensor with a batch dimension.
        style_input: Preprocessed style image tensor with a batch dimension.
        iterations: Number of ``optimizer.step`` calls.  Each L-BFGS step may
            evaluate the loss several times (its default ``max_iter`` is 20).
        content_weight: Multiplier applied to the content loss.
        style_weight: Multiplier applied to the style loss.

    Returns:
        The optimised image tensor, still in the normalised input space.
    """
    wanted = set(CONTENT_LAYERS) | set(STYLE_LAYERS)

    # Targets are fixed, so compute them once without tracking gradients.
    with torch.no_grad():
        content_features = _extract_features(model, content_input, wanted)
        style_features = _extract_features(model, style_input, wanted)
        content_targets = {i: content_features[i] for i in CONTENT_LAYERS}
        style_targets = {i: _gram_matrix(style_features[i]) for i in STYLE_LAYERS}

    # The image being optimised starts as a copy of the content image.
    input_image = content_input.clone().requires_grad_(True)
    optimizer = torch.optim.LBFGS([input_image])

    def closure():
        # L-BFGS calls this to re-evaluate the loss and its gradient.
        optimizer.zero_grad()
        features = _extract_features(model, input_image, wanted)
        content_loss = sum(
            F.mse_loss(features[i], content_targets[i]) for i in CONTENT_LAYERS
        )
        style_loss = sum(
            F.mse_loss(_gram_matrix(features[i]), style_targets[i])
            for i in STYLE_LAYERS
        )
        total_loss = content_weight * content_loss + style_weight * style_loss
        total_loss.backward()
        return total_loss

    for _ in range(iterations):
        optimizer.step(closure)

    return input_image


# Run the style transfer.
output_image = style_transfer(model, content_tensor, style_tensor)

# Undo the normalisation and clamp to [0, 1] before converting to an image;
# the optimised tensor is in normalised space, not pixel space.
output_image = output_image.squeeze(0).detach().cpu()
mean = torch.tensor(IMAGENET_MEAN).view(3, 1, 1)
std = torch.tensor(IMAGENET_STD).view(3, 1, 1)
output_image = (output_image * std + mean).clamp(0, 1)
output_image = transforms.ToPILImage()(output_image)
output_image.save("style_transfer_gpu.jpg")
關鍵說明
1.硬件依賴:需 NVIDIA GPU 并安裝正確版本的 CUDA 和 cuDNN。
2.庫安裝:
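pip install torch torchvision opencv-python-headless cupy numba
注意:pip 提供的 opencv-python-headless 預編譯包不包含 CUDA 模塊,示例 2 中的 cv2.cuda 需要自行從源碼編譯啟用 CUDA 的 OpenCV;CuPy 建議安裝與本機 CUDA 版本匹配的預編譯包(例如 cupy-cuda12x)。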
3.性能對比:與 CPU 版本相比,GPU 加速通常快 10-100 倍(取決于任務復雜度)。
4.適用場景:
- PyTorch:適合深度學習相關的圖像處理(如 GAN、超分辨率)。
- OpenCV CUDA:適合傳統圖像處理加速(濾波、特征提取)。
- CuPy/Numba:適合自定義數值計算或科研算法。
到此這篇關于Python實現GPU加速圖像處理的代碼詳解的文章就介紹到這了,更多相關Python GPU加速圖像處理內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持腳本之家!