Index of the blogger's CUDA learning series (continuously updated): Programming Languages | Getting Started with CUDA
This chapter covers: 1. the cudaDeviceProp struct; 2. how to query GPU devices.
The listing below is copied straight from the CUDA header; fields without extra notes simply keep the original English comments.
/**
 * CUDA device properties
 */
struct __device_builtin__ cudaDeviceProp
{
    char         name[256];                  /**< ASCII string identifying device, e.g. "GTX 1080 Ti" */
    cudaUUID_t   uuid;                       /**< 16-byte unique identifier */
    char         luid[8];                    /**< 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms */
    unsigned int luidDeviceNodeMask;         /**< LUID device node mask. Value is undefined on TCC and non-Windows platforms */
    size_t       totalGlobalMem;             /**< Global memory available on device in bytes */
    size_t       sharedMemPerBlock;          /**< Shared memory available per block in bytes */
    int          regsPerBlock;               /**< 32-bit registers available per block */
    int          warpSize;                   /**< Warp size in threads */
    size_t       memPitch;                   /**< Maximum pitch in bytes allowed by memory copies */
    int          maxThreadsPerBlock;         /**< Maximum number of threads per block */
    int          maxThreadsDim[3];           /**< Maximum size of each dimension of a block */
    int          maxGridSize[3];             /**< Maximum size of each dimension of a grid */
    int          clockRate;                  /**< Clock frequency in kilohertz */
    size_t       totalConstMem;              /**< Constant memory available on device in bytes */
    int          major;                      /**< Major compute capability */
    int          minor;                      /**< Minor compute capability */
    size_t       textureAlignment;           /**< Alignment requirement for textures */
    size_t       texturePitchAlignment;      /**< Pitch alignment requirement for texture references bound to pitched memory */
    int          deviceOverlap;              /**< Device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. */
    int          multiProcessorCount;        /**< Number of multiprocessors on device */
    int          kernelExecTimeoutEnabled;   /**< Specified whether there is a run time limit on kernels */
    int          integrated;                 /**< Device is integrated as opposed to discrete */
    int          canMapHostMemory;           /**< Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */
    int          computeMode;                /**< Compute mode: Default, Exclusive, or Prohibited (see ::cudaComputeMode) */
    int          maxTexture1D;               /**< Maximum 1D texture size */
    int          maxTexture1DMipmap;         /**< Maximum 1D mipmapped texture size */
    int          maxTexture1DLinear;         /**< Maximum size for 1D textures bound to linear memory */
    int          maxTexture2D[2];            /**< Maximum 2D texture dimensions */
    int          maxTexture2DMipmap[2];      /**< Maximum 2D mipmapped texture dimensions */
    int          maxTexture2DLinear[3];      /**< Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory */
    int          maxTexture2DGather[2];      /**< Maximum 2D texture dimensions if texture gather operations have to be performed */
    int          maxTexture3D[3];            /**< Maximum 3D texture dimensions */
    int          maxTexture3DAlt[3];         /**< Maximum alternate 3D texture dimensions */
    int          maxTextureCubemap;          /**< Maximum Cubemap texture dimensions */
    int          maxTexture1DLayered[2];     /**< Maximum 1D layered texture dimensions */
    int          maxTexture2DLayered[3];     /**< Maximum 2D layered texture dimensions */
    int          maxTextureCubemapLayered[2];/**< Maximum Cubemap layered texture dimensions */
    int          maxSurface1D;               /**< Maximum 1D surface size */
    int          maxSurface2D[2];            /**< Maximum 2D surface dimensions */
    int          maxSurface3D[3];            /**< Maximum 3D surface dimensions */
    int          maxSurface1DLayered[2];     /**< Maximum 1D layered surface dimensions */
    int          maxSurface2DLayered[3];     /**< Maximum 2D layered surface dimensions */
    int          maxSurfaceCubemap;          /**< Maximum Cubemap surface dimensions */
    int          maxSurfaceCubemapLayered[2];/**< Maximum Cubemap layered surface dimensions */
    size_t       surfaceAlignment;           /**< Alignment requirements for surfaces */
    int          concurrentKernels;          /**< Device can possibly execute multiple kernels concurrently */
    int          ECCEnabled;                 /**< Device has ECC support enabled */
    int          pciBusID;                   /**< PCI bus ID of the device */
    int          pciDeviceID;                /**< PCI device ID of the device */
    int          pciDomainID;                /**< PCI domain ID of the device */
    int          tccDriver;                  /**< 1 if device is a Tesla device using TCC driver, 0 otherwise */
    int          asyncEngineCount;           /**< Number of asynchronous engines */
    int          unifiedAddressing;          /**< Device shares a unified address space with the host */
    int          memoryClockRate;            /**< Peak memory clock frequency in kilohertz */
    int          memoryBusWidth;             /**< Global memory bus width in bits */
    int          l2CacheSize;                /**< Size of L2 cache in bytes */
    int          maxThreadsPerMultiProcessor;/**< Maximum resident threads per multiprocessor */
    int          streamPrioritiesSupported;  /**< Device supports stream priorities */
    int          globalL1CacheSupported;     /**< Device supports caching globals in L1 */
    int          localL1CacheSupported;      /**< Device supports caching locals in L1 */
    size_t       sharedMemPerMultiprocessor; /**< Shared memory available per multiprocessor in bytes */
    int          regsPerMultiprocessor;      /**< 32-bit registers available per multiprocessor */
    int          managedMemory;              /**< Device supports allocating managed memory on this system */
    int          isMultiGpuBoard;            /**< Device is on a multi-GPU board */
    int          multiGpuBoardGroupID;       /**< Unique identifier for a group of devices on the same multi-GPU board */
    int          hostNativeAtomicSupported;  /**< Link between the device and the host supports native atomic operations */
    int          singleToDoublePrecisionPerfRatio; /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
    int          pageableMemoryAccess;       /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
    int          concurrentManagedAccess;    /**< Device can coherently access managed memory concurrently with the CPU */
    int          computePreemptionSupported; /**< Device supports Compute Preemption */
    int          canUseHostPointerForRegisteredMem; /**< Device can access host registered memory at the same virtual address as the CPU */
    int          cooperativeLaunch;          /**< Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel */
    int          cooperativeMultiDeviceLaunch; /**< Device can participate in cooperative kernels launched via ::cudaLaunchCooperativeKernelMultiDevice */
    size_t       sharedMemPerBlockOptin;     /**< Per device maximum shared memory per block usable by special opt in */
    int          pageableMemoryAccessUsesHostPageTables; /**< Device accesses pageable memory via the host's page tables */
    int          directManagedMemAccessFromHost; /**< Host can directly access managed memory on the device without migration. */
};
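To actually query the GPUs on a machine, the usual pattern is: call cudaGetDeviceCount() to find how many CUDA devices are present, then call cudaGetDeviceProperties() for each one to fill a cudaDeviceProp and read whatever fields you care about. The sketch below is minimal and only prints a handful of illustrative fields; the selection and formatting are my own, not part of the original post.

#include <stdio.h>
#include <cuda_runtime.h>

int main(void)
{
    int count = 0;
    cudaError_t err = cudaGetDeviceCount(&count);   /* number of CUDA-capable devices */
    if (err != cudaSuccess) {
        printf("cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    for (int dev = 0; dev < count; ++dev) {
        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, dev);        /* fill the struct listed above */

        printf("Device %d: %s\n", dev, prop.name);
        printf("  Compute capability:      %d.%d\n", prop.major, prop.minor);
        printf("  Global memory:           %zu MB\n", prop.totalGlobalMem >> 20);
        printf("  Constant memory:         %zu KB\n", prop.totalConstMem >> 10);
        printf("  Shared memory per block: %zu KB\n", prop.sharedMemPerBlock >> 10);
        printf("  Registers per block:     %d\n", prop.regsPerBlock);
        printf("  Warp size:               %d\n", prop.warpSize);
        printf("  Multiprocessors:         %d\n", prop.multiProcessorCount);
        printf("  Max threads per block:   %d\n", prop.maxThreadsPerBlock);
        printf("  Max block dims:          (%d, %d, %d)\n",
               prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
        printf("  Max grid dims:           (%d, %d, %d)\n",
               prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
        printf("  Kernel timeout enabled:  %s\n", prop.kernelExecTimeoutEnabled ? "yes" : "no");
    }
    return 0;
}

Save it as a .cu file, build with nvcc, and run; each printed value corresponds one-to-one to a field of the cudaDeviceProp struct above.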