cudaPointerGetAttributes详解
cudaPointerGetAttributes 是 CUDA 运行时 API 中的一个重要函数,用于查询指针的内存属性。以下是关于该函数的详细说明和正确使用方法:
函数原型
c
// Queries the memory attributes of `ptr` and stores them in `*attributes`
// (output parameter). Returns a cudaError_t status code.
cudaError_t cudaPointerGetAttributes(cudaPointerAttributes* attributes, const void* ptr
);
参数说明
参数 | 类型 | 说明 |
---|---|---|
attributes | cudaPointerAttributes* | 输出参数,存储指针属性 |
ptr | const void* | 要查询的指针 |
cudaPointerAttributes 结构体
c
// Result record filled in by cudaPointerGetAttributes().
struct cudaPointerAttributes {
    enum cudaMemoryType type;  // memory type of the allocation
    int device;                // device number
    void* devicePointer;       // device pointer
    void* hostPointer;         // host pointer
    int isManaged;             // whether the pointer is unified (managed) memory.
                               // NOTE: deprecated since CUDA 10.0 and absent from
                               // CUDA 11+ headers; test
                               // `type == cudaMemoryTypeManaged` instead.
};
内存类型 (cudaMemoryType)
- cudaMemoryTypeUnregistered:未注册的主机内存(例如 malloc 分配的普通可分页内存)
- cudaMemoryTypeHost:主机内存(由 CUDA 分配或注册的锁页内存)
- cudaMemoryTypeDevice:设备内存
- cudaMemoryTypeManaged:统一内存
特性 | 传统内存 | 统一内存 |
---|---|---|
内存分配 | malloc (主机), cudaMalloc (设备) | cudaMallocManaged |
数据拷贝 | 需要显式cudaMemcpy | 自动迁移 |
指针使用 | 需要不同指针 | 同一指针 |
适用场景 | 精细控制内存时 | 简化开发时 |
正确使用示例
1. 基础用法:检查指针类型
cpp
void checkPointer(const void* ptr) {cudaPointerAttributes attrs;cudaError_t err = cudaPointerGetAttributes(&attrs, ptr);if (err != cudaSuccess) {printf("Error: %s\n", cudaGetErrorString(err));return;}switch(attrs.type) {case cudaMemoryTypeHost:printf("Host memory pointer\n");break;case cudaMemoryTypeDevice:printf("Device memory pointer\n");printf("Allocated on device %d\n", attrs.device);break;case cudaMemoryTypeManaged:printf("Unified memory pointer\n");break;default:printf("Unknown memory type\n");}
}
2. 在NPP函数调用前的验证
cpp
void validateNPPInputs(const Npp8u* d_src, const Npp8u* d_dst, NppiSize roi) {// 检查源指针cudaPointerAttributes srcAttrs;cudaPointerGetAttributes(&srcAttrs, d_src);if (srcAttrs.type != cudaMemoryTypeDevice) {throw std::runtime_error("Source pointer is not device memory");}// 检查目标指针cudaPointerAttributes dstAttrs;cudaPointerGetAttributes(&dstAttrs, d_dst);if (dstAttrs.type != cudaMemoryTypeDevice) {throw std::runtime_error("Destination pointer is not device memory");}// 检查ROI有效性if (roi.width <= 0 || roi.height <= 0) {throw std::runtime_error("Invalid ROI size");}
}
3. 调试LUT内存问题
cpp
void debugLUTPointers(const Npp32s* lut[4]) {for (int i = 0; i < 4; i++) {cudaPointerAttributes attrs;cudaPointerGetAttributes(&attrs, lut[i]);printf("LUT channel %d: ", i);if (attrs.type == cudaMemoryTypeDevice) {printf("Valid device pointer on device %d\n", attrs.device);} else {printf("INVALID (type=%d)\n", attrs.type);}}
}
关键注意事项
- 指针有效性:
  - 对无效指针调用此函数可能返回 cudaErrorInvalidValue
  - 建议先检查指针是否为 NULL
- 多GPU环境:
cpp
// 检查指针是否在当前设备上 int currentDevice; cudaGetDevice(¤tDevice); if (attrs.device != currentDevice) {printf("Warning: Pointer is on device %d but current is %d\n", attrs.device, currentDevice); }
- 统一内存的特殊处理:
cpp
// The `isManaged` field was deprecated in CUDA 10.0 and is absent from
// CUDA 11+ headers; the memory type carries the same information.
if (attrs.type == cudaMemoryTypeManaged) {
    printf("Pointer is in unified memory\n");
}
- 错误处理最佳实践:
cpp
#define CUDA_CHECK(call) \ do { \cudaError_t err = (call); \if (err != cudaSuccess) { \fprintf(stderr, "CUDA error at %s:%d code=%d(%s)\n", \__FILE__, __LINE__, err, cudaGetErrorString(err)); \exit(1); \} \ } while(0)CUDA_CHECK(cudaPointerGetAttributes(&attrs, ptr));
典型应用场景
1. 安全的内存拷贝
cpp
void safeMemcpy(void* dst, const void* src, size_t count) {cudaPointerAttributes srcAttrs, dstAttrs;CUDA_CHECK(cudaPointerGetAttributes(&srcAttrs, src));CUDA_CHECK(cudaPointerGetAttributes(&dstAttrs, dst));enum cudaMemcpyKind kind;if (srcAttrs.type == cudaMemoryTypeHost && dstAttrs.type == cudaMemoryTypeDevice) {kind = cudaMemcpyHostToDevice;} // 其他情况判断...CUDA_CHECK(cudaMemcpy(dst, src, count, kind));
}
2. 自动内存释放工具
cpp
// Owning wrapper around a single cudaMalloc allocation; the memory is released
// in the destructor.
//
// Copying is disabled because two owners of the same pointer would both call
// cudaFree on it. Ownership can be transferred by move construction.
class CudaPtr {
    void* ptr_ = nullptr;  // owned device allocation, or nullptr when empty

public:
    // Allocates `size` bytes with cudaMalloc. Allocation failure is handled by
    // CUDA_CHECK.
    explicit CudaPtr(size_t size) {
        CUDA_CHECK(cudaMalloc(&ptr_, size));
    }

    CudaPtr(const CudaPtr&) = delete;
    CudaPtr& operator=(const CudaPtr&) = delete;

    // Takes over the allocation held by `other`, leaving `other` empty.
    CudaPtr(CudaPtr&& other) noexcept : ptr_(other.ptr_) {
        other.ptr_ = nullptr;
    }

    // Returns the raw pointer; ownership stays with this object.
    void* get() const { return ptr_; }

    // Frees the allocation if the runtime can still answer a query for it.
    // Errors are deliberately not raised here: a destructor must not throw
    // or terminate the process.
    ~CudaPtr() {
        if (ptr_) {
            cudaPointerAttributes attrs;
            if (cudaPointerGetAttributes(&attrs, ptr_) == cudaSuccess) {
                cudaFree(ptr_);
            }
        }
    }
};
性能考虑
- 频繁调用开销:
  - 该函数会同步设备操作
  - 避免在性能关键代码中频繁调用
- 替代方案:
cpp
// 对于已知的内存类型,可以缓存结果 static std::unordered_map<const void*, cudaPointerAttributes> ptrCache;const cudaPointerAttributes& getCachedAttributes(const void* ptr) {auto it = ptrCache.find(ptr);if (it == ptrCache.end()) {cudaPointerAttributes attrs;CUDA_CHECK(cudaPointerGetAttributes(&attrs, ptr));ptrCache[ptr] = attrs;}return ptrCache[ptr]; }
通过正确使用 cudaPointerGetAttributes,您可以有效验证内存指针属性,避免常见的 CUDA 内存错误。