当前位置: 首页 > news >正文

llama.cpp学习笔记:后端加载

单例
// NOTE: the excerpts below are shown top-down for exposition. In a real
// translation unit ggml_backend_reg_entry and the two handle typedefs must be
// declared before ggml_backend_registry uses them.

// Registry (described in these notes as a singleton) that holds the
// registered backends and the devices they expose.
struct ggml_backend_registry {
    std::vector<ggml_backend_reg_entry> backends;
    std::vector<ggml_backend_dev_t> devices;
    // ...
};

// One entry of the registry: a backend registration plus its handle.
struct ggml_backend_reg_entry {
    ggml_backend_reg_t reg;
    dl_handle_ptr handle;  // presumably the dynamic-library handle the backend was loaded from — confirm
};

// Opaque pointer handles used throughout the backend API.
typedef struct ggml_backend_reg * ggml_backend_reg_t;
typedef struct ggml_backend_device * ggml_backend_dev_t;

// A device belonging to a backend: interface table, owning backend, and an
// opaque context pointer.
struct ggml_backend_device {
    struct ggml_backend_device_i iface;
    ggml_backend_reg_t reg;
    void * context;
};

每种后端一个单例:cuda、cpu等

版本号、接口和上下文信息

    struct ggml_backend_reg {int api_version; // initialize to GGML_BACKEND_API_VERSIONstruct ggml_backend_reg_i iface;void * context;};

后端对应的设备:接口、所属后端和上下文信息

// Opaque handle to a backend device.
typedef struct ggml_backend_device * ggml_backend_dev_t;

// Device record: its interface table, the backend it belongs to, and an
// opaque context pointer.
struct ggml_backend_device {
    struct ggml_backend_device_i iface;
    ggml_backend_reg_t           reg;
    void *                       context;
};

cuda后端

cuda后端iface

// Interface table for the CUDA backend registration; each slot is filled with
// the corresponding ggml_backend_cuda_reg_* function.
static const ggml_backend_reg_i ggml_backend_cuda_reg_interface = {
    /* .get_name          = */ ggml_backend_cuda_reg_get_name,
    /* .get_device_count  = */ ggml_backend_cuda_reg_get_device_count,
    /* .get_device        = */ ggml_backend_cuda_reg_get_device,
    /* .get_proc_address  = */ ggml_backend_cuda_reg_get_proc_address,
};

cuda后端context,存储多个cuda设备(GPU)的接口、所属后端和上下文等信息

// Context of the CUDA backend registration: the list of device handles
// (per the surrounding notes, one per CUDA GPU).
struct ggml_backend_cuda_reg_context {
    std::vector<ggml_backend_dev_t> devices;
};

cuda设备(GPU)的接口iface

// Interface table for a CUDA device. Every slot is bound to a
// ggml_backend_cuda_device_* function except buffer_from_host_ptr, for which
// no implementation is supplied (NULL).
static const ggml_backend_device_i ggml_backend_cuda_device_interface = {
    /* .get_name                = */ ggml_backend_cuda_device_get_name,
    /* .get_description         = */ ggml_backend_cuda_device_get_description,
    /* .get_memory              = */ ggml_backend_cuda_device_get_memory,
    /* .get_type                = */ ggml_backend_cuda_device_get_type,
    /* .get_props               = */ ggml_backend_cuda_device_get_props,
    /* .init_backend            = */ ggml_backend_cuda_device_init_backend,
    /* .get_buffer_type         = */ ggml_backend_cuda_device_get_buffer_type,
    /* .get_host_buffer_type    = */ ggml_backend_cuda_device_get_host_buffer_type,
    /* .buffer_from_host_ptr    = */ NULL,
    /* .supports_op             = */ ggml_backend_cuda_device_supports_op,
    /* .supports_buft           = */ ggml_backend_cuda_device_supports_buft,
    /* .offload_op              = */ ggml_backend_cuda_device_offload_op,
    /* .event_new               = */ ggml_backend_cuda_device_event_new,
    /* .event_free              = */ ggml_backend_cuda_device_event_free,
    /* .event_synchronize       = */ ggml_backend_cuda_device_event_synchronize,
};

cuda设备(GPU)的context

// Per-device context for a CUDA device: an integer device field plus its
// name and description strings.
struct ggml_backend_cuda_device_context {
    int         device;
    std::string name;
    std::string description;
};

 

CPU后端

CPU后端iface

// Interface table for the CPU backend registration.
static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
    /* .get_name         = */ ggml_backend_cpu_reg_get_name,
    /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
    /* .get_device       = */ ggml_backend_cpu_reg_get_device,
    /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
};

CPU后端无context

CPU设备接口iface

static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {/* .get_name             = */ ggml_backend_cpu_device_get_name,/* .get_description      = */ ggml_backend_cpu_device_get_description,/* .get_memory           = */ ggml_backend_cpu_device_get_memory,/* .get_type             = */ ggml_backend_cpu_device_get_type,/* .get_props            = */ ggml_backend_cpu_device_get_props,/* .init_backend         = */ ggml_backend_cpu_device_init_backend,/* .get_buffer_type      = */ ggml_backend_cpu_device_get_buffer_type,/* .get_host_buffer_type = */ NULL,/* .buffer_from_host_ptr = */ ggml_backend_cpu_device_buffer_from_host_ptr,/* .supports_op          = */ ggml_backend_cpu_device_supports_op,/* .supports_buft        = */ ggml_backend_cpu_device_supports_buft,/* .offload_op           = */ NULL,/* .event_new            = */ NULL,/* .event_free           = */ NULL,/* .event_synchronize    = */ NULL,
};

CPU设备的context

// Context of the CPU device. `description` defaults to "CPU"; the rest of
// the definition is elided in this excerpt.
struct ggml_backend_cpu_device_context {
    std::string description = "CPU";
    // ...
};

http://www.xdnf.cn/news/1074349.html

相关文章:

  • 图书管理系统练习项目源码-前后端分离-使用node.js来做后端开发
  • Conda 环境配置之 -- Mamba安装(causal-conv1d、mamba_ssm 最简单配置方法)-- 不需要重新配置CUDA
  • 领域驱动设计(DDD)【26】之CQRS模式初探
  • AlpineLinux安装部署elasticsearch
  • Kafka4.0初体验
  • Python爬虫:Requests与Beautiful Soup库详解
  • 重写(Override)与重载(Overload)深度解析
  • 【C++】C++中的友元函数和友元类
  • 71. 简化路径 —day94
  • Bugku——WEB篇(持续更新ing)
  • documents4j导出pdf
  • Ubuntu服务器(公网)- Ubuntu客户端(内网)的FRP内网穿透配置教程
  • 数据结构 哈希表、栈的应用与链式队列 6.29 (尾)
  • 现代 JavaScript (ES6+) 入门到实战(八):总结与展望 - 成为一名现代前端开发者
  • day46/60
  • H3C-路由器交换机-中继
  • 计算机组成原理与体系结构-实验一 进位加法器(Proteus 8.15)
  • 5 c++核心——文件操作
  • MySQL技巧
  • 如何优化RK3588集群的性能?支持12个RK3588云手机阵列
  • C++ 格式化输入输出
  • Java中对JSON的操作
  • 模拟多维物理过程与基于云的数值分析-AI云计算数值分析和代码验证
  • SpringCloud系列(41)--SpringCloud Config分布式配置中心简介
  • TCP/UDP协议深度解析(三):TCP流量控制的魔法—滑动窗口、拥塞控制与ACK的智慧
  • Java笔记
  • 野生动物检测数据集介绍-5,138张图片 野生动物保护监测 智能狩猎相机系统 生态研究与调查
  • 贝叶斯自学笔记——基础工具篇(一)
  • Python爬虫实战:研究Bleach库相关技术
  • 【linux】权限深入解析