当前位置：首页 > java >正文

【C++ / Python / Cython】C++ 如何调用 Python？如何提高 Python 的运行速度？

java 2025/7/26 10:46:42

Cython 学习指南

什么是 Cython

Cython 是 Python 语言的一个超集，同时也是一个编译器：您可以编写类似 Python 的代码，并将其编译成 C 代码，从而获得接近 C 语言的性能。

主要特点：

性能提升：对于计算密集型、并且添加了静态类型声明的代码，通常可比纯 Python 快 10-100 倍（具体加速效果取决于代码本身）
简单易学：语法接近 Python
C 兼容性：可以直接调用 C 库
NumPy 集成：对科学计算友好

适用场景：

计算密集型任务
数值计算和科学计算
需要调用 C 库的项目
性能瓶颈优化

安装 Cython

方法一：使用 pip

pip install cython

方法二：使用 conda

conda install cython

验证安装

import cython
print(cython.__version__)

基础语法

文件扩展名

.pyx：Cython 源文件
.pxd：Cython 头文件（类似 C 的 .h 文件）
.pxi：Cython 包含文件

基本语法示例

# hello.pyx
def say_hello(name):
    """Return the greeting ``"Hello, <name>!"`` for ``name``."""
    return f"Hello, {name}!"


# Declaring C types for the arguments
def add_numbers(int a, int b):
    """Return the sum of two C ints as computed in a C ``int`` local."""
    cdef int result = a + b
    return result

数据类型

基本类型声明

# Integer types
cdef int i = 10
cdef long long big_number = 1000000000000

# Floating-point types
cdef float f = 3.14
cdef double d = 3.141592653589793

# Characters and C strings
cdef char c = 'A'
cdef char* s = "Hello"

# Boolean type
cdef bint flag = True  # bint is Cython's C-level boolean type

数组和指针

# Static (fixed-size) C arrays
cdef int arr[100]
cdef double matrix[10][10]

# Dynamic memory allocation
from libc.stdlib cimport malloc, free

cdef int* dynamic_array = <int*>malloc(1000 * sizeof(int))
if dynamic_array == NULL:
    # malloc reports failure by returning NULL; the pointer must not be used.
    raise MemoryError("Could not allocate dynamic_array")

# Release the memory once it is no longer needed
free(dynamic_array)

NumPy 数组

import numpy as np
cimport numpy as cnp


def process_array(cnp.ndarray[cnp.double_t, ndim=1] arr):
    """Double every element of a 1-D array of C doubles, in place.

    Args:
        arr: One-dimensional NumPy array of doubles; it is modified in place.

    Returns:
        The same array object that was passed in.
    """
    cdef int i
    cdef int n = arr.shape[0]
    for i in range(n):
        arr[i] = arr[i] * 2
    return arr

函数定义

普通函数（def）

def python_function(x):
    """Return ``x * 2``; a plain ``def`` function is callable from Python."""
    return x * 2

C 函数（cdef）

cdef int c_function(int x):
    """Return the square of ``x``.

    Declared with ``cdef``: a C-level function that Python code cannot call
    directly.
    """
    return x * x

混合函数（cpdef）

cpdef int hybrid_function(int x):
    """Return the cube of ``x``.

    Declared with ``cpdef``: callable from Python and, at C speed, from Cython.
    """
    return x * x * x

内联函数

cdef inline int fast_function(int x):
    """Return ``x + 1``; marked ``inline`` as a hint to the C compiler."""
    return x + 1

与 C 代码交互

调用 C 标准库

from libc.math cimport sin, cos, sqrt
from libc.stdio cimport printf


def math_operations(double x):
    """Print ``sin(x) + cos(x)`` via C ``printf`` and return its square root.

    The root is taken with C ``sqrt``, so the result is NaN whenever
    ``sin(x) + cos(x)`` is negative.
    """
    cdef double result = sin(x) + cos(x)
    printf("Result: %f\n", result)
    return sqrt(result)

调用外部 C 库

# Declare the external function (typically in a .pxd file)
cdef extern from "mylib.h":
    int external_function(int x, int y)


# Use it from the .pyx file
def use_external_lib(int a, int b):
    """Call the C function ``external_function(a, b)`` and return its result."""
    return external_function(a, b)

包装 C 结构体

from libc.stdlib cimport malloc, free

cdef extern from "person.h":
    struct Person:
        char* name
        int age


cdef class PyPerson:
    """Python wrapper that owns one heap-allocated C ``Person`` struct."""

    cdef Person* _person

    def __cinit__(self, name, age):
        self._person = <Person*>malloc(sizeof(Person))
        if self._person == NULL:
            raise MemoryError("Could not allocate Person")
        # Initialize the struct fields here...

    def __dealloc__(self):
        # Guard against a failed or skipped allocation before freeing.
        if self._person != NULL:
            free(self._person)
            self._person = NULL

编译 Cython 代码

方法一：使用 setup.py

# setup.py
from setuptools import setup
from Cython.Build import cythonize

# Compile hello.pyx into an extension module.
setup(
    ext_modules=cythonize("hello.pyx"),
)

编译命令：

python setup.py build_ext --inplace

方法二：使用 pyximport（开发阶段）

import pyximport
pyximport.install()

# .pyx files can now be imported directly
import hello
print(hello.say_hello("World"))

方法三：Jupyter Notebook 中使用

%load_ext Cython

%%cython
def fibonacci(int n):
    """Return the n-th Fibonacci number, with ``fibonacci(0) == 0``.

    The running values are held in C ``int`` variables, so the result
    overflows for large ``n``.
    """
    cdef int a, b, i
    a, b = 0, 1
    for i in range(n):
        a, b = b, a + b
    return a

C++ 调用 Cython 代码

在某些情况下，您可能需要从 C++ 应用程序中调用 Cython 编写的函数。这涉及将 Cython 代码编译为可被 C++ 链接的库，并正确处理 Python 运行时环境。

方法一：将 Cython 编译为 C++ 扩展

1. 编写 Cython 代码

# math_operations.pyx
cdef public int add_numbers(int a, int b):
    """Return ``a + b``; declared ``public`` so C/C++ code can call it."""
    return a + b


cdef public double calculate_average(double* arr, int size):
    """Return the mean of the first ``size`` doubles in ``arr``.

    Returns 0.0 when ``size`` is not positive.
    """
    cdef double total = 0.0  # renamed from `sum` to avoid shadowing the builtin
    cdef int i
    if size <= 0:
        return 0.0
    for i in range(size):
        total += arr[i]
    return total / size


# A public extension type must name its C object struct and its type object.
cdef public class Calculator [object CalculatorObject, type CalculatorType]:
    """Accumulator that holds a single double value."""

    cdef public double value

    def __init__(self, double initial_value=0.0):
        self.value = initial_value

    cdef public void add(self, double x):
        self.value += x

    cdef public void multiply(self, double x):
        self.value *= x

    cdef public double get_value(self):
        return self.value

2. 创建头文件声明

# math_operations.pxd
cdef public int add_numbers(int a, int b)
cdef public double calculate_average(double* arr, int size)

# A public extension type must name its C object struct and its type object.
# NOTE(review): when a .pxd declares the attributes of an extension type, the
# matching .pyx normally must not declare them again — confirm both files agree.
cdef public class Calculator [object CalculatorObject, type CalculatorType]:
    cdef public double value
    cdef public void add(self, double x)
    cdef public void multiply(self, double x)
    cdef public double get_value(self)

3. 编写 setup.py

# setup.py
from setuptools import setup
from Cython.Build import cythonize
from Cython.Distutils import build_ext
import numpy

setup(
    ext_modules=cythonize(
        "math_operations.pyx",
        # language_level is set once here; the original also passed it as a
        # separate keyword with the same value.
        compiler_directives={'language_level': 3},
    ),
    include_dirs=[numpy.get_include()],
    cmdclass={'build_ext': build_ext},
)

4. 编译生成 C++ 可用的文件

python setup.py build_ext --inplace

这将生成：

math_operations.h：头文件
math_operations.c：C 源文件
math_operations.so（Linux）或 math_operations.pyd（Windows）：编译得到的扩展模块（动态库）；实际文件名通常带有解释器和平台标签，例如 math_operations.cpython-311-x86_64-linux-gnu.so

方法二：使用 Python C API

1. 编写 Cython 包装器

# cython_wrapper.pyx
import sys
from cpython.ref cimport PyObject
from cpython.pylifecycle cimport Py_IsInitialized, Py_Initialize, Py_Finalize

# Owns the bytes object behind the pointer most recently returned by
# py_process_string(), so that pointer stays valid after the function returns.
cdef bytes _last_processed = b""


# Interpreter lifecycle helpers
# NOTE(review): functions exported by a Cython module are normally only safe
# to call once the interpreter is running and this module has been imported
# (PyImport_AppendInittab + Py_Initialize + import) — confirm the embedding
# code does that before relying on init_python().
cdef public int init_python():
    """Start the interpreter if it is not running.

    Returns 1 if this call initialized it, 0 if it was already initialized.
    """
    if not Py_IsInitialized():
        Py_Initialize()
        return 1
    return 0


cdef public void cleanup_python():
    """Finalize the interpreter if it is currently initialized."""
    if Py_IsInitialized():
        Py_Finalize()


# Numeric helpers
cdef public double py_calculate_sum(double* values, int count):
    """Return the sum of the first ``count`` doubles in ``values``."""
    cdef double result = 0.0
    cdef int i
    for i in range(count):
        result += values[i]
    return result


cdef public double py_calculate_product(double* values, int count):
    """Return the product of the first ``count`` doubles in ``values``."""
    cdef double result = 1.0
    cdef int i
    for i in range(count):
        result *= values[i]
    return result


# String helper
cdef public char* py_process_string(const char* input_str):
    """Upper-case a UTF-8 C string and replace spaces with underscores.

    Returns a pointer owned by this module: it stays valid until the next
    call to this function and must not be freed by the caller. Returns the
    static string "ERROR" for a NULL input or when processing fails.
    """
    global _last_processed
    if input_str == NULL:
        return b"ERROR"
    try:
        py_str = input_str.decode('utf-8')
        processed = py_str.upper().replace(' ', '_')
        # Store the encoded result in the module-level variable; returning a
        # pointer into a local bytes object would dangle after return.
        _last_processed = processed.encode('utf-8')
    except Exception:
        return b"ERROR"
    return _last_processed

2. C++ 调用代码

// main.cpp
#include <Python.h>

#include <iostream>
#include <string>
#include <vector>

// Header generated by Cython from cython_wrapper.pyx. It declares the
// `cdef public` functions used below and the module init function.
#include "cython_wrapper.h"

// Owns the embedded interpreter and forwards calls to the Cython module.
class CythonInterface {
private:
    bool initialized;

public:
    CythonInterface() : initialized(false) {
        // Register the module before starting the interpreter, then import
        // it, so its state exists before any exported function is called.
        if (PyImport_AppendInittab("cython_wrapper", PyInit_cython_wrapper) != -1) {
            Py_Initialize();
            PyObject* module = PyImport_ImportModule("cython_wrapper");
            if (module != nullptr) {
                Py_DECREF(module);
                initialized = true;
            } else {
                PyErr_Print();
                Py_Finalize();
            }
        }

        if (initialized) {
            std::cout << "Python 环境初始化成功" << std::endl;
        } else {
            std::cerr << "Python 环境初始化失败" << std::endl;
        }
    }

    ~CythonInterface() {
        if (initialized) {
            Py_Finalize();
            std::cout << "Python 环境已清理" << std::endl;
        }
    }

    // The interpreter must not be finalized twice, so copying is disabled.
    CythonInterface(const CythonInterface&) = delete;
    CythonInterface& operator=(const CythonInterface&) = delete;

    // Returns the sum of `values`, or 0.0 if uninitialized or empty.
    double calculateSum(const std::vector<double>& values) {
        if (!initialized || values.empty()) return 0.0;
        return py_calculate_sum(const_cast<double*>(values.data()),
                                static_cast<int>(values.size()));
    }

    // Returns the product of `values`, or 1.0 if uninitialized or empty.
    double calculateProduct(const std::vector<double>& values) {
        if (!initialized || values.empty()) return 1.0;
        return py_calculate_product(const_cast<double*>(values.data()),
                                    static_cast<int>(values.size()));
    }

    // Returns the processed copy of `input`; the C buffer is copied at once.
    std::string processString(const std::string& input) {
        if (!initialized) return "ERROR: Python not initialized";
        const char* result = py_process_string(input.c_str());
        return result != nullptr ? std::string(result) : std::string();
    }
};

int main() {
    // Create the interface object
    CythonInterface cython_interface;

    // Exercise the numeric helpers
    std::vector<double> numbers = {1.5, 2.5, 3.5, 4.5, 5.5};
    double sum = cython_interface.calculateSum(numbers);
    double product = cython_interface.calculateProduct(numbers);
    std::cout << "数组和: " << sum << std::endl;
    std::cout << "数组乘积: " << product << std::endl;

    // Exercise the string helper
    std::string input = "hello world";
    std::string processed = cython_interface.processString(input);
    std::cout << "处理后的字符串: " << processed << std::endl;

    return 0;
}

方法三：使用 pybind11 与 Cython 结合

1. 安装 pybind11

pip install pybind11

2. 编写 Cython 模块

# fast_math.pyx
import numpy as np
cimport numpy as cnp


def matrix_multiply_cython(cnp.ndarray[double, ndim=2] A,
                           cnp.ndarray[double, ndim=2] B):
    """Return the matrix product ``A @ B`` computed with explicit loops.

    Args:
        A: 2-D array of doubles with shape (m, n).
        B: 2-D array of doubles with shape (n, p).

    Returns:
        A new (m, p) array of doubles.

    Raises:
        ValueError: If the inner dimensions of ``A`` and ``B`` differ.
    """
    cdef int i, j, k
    cdef int m = A.shape[0]
    cdef int n = A.shape[1]
    cdef int p = B.shape[1]
    cdef cnp.ndarray[double, ndim=2] C

    if B.shape[0] != n:
        raise ValueError(
            f"shape mismatch: A is ({m}, {n}) but B has {B.shape[0]} rows"
        )

    C = np.zeros((m, p), dtype=np.double)
    for i in range(m):
        for j in range(p):
            for k in range(n):
                C[i, j] += A[i, k] * B[k, j]
    return C


def fibonacci_cython(int n):
    """Return the n-th Fibonacci number (0 for ``n <= 0``).

    Values are held in C ``int`` variables, so large ``n`` overflows.
    """
    cdef int a = 0, b = 1, i
    if n <= 0:
        return 0
    elif n == 1:
        return 1
    for i in range(2, n + 1):
        a, b = b, a + b
    return b

3. 编写 pybind11 包装器

// pybind_wrapper.cpp
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>

namespace py = pybind11;

// Imports the Cython module `fast_math` and forwards A and B to its
// `matrix_multiply_cython` function, returning the result as a double array.
py::array_t<double> call_matrix_multiply(py::array_t<double> A, py::array_t<double> B) {
    py::module fast_math = py::module::import("fast_math");
    return fast_math.attr("matrix_multiply_cython")(A, B).cast<py::array_t<double>>();
}

// Imports `fast_math` and returns `fibonacci_cython(n)` as an int.
int call_fibonacci(int n) {
    py::module fast_math = py::module::import("fast_math");
    return fast_math.attr("fibonacci_cython")(n).cast<int>();
}

PYBIND11_MODULE(cython_bridge, m) {
    m.doc() = "C++ 到 Cython 的桥接模块";

    m.def("matrix_multiply", &call_matrix_multiply,
          "高效矩阵乘法（通过 Cython）",
          py::arg("A"), py::arg("B"));

    m.def("fibonacci", &call_fibonacci,
          "计算斐波那契数（通过 Cython）",
          py::arg("n"));
}

编译和链接

CMake 配置示例

# CMakeLists.txt
cmake_minimum_required(VERSION 3.12)
project(CythonCppIntegration)

set(CMAKE_CXX_STANDARD 14)

# Locate the Python interpreter and development files
find_package(Python COMPONENTS Interpreter Development REQUIRED)

# Add the Python headers to the include path
include_directories(${Python_INCLUDE_DIRS})

# Build the executable
add_executable(main main.cpp)

# Link against the Python library
target_link_libraries(main ${Python_LIBRARIES})

# Also link the Cython-built library if one is found in the source directory
find_library(CYTHON_LIB math_operations PATHS ${CMAKE_CURRENT_SOURCE_DIR})
if(CYTHON_LIB)
    target_link_libraries(main ${CYTHON_LIB})
endif()

注意事项和最佳实践

1. 内存管理

// RAII-style guard: calls init_python() on construction and, if that call
// reported success (returned 1), calls cleanup_python() on destruction.
class SafeCythonCaller {
private:
    bool python_initialized;

public:
    SafeCythonCaller() : python_initialized(init_python() == 1) {}

    ~SafeCythonCaller() {
        if (python_initialized) {
            cleanup_python();
        }
    }

    // Copy construction and assignment are disabled so cleanup runs once.
    SafeCythonCaller(const SafeCythonCaller&) = delete;
    SafeCythonCaller& operator=(const SafeCythonCaller&) = delete;
};

2. 错误处理

// 带错误处理的调用
double safe_calculate_sum(const std::vector<double>& values) {try {if (values.empty()) {throw std::invalid_argument("空数组");}double result = py_calculate_sum(const_cast<double*>(values.data()), static_cast<int>(values.size()));// 检查 Python 异常if (PyErr_Occurred()) {PyErr_Print();throw std::runtime_error("Cython 函数执行出错");}return result;} catch (const std::exception& e) {std::cerr << "错误: " << e.what() << std::endl;return 0.0;}
}

3. 性能考虑

// 批量调用优化
class BatchProcessor {
public:std::vector<double> process_batch(const std::vector<std::vector<double>>& batches) {std::vector<double> results;results.reserve(batches.size());// 一次性初始化 Python 环境SafeCythonCaller caller;for (const auto& batch : batches) {// 批量处理，减少 Python 调用开销double result = py_calculate_sum(const_cast<double*>(batch.data()), static_cast<int>(batch.size()));results.push_back(result);}return results;}
};

完整示例项目结构

project/
├── cython_code/
│   ├── math_operations.pyx
│   ├── math_operations.pxd
│   └── setup.py
├── cpp_code/
│   ├── main.cpp
│   ├── cython_interface.h
│   └── CMakeLists.txt
├── build_scripts/
│   ├── build_cython.sh
│   └── build_cpp.sh
└── README.md

这种集成方式特别适用于：

需要在 C++ 应用中使用 Python 生态系统的算法
高性能计算项目中的特定模块优化
游戏引擎中集成 AI 算法
实时系统中的数据处理模块

性能优化技巧

1. 类型声明

# Slow version: untyped, every operation goes through Python objects
def slow_loop(n):
    """Return ``0 + 1 + ... + (n - 1)`` using Python objects."""
    total = 0
    for i in range(n):
        total += i
    return total


# Fast version: C-typed loop counter and accumulator
def fast_loop(int n):
    """Return ``0 + 1 + ... + (n - 1)`` using C integers.

    The accumulator is a ``long long`` because the sum exceeds the range of
    a 32-bit ``int`` once ``n`` passes roughly 65 000.
    """
    cdef long long total = 0
    cdef int i
    for i in range(n):
        total += i
    return total

2. 避免 Python 对象操作

# Slow: untyped containers and elements
def slow_string_ops(strings):
    """Return a list with the upper-cased form of every item in ``strings``."""
    result = []
    for s in strings:
        result.append(s.upper())
    return result


# Fast: the list and its elements are statically typed
def fast_string_ops(list strings):
    """Return a list with the upper-cased form of every ``str`` in ``strings``."""
    cdef list result = []
    cdef str s
    for s in strings:
        result.append(s.upper())
    return result

3. 使用内存视图（Memory Views）

import numpy as np
cimport numpy as cnp


def process_2d_array(double[:, :] arr):
    """Multiply every element of a 2-D double memoryview by 2.0, in place.

    Returns None; the caller's buffer is modified directly.
    """
    cdef int i, j
    cdef int rows = arr.shape[0]
    cdef int cols = arr.shape[1]
    for i in range(rows):
        for j in range(cols):
            arr[i, j] *= 2.0

4. 并行化

from cython.parallel import prange


def parallel_sum(double[:] arr):
    """Return the sum of a 1-D double memoryview using a ``prange`` loop.

    NOTE(review): the loop only runs in parallel when the extension is built
    with OpenMP enabled — confirm the build flags.
    """
    cdef double total = 0.0
    cdef int i
    cdef int n = arr.shape[0]
    # The in-place `+=` on `total` inside prange is treated as a reduction.
    for i in prange(n, nogil=True):
        total += arr[i]
    return total

实际应用案例

案例1：快速排序算法

# quicksort.pyx
def quicksort(list arr):
    """Return the items of ``arr`` in ascending order.

    A list of length 0 or 1 is returned as-is (the same object); otherwise a
    new list is built. The pivot is stored in a C ``int``.
    """
    cdef int pivot
    cdef list left, middle, right

    if len(arr) <= 1:
        return arr

    pivot = arr[len(arr) // 2]
    left, middle, right = [], [], []
    for x in arr:
        if x < pivot:
            left.append(x)
        elif x == pivot:
            middle.append(x)
        else:
            right.append(x)
    return quicksort(left) + middle + quicksort(right)


# More optimized version: sorts a C int array in place
cdef int partition(int* arr, int low, int high):
    """Partition ``arr[low..high]`` around ``arr[high]``.

    Returns the final index of the pivot.
    """
    cdef int pivot = arr[high]
    cdef int i = low - 1
    cdef int j, temp
    for j in range(low, high):
        if arr[j] <= pivot:
            i += 1
            temp = arr[i]
            arr[i] = arr[j]
            arr[j] = temp
    temp = arr[i + 1]
    arr[i + 1] = arr[high]
    arr[high] = temp
    return i + 1


cdef void quicksort_inplace(int* arr, int low, int high):
    """Sort ``arr[low..high]`` (inclusive bounds) in place."""
    # cdef declarations must sit at function level, not inside the `if`.
    cdef int pi
    if low < high:
        pi = partition(arr, low, high)
        quicksort_inplace(arr, low, pi - 1)
        quicksort_inplace(arr, pi + 1, high)

案例2：矩阵乘法

# matrix_mult.pyx
import numpy as np
cimport numpy as cnp


def matrix_multiply(double[:, :] A, double[:, :] B):
    """Return the matrix product of two 2-D double memoryviews.

    Args:
        A: Memoryview with shape (m, n).
        B: Memoryview with shape (n, p).

    Returns:
        A new (m, p) NumPy array of doubles.

    Raises:
        ValueError: If the inner dimensions of ``A`` and ``B`` differ.
    """
    cdef int i, j, k
    cdef int m = A.shape[0]
    cdef int n = A.shape[1]
    cdef int p = B.shape[1]
    cdef cnp.ndarray[cnp.double_t, ndim=2] C
    cdef double[:, :] C_view

    if B.shape[0] != n:
        raise ValueError(
            f"shape mismatch: A is ({m}, {n}) but B has {B.shape[0]} rows"
        )

    # Allocate the result and write through a memoryview of it
    C = np.zeros((m, p), dtype=np.double)
    C_view = C

    for i in range(m):
        for j in range(p):
            for k in range(n):
                C_view[i, j] += A[i, k] * B[k, j]
    return C

案例3：图像处理

# image_processing.pyx
import numpy as np
cimport numpy as cnp


def gaussian_blur(cnp.uint8_t[:, :, :] image, double sigma):
    """Blur an image with a fixed 3x3 kernel.

    Args:
        image: uint8 memoryview indexed as [row, column, channel].
        sigma: Accepted but not used; the 3x3 kernel below is hard-coded.

    Returns:
        A new uint8 array of the same shape. Pixels in the outermost rows
        and columns are never written and stay 0.
    """
    cdef int height = image.shape[0]
    cdef int width = image.shape[1]
    cdef int channels = image.shape[2]
    cdef int i, j, c, ki, kj
    cdef double sum_val
    cdef double kernel[3][3]

    # Output image, written through a memoryview
    cdef cnp.ndarray[cnp.uint8_t, ndim=3] result = np.zeros_like(image)
    cdef cnp.uint8_t[:, :, :] result_view = result

    # Simplified blur kernel (the weights add up to 1)
    kernel[0][:] = [1.0/16, 2.0/16, 1.0/16]
    kernel[1][:] = [2.0/16, 4.0/16, 2.0/16]
    kernel[2][:] = [1.0/16, 2.0/16, 1.0/16]

    for i in range(1, height - 1):
        for j in range(1, width - 1):
            for c in range(channels):
                sum_val = 0.0
                for ki in range(3):
                    for kj in range(3):
                        sum_val += kernel[ki][kj] * image[i + ki - 1, j + kj - 1, c]
                result_view[i, j, c] = <cnp.uint8_t>sum_val
    return result

常见问题和调试

1. 编译错误

# Force a full rebuild so the complete compiler output is shown again.
# (CYTHON_TRACE is a C macro that enables line tracing in the generated
# module; setting it as an environment variable does not add build output.)
python setup.py build_ext --inplace --force

# Generate an annotated HTML report (hello.html) for inspecting the generated C
cython -a hello.pyx

2. 性能分析

# Profile with cProfile
import cProfile
cProfile.run('your_function()')

# In Jupyter
%timeit your_function()
%prun your_function()

3. 调试技巧

# Debugging with print (during development)
def debug_function(int x):
    """Print the input and the doubled result, then return ``x * 2``."""
    print(f"Input: {x}")
    cdef int result = x * 2
    print(f"Result: {result}")
    return result


# Using an assertion
def safe_divide(double a, double b):
    """Return ``a / b``, asserting first that ``b`` is not zero."""
    assert b != 0, "Division by zero!"
    return a / b

4. 内存泄漏检查

# Correct memory management: allocate in __cinit__, release in __dealloc__
from libc.stdlib cimport malloc, free


cdef class SafeArray:
    """Owns a heap-allocated buffer of ``size`` C doubles."""

    cdef double* data
    cdef int size

    def __cinit__(self, int size):
        self.size = size
        self.data = <double*>malloc(size * sizeof(double))
        if not self.data:
            raise MemoryError("Could not allocate memory")

    def __dealloc__(self):
        # Only free a buffer that was actually allocated.
        if self.data:
            free(self.data)

最佳实践

1. 代码组织

将性能关键部分用 Cython 重写
保持 Python 接口的简洁性
使用类型注解提高可读性

2. 性能优化策略

先用 Python 实现原型
识别性能瓶颈
逐步用 Cython 优化关键部分
使用性能分析工具验证改进

3. 测试

# test_cython_module.py
import unittest
import numpy as np
# NOTE(review): assumes my_cython_module exports both functions and that its
# fast_function squares its argument — confirm against the module under test.
from my_cython_module import fast_function, process_array


class TestCythonModule(unittest.TestCase):
    """Unit tests for the compiled Cython module."""

    def test_fast_function(self):
        result = fast_function(5)
        self.assertEqual(result, 25)

    def test_with_numpy(self):
        # process_array is typed for C doubles, so the input must be float64;
        # an integer array would be rejected by the buffer type check.
        arr = np.array([1, 2, 3, 4, 5], dtype=np.float64)
        result = process_array(arr)
        expected = np.array([2, 4, 6, 8, 10], dtype=np.float64)
        np.testing.assert_array_equal(result, expected)


if __name__ == '__main__':
    unittest.main()