如何在 Windows 中复现 Accelerate（苹果 DSP 库）的功能？

发布于 2025-01-06 05:01:35 字数 1059 浏览 1 评论 0原文

我将尽可能简洁地说：

由于一些非常具体的硬件限制，我有一个项目需要移植到 Windows。有一个小实用程序类，它使用 Apple DSP 库 Accelerate 执行矢量距离计算。我需要重写它，以便它在没有所述库的情况下运行，但一直无法找到合适的替代品。我最好的行动方案是什么？

#include <Accelerate/Accelerate.h>

/**
 * Euclidean (L2) distance between two float vectors.
 *
 * Computes sqrt(sum((y[i] - x[i])^2)) over `count` elements using the
 * vDSP routines vDSP_vsub, vDSP_vsq and vDSP_sve with unit strides.
 *
 * @param x      first input vector, read with stride 1
 * @param y      second input vector, read with stride 1
 * @param count  number of elements processed from each vector
 * @return the distance; 0 when `count` is 0; NAN when the scratch buffer
 *         cannot be allocated.
 */
inline float distBetween(float *x, float *y, unsigned int count) {
    // An empty vector pair has distance 0; this also avoids a zero-byte
    // allocation, whose result may legitimately be NULL.
    if (count == 0) {
        return 0.0f;
    }

    float *tmp = (float*)malloc(count * sizeof *tmp);
    if (tmp == NULL) {
        return NAN;
    }

    // tmp = y - x
    vDSP_vsub(x, 1, y, 1, tmp, 1, count);
    // tmp = tmp squared, element-wise (in place)
    vDSP_vsq(tmp, 1, tmp, 1, count);
    // sum = sum of all elements of tmp
    float sum = 0.0f;
    vDSP_sve(tmp, 1, &sum, count);

    // The buffer came from malloc, so it must be released with free();
    // pairing malloc with delete is undefined behaviour.
    free(tmp);
    return sqrt(sum);
}

/**
 * Cosine distance between two float vectors: 1 - (x . y) / (|x| * |y|).
 *
 * The dot product comes from vDSP_dotpr; each magnitude is obtained by
 * squaring the vector into a scratch buffer (vDSP_vsq), summing it
 * (vDSP_sve) and taking the square root.
 *
 * @param x      first input vector, read with stride 1
 * @param y      second input vector, read with stride 1
 * @param count  number of elements processed from each vector
 * @return the cosine distance; NAN when the scratch buffer cannot be
 *         allocated. No guard is applied against a zero magnitude, so the
 *         division then yields a non-finite value.
 */
inline float cosineDistance(float *x, float *y, unsigned int count) {
    float dotProd = 0.0f, magX = 0.0f, magY = 0.0f;
    float *tmp = (float*)malloc(count * sizeof *tmp);
    if (tmp == NULL) {
        return NAN;
    }

    vDSP_dotpr(x, 1, y, 1, &dotProd, count);

    // |x| = sqrt(sum(x^2))
    vDSP_vsq(x, 1, tmp, 1, count);
    vDSP_sve(tmp, 1, &magX, count);
    magX = sqrt(magX);

    // |y| = sqrt(sum(y^2)), reusing the same scratch buffer
    vDSP_vsq(y, 1, tmp, 1, count);
    vDSP_sve(tmp, 1, &magY, count);
    magY = sqrt(magY);

    // The buffer came from malloc, so it must be released with free();
    // pairing malloc with delete is undefined behaviour.
    free(tmp);

    return 1.0 - (dotProd / (magX * magY));
}

原文

I'm going to make this as succinct as I can:

I have a project that I need to port to Windows due to some very specific hardware constraints. There's a little utility class which performs vector distance calculations using Accelerate, the Apple DSP library. I need to rewrite this so that it functions without said library, but I have been unable to find a suitable replacement. What is my best course of action?

#include <Accelerate/Accelerate.h>

/**
 * Euclidean (L2) distance between two float vectors.
 *
 * Computes sqrt(sum((y[i] - x[i])^2)) over `count` elements using the
 * vDSP routines vDSP_vsub, vDSP_vsq and vDSP_sve with unit strides.
 *
 * @param x      first input vector, read with stride 1
 * @param y      second input vector, read with stride 1
 * @param count  number of elements processed from each vector
 * @return the distance; 0 when `count` is 0; NAN when the scratch buffer
 *         cannot be allocated.
 */
inline float distBetween(float *x, float *y, unsigned int count) {
    // An empty vector pair has distance 0; this also avoids a zero-byte
    // allocation, whose result may legitimately be NULL.
    if (count == 0) {
        return 0.0f;
    }

    float *tmp = (float*)malloc(count * sizeof *tmp);
    if (tmp == NULL) {
        return NAN;
    }

    // tmp = y - x
    vDSP_vsub(x, 1, y, 1, tmp, 1, count);
    // tmp = tmp squared, element-wise (in place)
    vDSP_vsq(tmp, 1, tmp, 1, count);
    // sum = sum of all elements of tmp
    float sum = 0.0f;
    vDSP_sve(tmp, 1, &sum, count);

    // The buffer came from malloc, so it must be released with free();
    // pairing malloc with delete is undefined behaviour.
    free(tmp);
    return sqrt(sum);
}

/**
 * Cosine distance between two float vectors: 1 - (x . y) / (|x| * |y|).
 *
 * The dot product comes from vDSP_dotpr; each magnitude is obtained by
 * squaring the vector into a scratch buffer (vDSP_vsq), summing it
 * (vDSP_sve) and taking the square root.
 *
 * @param x      first input vector, read with stride 1
 * @param y      second input vector, read with stride 1
 * @param count  number of elements processed from each vector
 * @return the cosine distance; NAN when the scratch buffer cannot be
 *         allocated. No guard is applied against a zero magnitude, so the
 *         division then yields a non-finite value.
 */
inline float cosineDistance(float *x, float *y, unsigned int count) {
    float dotProd = 0.0f, magX = 0.0f, magY = 0.0f;
    float *tmp = (float*)malloc(count * sizeof *tmp);
    if (tmp == NULL) {
        return NAN;
    }

    vDSP_dotpr(x, 1, y, 1, &dotProd, count);

    // |x| = sqrt(sum(x^2))
    vDSP_vsq(x, 1, tmp, 1, count);
    vDSP_sve(tmp, 1, &magX, count);
    magX = sqrt(magX);

    // |y| = sqrt(sum(y^2)), reusing the same scratch buffer
    vDSP_vsq(y, 1, tmp, 1, count);
    vDSP_sve(tmp, 1, &magY, count);
    magY = sqrt(magY);

    // The buffer came from malloc, so it must be released with free();
    // pairing malloc with delete is undefined behaviour.
    free(tmp);

    return 1.0 - (dotProd / (magX * magY));
}

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

大姐，你呐 2025-01-13 05:01:35

向量函数通常通过特定的汇编语言指令来实现。这个实现非常慢。也许您需要一个使用 SSE 指令的库。

在您的代码中，所有参数 stride_x、stride_y、stride_res 都等于 1，因此我建议您从函数参数中删除它们。代码应该更快。

/*
 * Portable stand-in for vDSP_vsub: strided element-wise subtraction.
 *
 * For i in [0, count): res[i * stride_res] = y[i * stride_y] - x[i * stride_x].
 * The SECOND vector minus the FIRST is intentional; it matches the caller's
 * "t = y - x" usage and Apple's documented C = A - B, where B is the first
 * argument.
 *
 * x, y        input vectors
 * res         output vector (may alias an input when strides match)
 * stride_*    distance, in elements, between consecutive items
 * count       number of elements to process; nothing is done if <= 0
 *
 * Declared void: the routine produces its result only through `res`
 * (the earlier `float` return type never had a matching return statement).
 */
void
vDSP_vsub(float *x, int stride_x, float *y, int stride_y, float *res, int stride_res, int count)
{
    /* Index from the base pointers so no pointer is ever advanced beyond
       the last element that is actually accessed. */
    for (int i = 0; i < count; i++) {
        res[(long long)i * stride_res] =
            y[(long long)i * stride_y] - x[(long long)i * stride_x];
    }
}

/*
 * Portable stand-in for vDSP_vsq: strided element-wise square.
 *
 * For i in [0, count): res[i * stride_res] = x[i * stride_x]^2.
 * The result is ASSIGNED, not accumulated, so the previous contents of
 * `res` (possibly uninitialised) are never read, and the in-place call
 * vDSP_vsq(t, 1, t, 1, n) yields t[i]^2 rather than t[i] + t[i]^2.
 *
 * x           input vector
 * res         output vector (may be the same buffer as x)
 * stride_*    distance, in elements, between consecutive items
 * count       number of elements to process; nothing is done if <= 0
 *
 * Declared void: the routine produces its result only through `res`
 * (the earlier `float` return type never had a matching return statement).
 */
void
vDSP_vsq(float *x, int stride_x, float *res, int stride_res, int count)
{
    for (int i = 0; i < count; i++) {
        /* Read the input first so the in-place case is well defined. */
        const float v = x[(long long)i * stride_x];
        res[(long long)i * stride_res] = v * v;
    }
}

/*
 * Portable stand-in for vDSP_sve: sum of the elements of a strided vector.
 *
 * Stores x[0] + x[stride_x] + ... (count terms) into *res; stores 0 when
 * count <= 0.
 *
 * x           input vector
 * stride_x    distance, in elements, between consecutive items
 * res         receives the sum
 * count       number of elements to add
 *
 * Declared void: the routine produces its result only through `res`
 * (the earlier `float` return type never had a matching return statement).
 */
void
vDSP_sve(float *x, int stride_x, float *res, int count)
{
    float total = 0.0f;

    for (int i = 0; i < count; i++) {
        total += x[(long long)i * stride_x];
    }
    *res = total;
}

/*
 * Portable stand-in for vDSP_dotpr: dot product of two strided vectors.
 *
 * Stores sum over i in [0, count) of x[i * stride_x] * y[i * stride_y]
 * into *res; stores 0 when count <= 0.
 *
 * x, y        input vectors
 * stride_*    distance, in elements, between consecutive items
 * res         receives the dot product
 * count       number of element pairs to multiply and add
 *
 * Declared void: the routine produces its result only through `res`
 * (the earlier `float` return type never had a matching return statement).
 */
void
vDSP_dotpr(float *x, int stride_x, float *y, int stride_y, float *res, int count)
{
    float total = 0.0f;

    for (int i = 0; i < count; i++) {
        total += x[(long long)i * stride_x] * y[(long long)i * stride_y];
    }
    *res = total;
}

Vector functions are usually implemented with specific assembly-language instructions. This implementation is very slow. Perhaps you need a library that uses SSE instructions.

In your code, all of the stride arguments (stride_x, stride_y, stride_res) are equal to 1, so I recommend removing them from the function arguments. The code should then be faster.

/*
 * Portable stand-in for vDSP_vsub: strided element-wise subtraction.
 *
 * For i in [0, count): res[i * stride_res] = y[i * stride_y] - x[i * stride_x].
 * The SECOND vector minus the FIRST is intentional; it matches the caller's
 * "t = y - x" usage and Apple's documented C = A - B, where B is the first
 * argument.
 *
 * x, y        input vectors
 * res         output vector (may alias an input when strides match)
 * stride_*    distance, in elements, between consecutive items
 * count       number of elements to process; nothing is done if <= 0
 *
 * Declared void: the routine produces its result only through `res`
 * (the earlier `float` return type never had a matching return statement).
 */
void
vDSP_vsub(float *x, int stride_x, float *y, int stride_y, float *res, int stride_res, int count)
{
    /* Index from the base pointers so no pointer is ever advanced beyond
       the last element that is actually accessed. */
    for (int i = 0; i < count; i++) {
        res[(long long)i * stride_res] =
            y[(long long)i * stride_y] - x[(long long)i * stride_x];
    }
}

/*
 * Portable stand-in for vDSP_vsq: strided element-wise square.
 *
 * For i in [0, count): res[i * stride_res] = x[i * stride_x]^2.
 * The result is ASSIGNED, not accumulated, so the previous contents of
 * `res` (possibly uninitialised) are never read, and the in-place call
 * vDSP_vsq(t, 1, t, 1, n) yields t[i]^2 rather than t[i] + t[i]^2.
 *
 * x           input vector
 * res         output vector (may be the same buffer as x)
 * stride_*    distance, in elements, between consecutive items
 * count       number of elements to process; nothing is done if <= 0
 *
 * Declared void: the routine produces its result only through `res`
 * (the earlier `float` return type never had a matching return statement).
 */
void
vDSP_vsq(float *x, int stride_x, float *res, int stride_res, int count)
{
    for (int i = 0; i < count; i++) {
        /* Read the input first so the in-place case is well defined. */
        const float v = x[(long long)i * stride_x];
        res[(long long)i * stride_res] = v * v;
    }
}

/*
 * Portable stand-in for vDSP_sve: sum of the elements of a strided vector.
 *
 * Stores x[0] + x[stride_x] + ... (count terms) into *res; stores 0 when
 * count <= 0.
 *
 * x           input vector
 * stride_x    distance, in elements, between consecutive items
 * res         receives the sum
 * count       number of elements to add
 *
 * Declared void: the routine produces its result only through `res`
 * (the earlier `float` return type never had a matching return statement).
 */
void
vDSP_sve(float *x, int stride_x, float *res, int count)
{
    float total = 0.0f;

    for (int i = 0; i < count; i++) {
        total += x[(long long)i * stride_x];
    }
    *res = total;
}

/*
 * Portable stand-in for vDSP_dotpr: dot product of two strided vectors.
 *
 * Stores sum over i in [0, count) of x[i * stride_x] * y[i * stride_y]
 * into *res; stores 0 when count <= 0.
 *
 * x, y        input vectors
 * stride_*    distance, in elements, between consecutive items
 * res         receives the dot product
 * count       number of element pairs to multiply and add
 *
 * Declared void: the routine produces its result only through `res`
 * (the earlier `float` return type never had a matching return statement).
 */
void
vDSP_dotpr(float *x, int stride_x, float *y, int stride_y, float *res, int count)
{
    float total = 0.0f;

    for (int i = 0; i < count; i++) {
        total += x[(long long)i * stride_x] * y[(long long)i * stride_y];
    }
    *res = total;
}

回复收藏 0 原文