c＆＃x2b;＆＃x2b;：实现GSL算法中的两个自变量的非线性最小二乘拟合

发布于 2025-02-13 23:15:02 字数 7412 浏览 2 评论 0原文

在修复程序非线性最小二乘GSL中拟合功能的参数（@zkoza成功回答），我想实现一个可以通过固定其某些参数，同时将其他参数更改以找到最佳数据拟合的算法，该算法可以将数据拟合到非线性函数。与我以前的问题的不同之处在于，我想拥有两个自变量，而不是一个自变量。

非线性函数用于符合数据的非线性函数

double gaussian(double x, double b, double a, double c)
{
    const double z = (x - b) / c;
    return a * std::exp(-0.5 * z * z);
}

在我以前的问题中，我正在考虑x是唯一的自变量。现在，我想考虑两个独立变量，x和b。

用于拟合非线性函数的原始算法仅使用一个自变量（固定变量a）是GSL非线性最小二乘算法的C ++包装器（借用来自 https://github.com/eeleobert/eeleobert/gsl-curve-fit/ blob/master/example.cpp ）：

template <typename F, size_t... Is>
auto gen_tuple_impl(F func, std::index_sequence<Is...> )
{
    return std::make_tuple(func(Is)...);
}

template <size_t N, typename F>
auto gen_tuple(F func)
{
    return gen_tuple_impl(func, std::make_index_sequence<N>{} );
}

template <class R, class... ARGS>
struct function_ripper {
    static constexpr size_t n_args = sizeof...(ARGS);
};

template <class R, class... ARGS>
auto constexpr n_params(R (ARGS...) )
{
    return function_ripper<R, ARGS...>();
}


auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
             gsl_multifit_nlinear_parameters *params) -> std::vector<double>
{
  // This specifies a trust region method
  const gsl_multifit_nlinear_type *T = gsl_multifit_nlinear_trust;
  const size_t max_iter = 200;
  const double xtol = 1.0e-8;
  const double gtol = 1.0e-8;
  const double ftol = 1.0e-8;

  auto *work = gsl_multifit_nlinear_alloc(T, params, fdf->n, fdf->p);
  int info;

  // initialize solver
  gsl_multifit_nlinear_init(initial_params, fdf, work);
  //iterate until convergence
  gsl_multifit_nlinear_driver(max_iter, xtol, gtol, ftol, nullptr, nullptr, &info, work);

  // result will be stored here
  gsl_vector * y    = gsl_multifit_nlinear_position(work);
  auto result = std::vector<double>(initial_params->size);

  for(int i = 0; i < result.size(); i++)
  {
    result[i] = gsl_vector_get(y, i);
  }

  auto niter = gsl_multifit_nlinear_niter(work);
  auto nfev  = fdf->nevalf;
  auto njev  = fdf->nevaldf;
  auto naev  = fdf->nevalfvv;

  // nfev - number of function evaluations
  // njev - number of Jacobian evaluations
  // naev - number of f_vv evaluations
  //logger::debug("curve fitted after ", niter, " iterations {nfev = ", nfev, "} {njev = ", njev, "} {naev = ", naev, "}");

  gsl_multifit_nlinear_free(work);
  gsl_vector_free(initial_params);
  return result;
}

template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*
{
    auto* result = gsl_vector_alloc(vec.size());
    int i = 0;
    for(const auto e: vec)
    {
        gsl_vector_set(result, i, e);
        i++;
    }
    return result;
}


template<typename C1>
struct fit_data
{
    const std::vector<double>& t;
    const std::vector<double>& y;
    // the actual function to be fitted
    C1 f;
};


template<typename FitData, int n_params>
int internal_f(const gsl_vector* x, void* params, gsl_vector *f)
{
    auto* d  = static_cast<FitData*>(params);
    // Convert the parameter values from gsl_vector (in x) into std::tuple
    auto init_args = [x](int index)
    {
        return gsl_vector_get(x, index);
    };
    auto parameters = gen_tuple<n_params>(init_args);

    // Calculate the error for each...
    for (size_t i = 0; i < d->t.size(); ++i)
    {
        double ti = d->t[i];
        double yi = d->y[i];
        auto func = [ti, &d](auto ...xs)
        {
            // call the actual function to be fitted
            return d->f(ti, xs...);
        };
        auto y = std::apply(func, parameters);
        gsl_vector_set(f, i, yi - y);
    }
    return GSL_SUCCESS;
}

using func_f_type   = int (*) (const gsl_vector*, void*, gsl_vector*);
using func_df_type  = int (*) (const gsl_vector*, void*, gsl_matrix*);
using func_fvv_type = int (*) (const gsl_vector*, const gsl_vector *, void *, gsl_vector *);

template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*;


auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
             gsl_multifit_nlinear_parameters *params) -> std::vector<double>;

template<typename C1>
auto curve_fit_impl(func_f_type f, func_df_type df, func_fvv_type fvv, gsl_vector* initial_params, fit_data<C1>& fd) -> std::vector<double>
{
    assert(fd.t.size() == fd.y.size());

    auto fdf = gsl_multifit_nlinear_fdf();
    auto fdf_params = gsl_multifit_nlinear_default_parameters();

    fdf.f   = f;
    fdf.df  = df;
    fdf.fvv = fvv;
    fdf.n   = fd.t.size();
    fdf.p   = initial_params->size;
    fdf.params = &fd;

    // "This selects the Levenberg-Marquardt algorithm with geodesic acceleration."
    fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel;
    return internal_solve_system(initial_params, &fdf, &fdf_params);
}


template <typename Callable, auto n>
auto curve_fit(Callable f, const std::array<double, n>& initial_params, const std::vector<double>& x, const std::vector<double>& y) -> std::vector<double>
{
    // We can't pass lambdas without convert to std::function.
    //constexpr auto n = 3;//decltype(n_params(f))::n_args - 5;
    //constexpr auto n = 2;
    assert(initial_params.size() == n);

    auto params = internal_make_gsl_vector_ptr(initial_params);
    auto fd = fit_data<Callable>{x, y, f};
    return curve_fit_impl(internal_f<decltype(fd), n>, nullptr, nullptr, params,  fd);
}

为了修复高斯函数的参数之一， @zkoza建议使用fuctrotors：

struct gaussian_fixed_a
{
    double a;
    gaussian_fixed_a(double a) : a{a} {}
    double operator()(double x, double b, double c) const { return gaussian(x, b, a, c); }
};

这些最后一行显示了我如何创建一个观察到的数据的伪造数据集（具有正态分布的噪声），并用两个独立变量测试拟合曲线函数，通过向量XS和bs。

    int main()
    {
        auto device = std::random_device();
        auto gen    = std::mt19937(device());
    
        auto xs = linspace<std::vector<double>>(0.0, 1.0, 300);
        auto bs = linspace<std::vector<double>>(0.4, 1.4, 300);
        auto ys = std::vector<double>(xs.size());
    
        double a = 5.0, c = 0.15;
    
        for(size_t i = 0; i < xs.size(); i++)
        {

            auto y =  gaussian(xs[i], a, bs[i], c);
            auto dist  = std::normal_distribution(0.0, 0.1 * y);
            ys[i] = y + dist(gen);
        }
        gaussian_fixed_a g(a);
        auto r = curve_fit(g, std::array{0.11}, xs, bs, ys);
    
        std::cout << "result: " << r[0] << ' ' << '\n';
        std::cout << "error : " << r[0] - c << '\n';
    
    }

您是否对如何实现双独立变量非线性拟合有任何想法？

原文

Following up to a previous question I asked in Fixing parameters of a fitting function in Nonlinear Least-Square GSL (successfully answered by @zkoza), I would like to implement an algorithm that can fit data to a non-linear function, by fixing some of its parameters while leaving other parameters to change for finding the best fit to the data. The difference to my previous question is that I want to have two independent variables instead of one independent variable.

Non-linear function used to fit the data

double gaussian(double x, double b, double a, double c)
{
    const double z = (x - b) / c;
    return a * std::exp(-0.5 * z * z);
}

In my previous question I was considering that x was the only independent variable. Now I would like to consider two independent variables, x and b.

The original algorithm used to fit a non-linear function using only one independent variable (while fixing variable a) is a C++ wrapper of the GSL nonlinear least-squares algorithm (borrowed from https://github.com/Eleobert/gsl-curve-fit/blob/master/example.cpp):

template <typename F, size_t... Is>
auto gen_tuple_impl(F func, std::index_sequence<Is...> )
{
    return std::make_tuple(func(Is)...);
}

template <size_t N, typename F>
auto gen_tuple(F func)
{
    return gen_tuple_impl(func, std::make_index_sequence<N>{} );
}

template <class R, class... ARGS>
struct function_ripper {
    static constexpr size_t n_args = sizeof...(ARGS);
};

template <class R, class... ARGS>
auto constexpr n_params(R (ARGS...) )
{
    return function_ripper<R, ARGS...>();
}


auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
             gsl_multifit_nlinear_parameters *params) -> std::vector<double>
{
  // This specifies a trust region method
  const gsl_multifit_nlinear_type *T = gsl_multifit_nlinear_trust;
  const size_t max_iter = 200;
  const double xtol = 1.0e-8;
  const double gtol = 1.0e-8;
  const double ftol = 1.0e-8;

  auto *work = gsl_multifit_nlinear_alloc(T, params, fdf->n, fdf->p);
  int info;

  // initialize solver
  gsl_multifit_nlinear_init(initial_params, fdf, work);
  //iterate until convergence
  gsl_multifit_nlinear_driver(max_iter, xtol, gtol, ftol, nullptr, nullptr, &info, work);

  // result will be stored here
  gsl_vector * y    = gsl_multifit_nlinear_position(work);
  auto result = std::vector<double>(initial_params->size);

  for(int i = 0; i < result.size(); i++)
  {
    result[i] = gsl_vector_get(y, i);
  }

  auto niter = gsl_multifit_nlinear_niter(work);
  auto nfev  = fdf->nevalf;
  auto njev  = fdf->nevaldf;
  auto naev  = fdf->nevalfvv;

  // nfev - number of function evaluations
  // njev - number of Jacobian evaluations
  // naev - number of f_vv evaluations
  //logger::debug("curve fitted after ", niter, " iterations {nfev = ", nfev, "} {njev = ", njev, "} {naev = ", naev, "}");

  gsl_multifit_nlinear_free(work);
  gsl_vector_free(initial_params);
  return result;
}

template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*
{
    auto* result = gsl_vector_alloc(vec.size());
    int i = 0;
    for(const auto e: vec)
    {
        gsl_vector_set(result, i, e);
        i++;
    }
    return result;
}


template<typename C1>
struct fit_data
{
    const std::vector<double>& t;
    const std::vector<double>& y;
    // the actual function to be fitted
    C1 f;
};


template<typename FitData, int n_params>
int internal_f(const gsl_vector* x, void* params, gsl_vector *f)
{
    auto* d  = static_cast<FitData*>(params);
    // Convert the parameter values from gsl_vector (in x) into std::tuple
    auto init_args = [x](int index)
    {
        return gsl_vector_get(x, index);
    };
    auto parameters = gen_tuple<n_params>(init_args);

    // Calculate the error for each...
    for (size_t i = 0; i < d->t.size(); ++i)
    {
        double ti = d->t[i];
        double yi = d->y[i];
        auto func = [ti, &d](auto ...xs)
        {
            // call the actual function to be fitted
            return d->f(ti, xs...);
        };
        auto y = std::apply(func, parameters);
        gsl_vector_set(f, i, yi - y);
    }
    return GSL_SUCCESS;
}

using func_f_type   = int (*) (const gsl_vector*, void*, gsl_vector*);
using func_df_type  = int (*) (const gsl_vector*, void*, gsl_matrix*);
using func_fvv_type = int (*) (const gsl_vector*, const gsl_vector *, void *, gsl_vector *);

template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*;


auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
             gsl_multifit_nlinear_parameters *params) -> std::vector<double>;

template<typename C1>
auto curve_fit_impl(func_f_type f, func_df_type df, func_fvv_type fvv, gsl_vector* initial_params, fit_data<C1>& fd) -> std::vector<double>
{
    assert(fd.t.size() == fd.y.size());

    auto fdf = gsl_multifit_nlinear_fdf();
    auto fdf_params = gsl_multifit_nlinear_default_parameters();

    fdf.f   = f;
    fdf.df  = df;
    fdf.fvv = fvv;
    fdf.n   = fd.t.size();
    fdf.p   = initial_params->size;
    fdf.params = &fd;

    // "This selects the Levenberg-Marquardt algorithm with geodesic acceleration."
    fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel;
    return internal_solve_system(initial_params, &fdf, &fdf_params);
}


template <typename Callable, auto n>
auto curve_fit(Callable f, const std::array<double, n>& initial_params, const std::vector<double>& x, const std::vector<double>& y) -> std::vector<double>
{
    // We can't pass lambdas without convert to std::function.
    //constexpr auto n = 3;//decltype(n_params(f))::n_args - 5;
    //constexpr auto n = 2;
    assert(initial_params.size() == n);

    auto params = internal_make_gsl_vector_ptr(initial_params);
    auto fd = fit_data<Callable>{x, y, f};
    return curve_fit_impl(internal_f<decltype(fd), n>, nullptr, nullptr, params,  fd);
}

In order to fix one of the parameters of the gaussian function, @zkoza proposed to use functors:

struct gaussian_fixed_a
{
    double a;
    gaussian_fixed_a(double a) : a{a} {}
    double operator()(double x, double b, double c) const { return gaussian(x, b, a, c); }
};

And these last lines show how I would create a fake dataset of observed data (with some noise which is normally distributed) and test the fitting curve function with two independent variables, given by the vectors xs and bs.

    int main()
    {
        auto device = std::random_device();
        auto gen    = std::mt19937(device());
    
        auto xs = linspace<std::vector<double>>(0.0, 1.0, 300);
        auto bs = linspace<std::vector<double>>(0.4, 1.4, 300);
        auto ys = std::vector<double>(xs.size());
    
        double a = 5.0, c = 0.15;
    
        for(size_t i = 0; i < xs.size(); i++)
        {

            auto y =  gaussian(xs[i], a, bs[i], c);
            auto dist  = std::normal_distribution(0.0, 0.1 * y);
            ys[i] = y + dist(gen);
        }
        gaussian_fixed_a g(a);
        auto r = curve_fit(g, std::array{0.11}, xs, bs, ys);
    
        std::cout << "result: " << r[0] << ' ' << '\n';
        std::cout << "error : " << r[0] - c << '\n';
    
    }

Do you have any idea on how I could implement the two-independent variables non-linear fitting?

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

灵芸 2025-02-20 23:15:02

如@benvoigt的评论所建议的解决方案是替换x和b在高斯函数中的自变量，用“一个自变量”作为向量，其第一个元素是x，第二个元素是b。

此外，非线性拟合的骨干也需要稍微编辑。编辑包含：

fit_data funcor funcor：

  struct fit_data
 {
     const std :: vector＆lt; vector＆lt; double＆gt; ＆gt; ＆amp; t;
     const std :: vector＆lt; double＆gt;＆amp; y;
     //要拟合的实际功能
     C1 F;
 };

这样，自变量不再是vector ，而是vector 的向量（aka aka a 矩阵）。

在功能internal_f中替换。

a）double ti = d-＆gt; t [i]带有std :: vector＆lt; double＆gt; ti = d-＆gt; t [i]
b）auto func = [ti，＆amp; d]（auto ... xs）带有auto func = [ti，＆amp; d]（auto ... auto ... xs_matrix）
c）返回d-＆gt; f（ti，xs ...）带有返回d-＆gt; f（ti，xs_matrix ...）

在中替换curve_fit函数：

a）const std :: vector＆lt; double＆gt;＆amp; x带有const std :: vector＆lt; vector＆lt; double＆gt; ＆gt; ＆amp; xs_matrix
b）auto fd = fit_data＆lt; callable＆gt; {x，y，f}带有auto fd = fit_data＆lt; callable＆gt; {xs_matrix，y，y，y，y，f}

高斯函数，gaussian_fixed_a funcor和拟合功能看起来像：

double gaussian(std::vector<double> x_vector, double a, double c)
    {
        const double z = (x_vector[0] - x_vector[1]) / c;
        return a * std::exp(-0.5 * z * z);
    }

struct gaussian_fixed_a
{
    double a;
    gaussian_fixed_a(double a) : a{a} {}
    double operator()(std::vector<double> x_vector, double c) const { return gaussian(x_vector, a, c); }
};

double fittingTest(const std::vector< vector<double> > &xs_matrix, const std::vector<double> ys, const double a){

      gaussian_fixed_a g(a);
      auto r = curve_fit(g, std::array{3.0}, xs_matrix, ys);

        return r[0]);

        }

The solution, as suggested in the comments by @BenVoigt, is to replace the x and b independent variables in the gaussian function with 'one independent variable' given as a vector, whose first element is x and the second element is b.

Also the backbone of the nonlinear fitting needs to be slightly edited. The edits consist:

Replace the fit_data functor with:

struct fit_data
 {
     const std::vector< vector<double> > &t;
     const std::vector<double>& y;
     // the actual function to be fitted
     C1 f;
 };

Such that, the independent variable is no longer a vector but rather a vector of a vector (aka a matrix).

Replace within the function internal_f.

a) double ti = d->t[i] with std::vector<double> ti = d->t[i]
b) auto func = [ti, &d](auto ...xs) with auto func = [ti, &d](auto ...xs_matrix)
c) return d->f(ti, xs...) with return d->f(ti, xs_matrix...)

Replace within curve_fit function:

a) const std::vector<double>& x with const std::vector< vector<double> > &xs_matrix
b) auto fd = fit_data<Callable>{x, y, f} with auto fd = fit_data<Callable>{xs_matrix, y, f}

Whereas the gaussian function, gaussian_fixed_a functor and the fitting function looks like:

double gaussian(std::vector<double> x_vector, double a, double c)
    {
        const double z = (x_vector[0] - x_vector[1]) / c;
        return a * std::exp(-0.5 * z * z);
    }

struct gaussian_fixed_a
{
    double a;
    gaussian_fixed_a(double a) : a{a} {}
    double operator()(std::vector<double> x_vector, double c) const { return gaussian(x_vector, a, c); }
};

double fittingTest(const std::vector< vector<double> > &xs_matrix, const std::vector<double> ys, const double a){

      gaussian_fixed_a g(a);
      auto r = curve_fit(g, std::array{3.0}, xs_matrix, ys);

        return r[0]);

        }

回复收藏 0 原文

~没有更多了~