混淆矩阵可视化:仅 pandas DataFrame 支持使用字符串指定列。

发布于 2025-02-12 18:58:31 字数 5728 浏览 0 评论 0原文

基于这篇文章: https://towardsdatascience.com/how-to-plot-a-confusion-matrix-from-a-k-fold-cross-validation-b607317e9874 ,我想可视化我的数据集的混淆矩阵。但是我收到了以下错误消息:

AttributeError                            Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/sklearn/utils/__init__.py in _get_column_indices(X, key)
    408         try:
--> 409             all_columns = X.columns
    410         except AttributeError:

AttributeError: 'numpy.ndarray' object has no attribute 'columns'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
8 frames
<ipython-input-76-827f7c5dca49> in <module>()
      1 # call the functions
----> 2 actual_classes, predicted_classes, _ = cross_val_predict(best_grid, kf, X.to_numpy(), y.to_numpy())
      3 plot_confusion_matrix(actual_classes, predicted_classes, ['Rating_1', 'Rating_2', 'Rating_3', 'Rating_4', 'Rating_5'])

<ipython-input-68-f1bff3bd9ebf> in cross_val_predict(best_grid, kf, X, y)
     19         actual_classes = np.append(actual_classes, test_y)
     20 
---> 21         model_.fit(train_X, train_y)
     22         predicted_classes = np.append(predicted_classes, model_.predict(test_X))
     23 

/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
    388         """
    389         fit_params_steps = self._check_fit_params(**fit_params)
--> 390         Xt = self._fit(X, y, **fit_params_steps)
    391         with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
    392             if self._final_estimator != "passthrough":

/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps)
    353                 message_clsname="Pipeline",
    354                 message=self._log_message(step_idx),
--> 355                 **fit_params_steps[name],
    356             )
    357             # Replace the transformer of the step with the fitted

/usr/local/lib/python3.7/dist-packages/joblib/memory.py in __call__(self, *args, **kwargs)
    347 
    348     def __call__(self, *args, **kwargs):
--> 349         return self.func(*args, **kwargs)
    350 
    351     def call_and_shelve(self, *args, **kwargs):

/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
    891     with _print_elapsed_time(message_clsname, message):
    892         if hasattr(transformer, "fit_transform"):
--> 893             res = transformer.fit_transform(X, y, **fit_params)
    894         else:
    895             res = transformer.fit(X, y, **fit_params).transform(X)

/usr/local/lib/python3.7/dist-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
    670         self._check_n_features(X, reset=True)
    671         self._validate_transformers()
--> 672         self._validate_column_callables(X)
    673         self._validate_remainder(X)
    674 

/usr/local/lib/python3.7/dist-packages/sklearn/compose/_column_transformer.py in _validate_column_callables(self, X)
    350                 columns = columns(X)
    351             all_columns.append(columns)
--> 352             transformer_to_input_indices[name] = _get_column_indices(X, columns)
    353 
    354         self._columns = all_columns

/usr/local/lib/python3.7/dist-packages/sklearn/utils/__init__.py in _get_column_indices(X, key)
    410         except AttributeError:
    411             raise ValueError(
--> 412                 "Specifying the columns using strings is only "
    413                 "supported for pandas DataFrames"
    414             )

ValueError: Specifying the columns using strings is only supported for pandas DataFrames

这是我的代码:

def _take_rows(data, indices):
    """Return the rows of *data* at the given positional *indices*."""
    # pandas objects must be indexed with .iloc: plain [] on a DataFrame
    # selects columns and on a Series looks up index labels, not positions.
    if hasattr(data, "iloc"):
        return data.iloc[indices]
    return np.asarray(data)[indices]


def cross_val_predict(best_grid, kf: "KFold", X, y) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Collect out-of-fold labels, predictions and probabilities.

    A deep copy of ``best_grid`` is re-fitted on the training rows of every
    split produced by ``kf.split(X)`` and used to predict the held-out rows.

    Args:
        best_grid: Estimator exposing ``fit`` and ``predict`` (and optionally
            ``predict_proba``). It is deep-copied, so the caller's object is
            never fitted here.
        kf: Splitter whose ``split(X)`` yields ``(train_idx, test_idx)``
            pairs of positional indices.
        X: Feature rows, either a NumPy array or a pandas DataFrame. A
            DataFrame is sliced with ``.iloc`` and handed to the estimator
            as a DataFrame, so its column names are preserved.
        y: Targets, either a NumPy array or a pandas Series.

    Returns:
        A tuple ``(actual_classes, predicted_classes, predicted_proba)``
        concatenated over all folds in split order. ``predicted_proba`` has
        one column per unique value in ``y``; rows of a fold whose
        probabilities could not be obtained are filled with zeros.
    """
    model_ = cp.deepcopy(best_grid)

    no_classes = len(np.unique(y))

    actual_classes = np.empty([0], dtype=int)
    predicted_classes = np.empty([0], dtype=int)
    predicted_proba = np.empty([0, no_classes])

    for train_idx, test_idx in kf.split(X):
        train_X, test_X = _take_rows(X, train_idx), _take_rows(X, test_idx)
        train_y, test_y = _take_rows(y, train_idx), _take_rows(y, test_idx)

        actual_classes = np.append(actual_classes, np.asarray(test_y))

        model_.fit(train_X, train_y)
        predicted_classes = np.append(predicted_classes, model_.predict(test_X))

        try:
            fold_proba = model_.predict_proba(test_X)
            predicted_proba = np.append(predicted_proba, fold_proba, axis=0)
        except Exception:
            # Best-effort: estimators without usable probabilities contribute
            # zero rows. Unlike a bare ``except``, this does not swallow
            # KeyboardInterrupt or SystemExit.
            fold_zeros = np.zeros((len(test_X), no_classes), dtype=float)
            predicted_proba = np.append(predicted_proba, fold_zeros, axis=0)

    return actual_classes, predicted_classes, predicted_proba

# Visualise the Confusion Matrix
def plot_confusion_matrix(actual_classes : np.array, predicted_classes : np.array, sorted_labels : list):
    """Draw the confusion matrix of the given labels as an annotated heatmap.

    Args:
        actual_classes: True labels.
        predicted_classes: Predicted labels, aligned with ``actual_classes``.
        sorted_labels: Labels passed to ``confusion_matrix`` as ``labels`` and
            used as the tick labels on both axes.
    """
    counts = confusion_matrix(
        actual_classes,
        predicted_classes,
        labels=sorted_labels,
    )

    plt.figure(figsize=(12.8, 6))
    sns.heatmap(
        counts,
        annot=True,
        xticklabels=sorted_labels,
        yticklabels=sorted_labels,
        cmap='Blues',
        fmt='g',
    )
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix Visualization')

    plt.show()

# Call the functions: gather the out-of-fold predictions, then plot them.
# NOTE(review): .to_numpy() strips the column names from X. The traceback above
# shows the pipeline's ColumnTransformer then failing with "Specifying the
# columns using strings is only supported for pandas DataFrames". Passing the
# DataFrame itself only works if cross_val_predict selects rows positionally
# (e.g. with .iloc) instead of with X[train_idx].
actual_classes, predicted_classes, _ = cross_val_predict(best_grid, kf, X.to_numpy(), y.to_numpy())
plot_confusion_matrix(actual_classes, predicted_classes, ['Rating_1', 'Rating_2', 'Rating_3', 'Rating_4', 'Rating_5'])

错误消息在我运行代码以调用函数之后出现。提前致谢。

Based on this post: https://towardsdatascience.com/how-to-plot-a-confusion-matrix-from-a-k-fold-cross-validation-b607317e9874, I want to visualize the confusion matrix of my dataset. But I get the following error message:

AttributeError                            Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/sklearn/utils/__init__.py in _get_column_indices(X, key)
    408         try:
--> 409             all_columns = X.columns
    410         except AttributeError:

AttributeError: 'numpy.ndarray' object has no attribute 'columns'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
8 frames
<ipython-input-76-827f7c5dca49> in <module>()
      1 # call the functions
----> 2 actual_classes, predicted_classes, _ = cross_val_predict(best_grid, kf, X.to_numpy(), y.to_numpy())
      3 plot_confusion_matrix(actual_classes, predicted_classes, ['Rating_1', 'Rating_2', 'Rating_3', 'Rating_4', 'Rating_5'])

<ipython-input-68-f1bff3bd9ebf> in cross_val_predict(best_grid, kf, X, y)
     19         actual_classes = np.append(actual_classes, test_y)
     20 
---> 21         model_.fit(train_X, train_y)
     22         predicted_classes = np.append(predicted_classes, model_.predict(test_X))
     23 

/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
    388         """
    389         fit_params_steps = self._check_fit_params(**fit_params)
--> 390         Xt = self._fit(X, y, **fit_params_steps)
    391         with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
    392             if self._final_estimator != "passthrough":

/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps)
    353                 message_clsname="Pipeline",
    354                 message=self._log_message(step_idx),
--> 355                 **fit_params_steps[name],
    356             )
    357             # Replace the transformer of the step with the fitted

/usr/local/lib/python3.7/dist-packages/joblib/memory.py in __call__(self, *args, **kwargs)
    347 
    348     def __call__(self, *args, **kwargs):
--> 349         return self.func(*args, **kwargs)
    350 
    351     def call_and_shelve(self, *args, **kwargs):

/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
    891     with _print_elapsed_time(message_clsname, message):
    892         if hasattr(transformer, "fit_transform"):
--> 893             res = transformer.fit_transform(X, y, **fit_params)
    894         else:
    895             res = transformer.fit(X, y, **fit_params).transform(X)

/usr/local/lib/python3.7/dist-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
    670         self._check_n_features(X, reset=True)
    671         self._validate_transformers()
--> 672         self._validate_column_callables(X)
    673         self._validate_remainder(X)
    674 

/usr/local/lib/python3.7/dist-packages/sklearn/compose/_column_transformer.py in _validate_column_callables(self, X)
    350                 columns = columns(X)
    351             all_columns.append(columns)
--> 352             transformer_to_input_indices[name] = _get_column_indices(X, columns)
    353 
    354         self._columns = all_columns

/usr/local/lib/python3.7/dist-packages/sklearn/utils/__init__.py in _get_column_indices(X, key)
    410         except AttributeError:
    411             raise ValueError(
--> 412                 "Specifying the columns using strings is only "
    413                 "supported for pandas DataFrames"
    414             )

ValueError: Specifying the columns using strings is only supported for pandas DataFrames

Here is my code:

def _take_rows(data, indices):
    """Return the rows of *data* at the given positional *indices*."""
    # pandas objects must be indexed with .iloc: plain [] on a DataFrame
    # selects columns and on a Series looks up index labels, not positions.
    if hasattr(data, "iloc"):
        return data.iloc[indices]
    return np.asarray(data)[indices]


def cross_val_predict(best_grid, kf: "KFold", X, y) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Collect out-of-fold labels, predictions and probabilities.

    A deep copy of ``best_grid`` is re-fitted on the training rows of every
    split produced by ``kf.split(X)`` and used to predict the held-out rows.

    Args:
        best_grid: Estimator exposing ``fit`` and ``predict`` (and optionally
            ``predict_proba``). It is deep-copied, so the caller's object is
            never fitted here.
        kf: Splitter whose ``split(X)`` yields ``(train_idx, test_idx)``
            pairs of positional indices.
        X: Feature rows, either a NumPy array or a pandas DataFrame. A
            DataFrame is sliced with ``.iloc`` and handed to the estimator
            as a DataFrame, so its column names are preserved.
        y: Targets, either a NumPy array or a pandas Series.

    Returns:
        A tuple ``(actual_classes, predicted_classes, predicted_proba)``
        concatenated over all folds in split order. ``predicted_proba`` has
        one column per unique value in ``y``; rows of a fold whose
        probabilities could not be obtained are filled with zeros.
    """
    model_ = cp.deepcopy(best_grid)

    no_classes = len(np.unique(y))

    actual_classes = np.empty([0], dtype=int)
    predicted_classes = np.empty([0], dtype=int)
    predicted_proba = np.empty([0, no_classes])

    for train_idx, test_idx in kf.split(X):
        train_X, test_X = _take_rows(X, train_idx), _take_rows(X, test_idx)
        train_y, test_y = _take_rows(y, train_idx), _take_rows(y, test_idx)

        actual_classes = np.append(actual_classes, np.asarray(test_y))

        model_.fit(train_X, train_y)
        predicted_classes = np.append(predicted_classes, model_.predict(test_X))

        try:
            fold_proba = model_.predict_proba(test_X)
            predicted_proba = np.append(predicted_proba, fold_proba, axis=0)
        except Exception:
            # Best-effort: estimators without usable probabilities contribute
            # zero rows. Unlike a bare ``except``, this does not swallow
            # KeyboardInterrupt or SystemExit.
            fold_zeros = np.zeros((len(test_X), no_classes), dtype=float)
            predicted_proba = np.append(predicted_proba, fold_zeros, axis=0)

    return actual_classes, predicted_classes, predicted_proba

# Visualise the Confusion Matrix
def plot_confusion_matrix(actual_classes : np.array, predicted_classes : np.array, sorted_labels : list):
    """Draw the confusion matrix of the given labels as an annotated heatmap.

    Args:
        actual_classes: True labels.
        predicted_classes: Predicted labels, aligned with ``actual_classes``.
        sorted_labels: Labels passed to ``confusion_matrix`` as ``labels`` and
            used as the tick labels on both axes.
    """
    counts = confusion_matrix(
        actual_classes,
        predicted_classes,
        labels=sorted_labels,
    )

    plt.figure(figsize=(12.8, 6))
    sns.heatmap(
        counts,
        annot=True,
        xticklabels=sorted_labels,
        yticklabels=sorted_labels,
        cmap='Blues',
        fmt='g',
    )
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix Visualization')

    plt.show()

# Call the functions: gather the out-of-fold predictions, then plot them.
# NOTE(review): .to_numpy() strips the column names from X. The traceback above
# shows the pipeline's ColumnTransformer then failing with "Specifying the
# columns using strings is only supported for pandas DataFrames". Passing the
# DataFrame itself only works if cross_val_predict selects rows positionally
# (e.g. with .iloc) instead of with X[train_idx].
actual_classes, predicted_classes, _ = cross_val_predict(best_grid, kf, X.to_numpy(), y.to_numpy())
plot_confusion_matrix(actual_classes, predicted_classes, ['Rating_1', 'Rating_2', 'Rating_3', 'Rating_4', 'Rating_5'])

The error message came after I ran the code to call the functions. Thanks in advance.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文