混淆矩阵可视化:仅 pandas DataFrame 支持使用字符串指定列
基于这篇文章: https://towardsdatascience.com/how-to-plot-a-confusion-matrix-from-a-k-fold-cross-validation-b607317e9874 ,我想可视化我的数据集的混淆矩阵。但是我收到了以下错误消息:
AttributeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/sklearn/utils/__init__.py in _get_column_indices(X, key)
408 try:
--> 409 all_columns = X.columns
410 except AttributeError:
AttributeError: 'numpy.ndarray' object has no attribute 'columns'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
8 frames
<ipython-input-76-827f7c5dca49> in <module>()
1 # call the functions
----> 2 actual_classes, predicted_classes, _ = cross_val_predict(best_grid, kf, X.to_numpy(), y.to_numpy())
3 plot_confusion_matrix(actual_classes, predicted_classes, ['Rating_1', 'Rating_2', 'Rating_3', 'Rating_4', 'Rating_5'])
<ipython-input-68-f1bff3bd9ebf> in cross_val_predict(best_grid, kf, X, y)
19 actual_classes = np.append(actual_classes, test_y)
20
---> 21 model_.fit(train_X, train_y)
22 predicted_classes = np.append(predicted_classes, model_.predict(test_X))
23
/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
388 """
389 fit_params_steps = self._check_fit_params(**fit_params)
--> 390 Xt = self._fit(X, y, **fit_params_steps)
391 with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
392 if self._final_estimator != "passthrough":
/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps)
353 message_clsname="Pipeline",
354 message=self._log_message(step_idx),
--> 355 **fit_params_steps[name],
356 )
357 # Replace the transformer of the step with the fitted
/usr/local/lib/python3.7/dist-packages/joblib/memory.py in __call__(self, *args, **kwargs)
347
348 def __call__(self, *args, **kwargs):
--> 349 return self.func(*args, **kwargs)
350
351 def call_and_shelve(self, *args, **kwargs):
/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
891 with _print_elapsed_time(message_clsname, message):
892 if hasattr(transformer, "fit_transform"):
--> 893 res = transformer.fit_transform(X, y, **fit_params)
894 else:
895 res = transformer.fit(X, y, **fit_params).transform(X)
/usr/local/lib/python3.7/dist-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
670 self._check_n_features(X, reset=True)
671 self._validate_transformers()
--> 672 self._validate_column_callables(X)
673 self._validate_remainder(X)
674
/usr/local/lib/python3.7/dist-packages/sklearn/compose/_column_transformer.py in _validate_column_callables(self, X)
350 columns = columns(X)
351 all_columns.append(columns)
--> 352 transformer_to_input_indices[name] = _get_column_indices(X, columns)
353
354 self._columns = all_columns
/usr/local/lib/python3.7/dist-packages/sklearn/utils/__init__.py in _get_column_indices(X, key)
410 except AttributeError:
411 raise ValueError(
--> 412 "Specifying the columns using strings is only "
413 "supported for pandas DataFrames"
414 )
ValueError: Specifying the columns using strings is only supported for pandas DataFrames
这是我的代码:
def _take_rows(data, idx):
    """Select rows of *data* by integer position.

    pandas objects are indexed with ``.iloc`` so they stay pandas objects
    (keeping their column names); anything else is indexed as a NumPy array.
    """
    if hasattr(data, "iloc"):
        return data.iloc[idx]
    return np.asarray(data)[idx]


def cross_val_predict(best_grid, kf: "KFold", X: np.array, y: np.array) -> Tuple[np.array, np.array, np.array]:
    """Collect out-of-fold labels and predictions over the splits of *kf*.

    A deep copy of ``best_grid`` is refitted on each training split and used
    to predict the matching test split; the per-fold results are joined in
    fold order.

    Args:
        best_grid: Estimator exposing ``fit`` and ``predict`` (and optionally
            ``predict_proba``). The object passed in is never fitted itself.
        kf: Splitter whose ``split(X)`` yields ``(train_idx, test_idx)``
            pairs of integer positions.
        X: Features, either a NumPy array or a pandas DataFrame. A DataFrame
            is passed to the estimator as a DataFrame, so steps that select
            columns by name keep working.
        y: Targets, either a NumPy array or a pandas Series.

    Returns:
        ``(actual_classes, predicted_classes, predicted_proba)``. The
        probability matrix has one column per unique value of ``y``; rows of
        a fold are all zeros when ``predict_proba`` could not be computed.
    """
    model_ = cp.deepcopy(best_grid)
    no_classes = len(np.unique(y))

    # Each list starts with the same empty seed array the accumulators were
    # historically initialised with, so result dtypes/shapes are unchanged
    # and zero folds still produce well-formed empty outputs.
    actual_parts = [np.empty([0], dtype=int)]
    predicted_parts = [np.empty([0], dtype=int)]
    proba_parts = [np.empty([0, no_classes])]

    for train_idx, test_idx in kf.split(X):
        train_X, test_X = _take_rows(X, train_idx), _take_rows(X, test_idx)
        train_y, test_y = _take_rows(y, train_idx), _take_rows(y, test_idx)

        actual_parts.append(np.asarray(test_y).ravel())

        model_.fit(train_X, train_y)
        predicted_parts.append(np.asarray(model_.predict(test_X)).ravel())

        try:
            fold_proba = model_.predict_proba(test_X)
        except Exception:
            # Best effort: estimators without usable probabilities get a
            # zero-filled placeholder so row counts still line up.
            fold_proba = np.zeros((len(test_X), no_classes), dtype=float)
        proba_parts.append(fold_proba)

    # One concatenation per output instead of re-copying on every fold.
    actual_classes = np.concatenate(actual_parts)
    predicted_classes = np.concatenate(predicted_parts)
    predicted_proba = np.concatenate(proba_parts, axis=0)
    return actual_classes, predicted_classes, predicted_proba
# Visualise the Confusion Matrix
def plot_confusion_matrix(actual_classes: np.array, predicted_classes: np.array, sorted_labels: list):
    """Draw the confusion matrix of the given labels as an annotated heatmap.

    Args:
        actual_classes: True class labels.
        predicted_classes: Predicted class labels, aligned with
            ``actual_classes``.
        sorted_labels: Class labels in the order used for both the matrix
            rows/columns and the axis tick labels.
    """
    counts = confusion_matrix(
        actual_classes,
        predicted_classes,
        labels=sorted_labels,
    )

    plt.figure(figsize=(12.8, 6))
    sns.heatmap(
        counts,
        annot=True,
        xticklabels=sorted_labels,
        yticklabels=sorted_labels,
        cmap='Blues',
        fmt='g',
    )
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix Visualization')
    plt.show()
# call the functions
# NOTE(review): X.to_numpy() discards the DataFrame's column names. The
# traceback shows that best_grid's pipeline contains a ColumnTransformer
# whose columns are specified as strings, which scikit-learn only supports
# for pandas DataFrames -- this conversion is what triggers the ValueError.
# Passing the DataFrame itself additionally requires cross_val_predict to
# select fold rows by position (e.g. with .iloc) instead of X[train_idx].
actual_classes, predicted_classes, _ = cross_val_predict(best_grid, kf, X.to_numpy(), y.to_numpy())
plot_confusion_matrix(actual_classes, predicted_classes, ['Rating_1', 'Rating_2', 'Rating_3', 'Rating_4', 'Rating_5'])
错误消息在我运行代码以调用函数之后出现。提前致谢。
Based on this post: https://towardsdatascience.com/how-to-plot-a-confusion-matrix-from-a-k-fold-cross-validation-b607317e9874, I want to visualize the confusion matrix of my dataset. But I get the following error message:
AttributeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/sklearn/utils/__init__.py in _get_column_indices(X, key)
408 try:
--> 409 all_columns = X.columns
410 except AttributeError:
AttributeError: 'numpy.ndarray' object has no attribute 'columns'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
8 frames
<ipython-input-76-827f7c5dca49> in <module>()
1 # call the functions
----> 2 actual_classes, predicted_classes, _ = cross_val_predict(best_grid, kf, X.to_numpy(), y.to_numpy())
3 plot_confusion_matrix(actual_classes, predicted_classes, ['Rating_1', 'Rating_2', 'Rating_3', 'Rating_4', 'Rating_5'])
<ipython-input-68-f1bff3bd9ebf> in cross_val_predict(best_grid, kf, X, y)
19 actual_classes = np.append(actual_classes, test_y)
20
---> 21 model_.fit(train_X, train_y)
22 predicted_classes = np.append(predicted_classes, model_.predict(test_X))
23
/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
388 """
389 fit_params_steps = self._check_fit_params(**fit_params)
--> 390 Xt = self._fit(X, y, **fit_params_steps)
391 with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
392 if self._final_estimator != "passthrough":
/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps)
353 message_clsname="Pipeline",
354 message=self._log_message(step_idx),
--> 355 **fit_params_steps[name],
356 )
357 # Replace the transformer of the step with the fitted
/usr/local/lib/python3.7/dist-packages/joblib/memory.py in __call__(self, *args, **kwargs)
347
348 def __call__(self, *args, **kwargs):
--> 349 return self.func(*args, **kwargs)
350
351 def call_and_shelve(self, *args, **kwargs):
/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
891 with _print_elapsed_time(message_clsname, message):
892 if hasattr(transformer, "fit_transform"):
--> 893 res = transformer.fit_transform(X, y, **fit_params)
894 else:
895 res = transformer.fit(X, y, **fit_params).transform(X)
/usr/local/lib/python3.7/dist-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
670 self._check_n_features(X, reset=True)
671 self._validate_transformers()
--> 672 self._validate_column_callables(X)
673 self._validate_remainder(X)
674
/usr/local/lib/python3.7/dist-packages/sklearn/compose/_column_transformer.py in _validate_column_callables(self, X)
350 columns = columns(X)
351 all_columns.append(columns)
--> 352 transformer_to_input_indices[name] = _get_column_indices(X, columns)
353
354 self._columns = all_columns
/usr/local/lib/python3.7/dist-packages/sklearn/utils/__init__.py in _get_column_indices(X, key)
410 except AttributeError:
411 raise ValueError(
--> 412 "Specifying the columns using strings is only "
413 "supported for pandas DataFrames"
414 )
ValueError: Specifying the columns using strings is only supported for pandas DataFrames
Here is my code:
def _take_rows(data, idx):
    """Select rows of *data* by integer position.

    pandas objects are indexed with ``.iloc`` so they stay pandas objects
    (keeping their column names); anything else is indexed as a NumPy array.
    """
    if hasattr(data, "iloc"):
        return data.iloc[idx]
    return np.asarray(data)[idx]


def cross_val_predict(best_grid, kf: "KFold", X: np.array, y: np.array) -> Tuple[np.array, np.array, np.array]:
    """Collect out-of-fold labels and predictions over the splits of *kf*.

    A deep copy of ``best_grid`` is refitted on each training split and used
    to predict the matching test split; the per-fold results are joined in
    fold order.

    Args:
        best_grid: Estimator exposing ``fit`` and ``predict`` (and optionally
            ``predict_proba``). The object passed in is never fitted itself.
        kf: Splitter whose ``split(X)`` yields ``(train_idx, test_idx)``
            pairs of integer positions.
        X: Features, either a NumPy array or a pandas DataFrame. A DataFrame
            is passed to the estimator as a DataFrame, so steps that select
            columns by name keep working.
        y: Targets, either a NumPy array or a pandas Series.

    Returns:
        ``(actual_classes, predicted_classes, predicted_proba)``. The
        probability matrix has one column per unique value of ``y``; rows of
        a fold are all zeros when ``predict_proba`` could not be computed.
    """
    model_ = cp.deepcopy(best_grid)
    no_classes = len(np.unique(y))

    # Each list starts with the same empty seed array the accumulators were
    # historically initialised with, so result dtypes/shapes are unchanged
    # and zero folds still produce well-formed empty outputs.
    actual_parts = [np.empty([0], dtype=int)]
    predicted_parts = [np.empty([0], dtype=int)]
    proba_parts = [np.empty([0, no_classes])]

    for train_idx, test_idx in kf.split(X):
        train_X, test_X = _take_rows(X, train_idx), _take_rows(X, test_idx)
        train_y, test_y = _take_rows(y, train_idx), _take_rows(y, test_idx)

        actual_parts.append(np.asarray(test_y).ravel())

        model_.fit(train_X, train_y)
        predicted_parts.append(np.asarray(model_.predict(test_X)).ravel())

        try:
            fold_proba = model_.predict_proba(test_X)
        except Exception:
            # Best effort: estimators without usable probabilities get a
            # zero-filled placeholder so row counts still line up.
            fold_proba = np.zeros((len(test_X), no_classes), dtype=float)
        proba_parts.append(fold_proba)

    # One concatenation per output instead of re-copying on every fold.
    actual_classes = np.concatenate(actual_parts)
    predicted_classes = np.concatenate(predicted_parts)
    predicted_proba = np.concatenate(proba_parts, axis=0)
    return actual_classes, predicted_classes, predicted_proba
# Visualise the Confusion Matrix
def plot_confusion_matrix(actual_classes: np.array, predicted_classes: np.array, sorted_labels: list):
    """Draw the confusion matrix of the given labels as an annotated heatmap.

    Args:
        actual_classes: True class labels.
        predicted_classes: Predicted class labels, aligned with
            ``actual_classes``.
        sorted_labels: Class labels in the order used for both the matrix
            rows/columns and the axis tick labels.
    """
    counts = confusion_matrix(
        actual_classes,
        predicted_classes,
        labels=sorted_labels,
    )

    plt.figure(figsize=(12.8, 6))
    sns.heatmap(
        counts,
        annot=True,
        xticklabels=sorted_labels,
        yticklabels=sorted_labels,
        cmap='Blues',
        fmt='g',
    )
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix Visualization')
    plt.show()
# call the functions
# NOTE(review): X.to_numpy() discards the DataFrame's column names. The
# traceback shows that best_grid's pipeline contains a ColumnTransformer
# whose columns are specified as strings, which scikit-learn only supports
# for pandas DataFrames -- this conversion is what triggers the ValueError.
# Passing the DataFrame itself additionally requires cross_val_predict to
# select fold rows by position (e.g. with .iloc) instead of X[train_idx].
actual_classes, predicted_classes, _ = cross_val_predict(best_grid, kf, X.to_numpy(), y.to_numpy())
plot_confusion_matrix(actual_classes, predicted_classes, ['Rating_1', 'Rating_2', 'Rating_3', 'Rating_4', 'Rating_5'])
The error message came after I ran the code to call the functions. Thanks in advance.
如果你对这篇内容有疑问,欢迎到本站社区发帖提问、参与讨论,获取更多帮助,或者扫描二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定您的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论