完成主动学习循环后,如何提取训练集中的图像名称和标签并将其写入 CSV 文件?
我正在使用 Keras 脚本 https://modal-python .readthedocs.io/en/latest/content/examples/Keras_integration.html 对二元分类任务执行主动学习。完成主动学习循环后,我们如何提取训练集中提供最佳测试性能的图像名称和标签并将它们写入 CSV 文件?
from tensorflow import keras
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from modAL.models import ActiveLearner
from modAL.uncertainty import entropy_sampling
# build function for the Keras' scikit-learn API
def create_keras_model():
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
return model
# create the classifier
classifier = KerasClassifier(create_keras_model)
# read training data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 28, 28, 1).astype('float32') / 255
X_test = X_test.reshape(10000, 28, 28, 1).astype('float32') / 255
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
# assemble initial data
n_initial = 1000
initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
X_initial = X_train[initial_idx]
y_initial = y_train[initial_idx]
# generate the pool
# remove the initial data from the training dataset
X_pool = np.delete(X_train, initial_idx, axis=0)
y_pool = np.delete(y_train, initial_idx, axis=0)
"""
Training the ActiveLearner
"""
# initialize ActiveLearner
learner = ActiveLearner(
estimator=classifier,
X_training=X_initial, y_training=y_initial,
verbose=1
)
# the active learning loop
n_queries = 100
for idx in range(n_queries):
query_idx, query_instance = learner.query(X_pool, n_instances=100, verbose=0)
print(query_idx)
learner.teach(
X=X_pool[query_idx], y=y_pool[query_idx], only_new=True,
verbose=1
)
# remove queried instance from pool
X_pool = np.delete(X_pool, query_idx, axis=0)
y_pool = np.delete(y_pool, query_idx, axis=0)
# the final accuracy score
print(learner.score(X_test, y_test, verbose=1))
I am using the Keras script at https://modal-python.readthedocs.io/en/latest/content/examples/Keras_integration.html to perform active learning for a binary classification task. After completing the active learning loop, how do we extract the image names and labels in the training set that gives the optimal test performance and write them to a CSV file?
from tensorflow import keras
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from modAL.models import ActiveLearner
from modAL.uncertainty import entropy_sampling
# build function for the Keras' scikit-learn API
def create_keras_model():
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
return model
# create the classifier
classifier = KerasClassifier(create_keras_model)
# read training data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 28, 28, 1).astype('float32') / 255
X_test = X_test.reshape(10000, 28, 28, 1).astype('float32') / 255
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
# assemble initial data
n_initial = 1000
initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
X_initial = X_train[initial_idx]
y_initial = y_train[initial_idx]
# generate the pool
# remove the initial data from the training dataset
X_pool = np.delete(X_train, initial_idx, axis=0)
y_pool = np.delete(y_train, initial_idx, axis=0)
"""
Training the ActiveLearner
"""
# initialize ActiveLearner
learner = ActiveLearner(
estimator=classifier,
X_training=X_initial, y_training=y_initial,
verbose=1
)
# the active learning loop
n_queries = 100
for idx in range(n_queries):
query_idx, query_instance = learner.query(X_pool, n_instances=100, verbose=0)
print(query_idx)
learner.teach(
X=X_pool[query_idx], y=y_pool[query_idx], only_new=True,
verbose=1
)
# remove queried instance from pool
X_pool = np.delete(X_pool, query_idx, axis=0)
y_pool = np.delete(y_pool, query_idx, axis=0)
# the final accuracy score
print(learner.score(X_test, y_test, verbose=1))
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论