Overfitting the MNIST dataset



I am working with the MNIST dataset and have created the following network. I want to overfit the training data, and I think I am doing that here: my training loss is lower than my validation loss. (The post originally included a plot of the results; the image did not survive.) This is the code that I have come up with. Please look at it and let me know whether I am overfitting the training data; if I am not, how do I go about it?

import torch
import torch.nn as nn

class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(784, 4096),
            nn.ReLU(),
            nn.Linear(4096, 2048),
            nn.ReLU(),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 10))

    def forward(self, x):
        return self.layers(x)

def accuracy_and_loss(model, loss_function, dataloader):
    # Mean loss and accuracy over the given dataloader
    # (the original iterated the global testloader instead of the parameter).
    total_correct = 0
    total_loss = 0
    total_examples = 0
    n_batches = 0
    with torch.no_grad():
        for images, labels in dataloader:
            outputs = model(images)
            batch_loss = loss_function(outputs, labels)
            n_batches += 1
            total_loss += batch_loss.item()
            _, predicted = torch.max(outputs, dim=1)
            total_examples += labels.size(0)
            total_correct += (predicted == labels).sum().item()
    accuracy = total_correct / total_examples
    mean_loss = total_loss / n_batches
    return (accuracy, mean_loss)

def define_and_train(dataset_training, dataset_test):
    trainloader = torch.utils.data.DataLoader(dataset_training, batch_size=500, shuffle=True)
    testloader = torch.utils.data.DataLoader(dataset_test, batch_size=500, shuffle=False)
    loss_function = nn.CrossEntropyLoss()
    values = [1e-8, 1e-7, 1e-6, 1e-5]   # weight-decay settings to compare
    histories = {}
    model = None
    for params in values:
        # Fresh model per setting; the original reused one model and
        # hard-coded weight_decay=1e-7 instead of using params.
        model = NN()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=params)
        train_acc, val_acc, train_loss, val_loss = [], [], [], []
        for epoch in range(100):
            total_loss = 0
            total_correct = 0
            total_examples = 0
            n_mini_batches = 0
            for images, labels in trainloader:
                optimizer.zero_grad()
                outputs = model(images)
                loss = loss_function(outputs, labels)
                loss.backward()
                optimizer.step()
                n_mini_batches += 1
                total_loss += loss.item()
                _, predicted = torch.max(outputs, dim=1)
                total_examples += labels.size(0)
                total_correct += (predicted == labels).sum().item()

            epoch_training_accuracy = total_correct / total_examples
            epoch_training_loss = total_loss / n_mini_batches
            epoch_val_accuracy, epoch_val_loss = accuracy_and_loss(model, loss_function, testloader)

            print('Params %g Epoch %d loss: %.3f acc: %.3f val_loss: %.3f val_acc: %.3f'
                  % (params, epoch + 1, epoch_training_loss, epoch_training_accuracy,
                     epoch_val_loss, epoch_val_accuracy))

            train_loss.append(epoch_training_loss)
            train_acc.append(epoch_training_accuracy)
            val_loss.append(epoch_val_loss)
            val_acc.append(epoch_val_accuracy)

        # Keep one history per weight-decay value instead of overwriting it.
        histories[params] = {'train_loss': train_loss,
                             'train_acc': train_acc,
                             'val_loss': val_loss,
                             'val_acc': val_acc}
    return (histories, model)

history1, net1 = define_and_train(dataset_training, dataset_test)

I am trying to overfit the training data so that later I can apply regularization and then reduce the overfitting, which will give me a better understanding of the process.
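
For reference, the loss plot mentioned above can be produced from the returned histories. A minimal sketch, assuming matplotlib is installed and the histories dict returned by define_and_train above (keyed by weight-decay value):

import matplotlib.pyplot as plt

history = history1[1e-7]              # pick one weight-decay setting
plt.plot(history['train_loss'], label='train loss')
plt.plot(history['val_loss'], label='val loss')
plt.xlabel('epoch')
plt.ylabel('cross-entropy loss')
plt.title('weight_decay = 1e-7')
plt.legend()
plt.show()

A training curve that keeps falling while the validation curve flattens or rises is the overfitting signature the answer below describes.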


Comments (1)

陌伤浅笑 2025-01-24 05:27:06


Although I won't attempt a rigorous definition, "overfitting" typically means that the training loss keeps decreasing while the validation loss stagnates above it, or even increases, as training continues.

Therefore, it is difficult to tell from your code alone whether your network is overfitting. Since dense, fully-connected networks tend to overfit easily in the absence of dropout layers or other regularizers, my hunch is that your network is indeed overfitting as you intend. However, we would have to see your TensorBoard logs or a loss plot to determine whether the model is overfitting.
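
For completeness, logging the same per-epoch metrics to TensorBoard is straightforward. A minimal sketch, assuming the history lists produced by the training loop above and an arbitrary log directory:

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/mnist_overfit')   # hypothetical log directory
for epoch, (tl, vl) in enumerate(zip(history['train_loss'], history['val_loss'])):
    # Write train and validation loss as two curves under one tag
    writer.add_scalars('loss', {'train': tl, 'val': vl}, epoch)
writer.close()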

If you want to overfit your network to the dataset, I suggest constructing a much larger model with more hidden layers. Overfitting occurs when the dataset is "too easy" for the model, so it starts to memorize the training set itself rather than learning generalizable patterns that carry over to the validation set. A sketch of the reverse step, adding regularization, follows below.
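
For the regularization step the asker plans afterwards, one common option is to interleave nn.Dropout between the linear blocks. This is an illustrative sketch, not from the original post; the layer widths and dropout probability are assumptions:

import torch.nn as nn

class RegularizedNN(nn.Module):
    def __init__(self, p=0.5):        # dropout probability is an assumed default
        super().__init__()
        self.layers = nn.Sequential(
            nn.Flatten(),
            # Dropout after each ReLU randomly zeroes activations during training
            nn.Linear(784, 512), nn.ReLU(), nn.Dropout(p),
            nn.Linear(512, 256), nn.ReLU(), nn.Dropout(p),
            nn.Linear(256, 10))

    def forward(self, x):
        return self.layers(x)

Once dropout is present, remember to call model.train() before each training epoch and model.eval() inside the evaluation helper, since dropout behaves differently in the two modes.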
