KUBEFLOW:没有这样的文件或目录错误

发布于 2025-01-11 04:46:42 字数 3320 浏览 3 评论 0原文

我正在创建一个非常简单的 kubeflow 管道,它具有以下步骤:

  1. 数据加载
  2. 数据预处理
  3. 训练
  4. 评估 前 3 个步骤成功运行,但一旦我尝试运行评估器,该步骤就会失败并出现上述错误。以下是 kubeflow 管道的 python 代码:
import kfp
from kfp import dsl
from kfp.compiler import Compiler


def dataloader_op():
    return dsl.ContainerOp(
        name="Data Loader",
        command= "python",
        image="racahu23/ml-blueprint_dataloader:8",
        arguments=[
            "dataloader.py",
            "--platform", "aws",
            "--bucketname", "new-classification",
            "--remoteDirectoryName", "datasets"
        ],
        # arguments=[
        #     "dataloader.py"
        # ],
        file_outputs={
            "datasets": "/home/user/datasets"
        }
    )
def datapreprocessor(datasets):
    return dsl.ContainerOp(
        name="Data Preprocessor",
        command= "python",
        image="racahu23/ml-blueprint_preprocessor:7",
        arguments=[
            "datapreprocessor.py",
            "--input_dir", datasets,
            "--output_dir", "Data"
        ],
        file_outputs={
            "Data": "/home/user/Data"

        }

    )


def trainer(Data):
    return dsl.ContainerOp(
        name="Model Trainer",
        command="python",
        #image="racahu23/ml-blueprint_trainer:1",
        image="racahu23/ml-blueprint_trainer:12",
        arguments=[
            "primary_trainer.py",
            "--input_dir", Data,
            "--output_dir", "model/",
            "--num_labels", 9,
            "--logging_dir", "logs/",
            '--num_train_epochs', 1,
            '--evaluation_strategy', "epoch",
            '--per_device_train_batch_size', 32,
            '--per_device_eval_batch_size', 64,
            '--save_strategy', "epoch",
            '--logging_strategy', "epoch",
            '--eval_steps', 100,

        ],
        file_outputs={
            "model": "/home/user/model"
        }
    )
def evaluator(model, Data):
    return dsl.ContainerOp(
        name= "Model Evaluator",
        command= "python",
        image= "racahu23/ml-blueprint_evaluator:3",
        arguments=[
            '--data_dir', Data,
            '--model_dir', model,
            '--output_dir', "output/",
            '--save_strategy', "accuracy",


        ],
        file_outputs={
            "output": "/home/user/output"
        }

    )

@dsl.pipeline(
    name="ML_BLUEPRINT",
    description="A generic kubeflow pipeline that trains hugging face transformers for text classification"
)
def blueprint_pipeline():
    _dataloader_op = dataloader_op()

    _preprocessor_op = datapreprocessor(
        dsl.InputArgumentPath(_dataloader_op.outputs["datasets"])
    ).after(_dataloader_op)

    _trainer_op = trainer(
        dsl.InputArgumentPath(_preprocessor_op.outputs["Data"])
    ).after(_preprocessor_op)
    _evaluator_op = evaluator(dsl.InputArgumentPath(_trainer_op.outputs["model"]),
                              dsl.InputArgumentPath(_preprocessor_op.outputs["Data"])).after(_trainer_op)

client = kfp.Client(namespace="kubeflow",host="http://localhost:8080")
client.create_run_from_pipeline_func(blueprint_pipeline, arguments={})

显示的错误是: 输入图片此处描述

I am creating a very simple kubeflow pipeline which has the following steps:

  1. Data Loading
  2. Data Preprocessing
  3. Training
  4. Evaluation
    The first 3 steps runs successfully but as soon as I try to run the evaluator the step fails with the error mentioned above. Here is the python code for the kubeflow pipeline:
import kfp
from kfp import dsl
from kfp.compiler import Compiler


def dataloader_op():
    return dsl.ContainerOp(
        name="Data Loader",
        command= "python",
        image="racahu23/ml-blueprint_dataloader:8",
        arguments=[
            "dataloader.py",
            "--platform", "aws",
            "--bucketname", "new-classification",
            "--remoteDirectoryName", "datasets"
        ],
        # arguments=[
        #     "dataloader.py"
        # ],
        file_outputs={
            "datasets": "/home/user/datasets"
        }
    )
def datapreprocessor(datasets):
    return dsl.ContainerOp(
        name="Data Preprocessor",
        command= "python",
        image="racahu23/ml-blueprint_preprocessor:7",
        arguments=[
            "datapreprocessor.py",
            "--input_dir", datasets,
            "--output_dir", "Data"
        ],
        file_outputs={
            "Data": "/home/user/Data"

        }

    )


def trainer(Data):
    return dsl.ContainerOp(
        name="Model Trainer",
        command="python",
        #image="racahu23/ml-blueprint_trainer:1",
        image="racahu23/ml-blueprint_trainer:12",
        arguments=[
            "primary_trainer.py",
            "--input_dir", Data,
            "--output_dir", "model/",
            "--num_labels", 9,
            "--logging_dir", "logs/",
            '--num_train_epochs', 1,
            '--evaluation_strategy', "epoch",
            '--per_device_train_batch_size', 32,
            '--per_device_eval_batch_size', 64,
            '--save_strategy', "epoch",
            '--logging_strategy', "epoch",
            '--eval_steps', 100,

        ],
        file_outputs={
            "model": "/home/user/model"
        }
    )
def evaluator(model, Data):
    return dsl.ContainerOp(
        name= "Model Evaluator",
        command= "python",
        image= "racahu23/ml-blueprint_evaluator:3",
        arguments=[
            '--data_dir', Data,
            '--model_dir', model,
            '--output_dir', "output/",
            '--save_strategy', "accuracy",


        ],
        file_outputs={
            "output": "/home/user/output"
        }

    )

@dsl.pipeline(
    name="ML_BLUEPRINT",
    description="A generic kubeflow pipeline that trains hugging face transformers for text classification"
)
def blueprint_pipeline():
    _dataloader_op = dataloader_op()

    _preprocessor_op = datapreprocessor(
        dsl.InputArgumentPath(_dataloader_op.outputs["datasets"])
    ).after(_dataloader_op)

    _trainer_op = trainer(
        dsl.InputArgumentPath(_preprocessor_op.outputs["Data"])
    ).after(_preprocessor_op)
    _evaluator_op = evaluator(dsl.InputArgumentPath(_trainer_op.outputs["model"]),
                              dsl.InputArgumentPath(_preprocessor_op.outputs["Data"])).after(_trainer_op)

client = kfp.Client(namespace="kubeflow",host="http://localhost:8080")
client.create_run_from_pipeline_func(blueprint_pipeline, arguments={})

The error that is displayed is :
enter image description here

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文