KUBEFLOW:没有这样的文件或目录错误
我正在创建一个非常简单的 kubeflow 管道,它具有以下步骤:
- 数据加载
- 数据预处理
- 训练
- 评估 前 3 个步骤成功运行,但一旦我尝试运行评估器,该步骤就会失败并出现上述错误。以下是 kubeflow 管道的 python 代码:
import kfp
from kfp import dsl
from kfp.compiler import Compiler
def dataloader_op():
return dsl.ContainerOp(
name="Data Loader",
command= "python",
image="racahu23/ml-blueprint_dataloader:8",
arguments=[
"dataloader.py",
"--platform", "aws",
"--bucketname", "new-classification",
"--remoteDirectoryName", "datasets"
],
# arguments=[
# "dataloader.py"
# ],
file_outputs={
"datasets": "/home/user/datasets"
}
)
def datapreprocessor(datasets):
return dsl.ContainerOp(
name="Data Preprocessor",
command= "python",
image="racahu23/ml-blueprint_preprocessor:7",
arguments=[
"datapreprocessor.py",
"--input_dir", datasets,
"--output_dir", "Data"
],
file_outputs={
"Data": "/home/user/Data"
}
)
def trainer(Data):
return dsl.ContainerOp(
name="Model Trainer",
command="python",
#image="racahu23/ml-blueprint_trainer:1",
image="racahu23/ml-blueprint_trainer:12",
arguments=[
"primary_trainer.py",
"--input_dir", Data,
"--output_dir", "model/",
"--num_labels", 9,
"--logging_dir", "logs/",
'--num_train_epochs', 1,
'--evaluation_strategy', "epoch",
'--per_device_train_batch_size', 32,
'--per_device_eval_batch_size', 64,
'--save_strategy', "epoch",
'--logging_strategy', "epoch",
'--eval_steps', 100,
],
file_outputs={
"model": "/home/user/model"
}
)
def evaluator(model, Data):
return dsl.ContainerOp(
name= "Model Evaluator",
command= "python",
image= "racahu23/ml-blueprint_evaluator:3",
arguments=[
'--data_dir', Data,
'--model_dir', model,
'--output_dir', "output/",
'--save_strategy', "accuracy",
],
file_outputs={
"output": "/home/user/output"
}
)
@dsl.pipeline(
name="ML_BLUEPRINT",
description="A generic kubeflow pipeline that trains hugging face transformers for text classification"
)
def blueprint_pipeline():
_dataloader_op = dataloader_op()
_preprocessor_op = datapreprocessor(
dsl.InputArgumentPath(_dataloader_op.outputs["datasets"])
).after(_dataloader_op)
_trainer_op = trainer(
dsl.InputArgumentPath(_preprocessor_op.outputs["Data"])
).after(_preprocessor_op)
_evaluator_op = evaluator(dsl.InputArgumentPath(_trainer_op.outputs["model"]),
dsl.InputArgumentPath(_preprocessor_op.outputs["Data"])).after(_trainer_op)
client = kfp.Client(namespace="kubeflow",host="http://localhost:8080")
client.create_run_from_pipeline_func(blueprint_pipeline, arguments={})
I am creating a very simple kubeflow pipeline which has the following steps:
- Data Loading
- Data Preprocessing
- Training
- Evaluation
The first 3 steps runs successfully but as soon as I try to run the evaluator the step fails with the error mentioned above. Here is the python code for the kubeflow pipeline:
import kfp
from kfp import dsl
from kfp.compiler import Compiler
def dataloader_op():
return dsl.ContainerOp(
name="Data Loader",
command= "python",
image="racahu23/ml-blueprint_dataloader:8",
arguments=[
"dataloader.py",
"--platform", "aws",
"--bucketname", "new-classification",
"--remoteDirectoryName", "datasets"
],
# arguments=[
# "dataloader.py"
# ],
file_outputs={
"datasets": "/home/user/datasets"
}
)
def datapreprocessor(datasets):
return dsl.ContainerOp(
name="Data Preprocessor",
command= "python",
image="racahu23/ml-blueprint_preprocessor:7",
arguments=[
"datapreprocessor.py",
"--input_dir", datasets,
"--output_dir", "Data"
],
file_outputs={
"Data": "/home/user/Data"
}
)
def trainer(Data):
return dsl.ContainerOp(
name="Model Trainer",
command="python",
#image="racahu23/ml-blueprint_trainer:1",
image="racahu23/ml-blueprint_trainer:12",
arguments=[
"primary_trainer.py",
"--input_dir", Data,
"--output_dir", "model/",
"--num_labels", 9,
"--logging_dir", "logs/",
'--num_train_epochs', 1,
'--evaluation_strategy', "epoch",
'--per_device_train_batch_size', 32,
'--per_device_eval_batch_size', 64,
'--save_strategy', "epoch",
'--logging_strategy', "epoch",
'--eval_steps', 100,
],
file_outputs={
"model": "/home/user/model"
}
)
def evaluator(model, Data):
return dsl.ContainerOp(
name= "Model Evaluator",
command= "python",
image= "racahu23/ml-blueprint_evaluator:3",
arguments=[
'--data_dir', Data,
'--model_dir', model,
'--output_dir', "output/",
'--save_strategy', "accuracy",
],
file_outputs={
"output": "/home/user/output"
}
)
@dsl.pipeline(
name="ML_BLUEPRINT",
description="A generic kubeflow pipeline that trains hugging face transformers for text classification"
)
def blueprint_pipeline():
_dataloader_op = dataloader_op()
_preprocessor_op = datapreprocessor(
dsl.InputArgumentPath(_dataloader_op.outputs["datasets"])
).after(_dataloader_op)
_trainer_op = trainer(
dsl.InputArgumentPath(_preprocessor_op.outputs["Data"])
).after(_preprocessor_op)
_evaluator_op = evaluator(dsl.InputArgumentPath(_trainer_op.outputs["model"]),
dsl.InputArgumentPath(_preprocessor_op.outputs["Data"])).after(_trainer_op)
client = kfp.Client(namespace="kubeflow",host="http://localhost:8080")
client.create_run_from_pipeline_func(blueprint_pipeline, arguments={})
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论