在 Windows 上使用 Python multiprocessing 进行编码时出现 RuntimeError
我正在尝试使用 Python 的 multiprocessing 库来加速 CSV 文件的编码,但是遇到了下面这个错误:
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
我确实写了如下代码:
# NOTE(review): this instance is created at module level, outside the
# __main__ guard below. SemanticSearch.__init__ (listed later in this post)
# calls Pool(), so a process pool is started every time this module is
# imported. The RuntimeError quoted above describes exactly that situation
# when child processes are not started with fork (e.g. on Windows).
Search = SemanticSearch(model_path, data_path, query)
if __name__ == '__main__':
    # Only the search() call itself is protected by the guard.
    query, flat, top_results = Search.search()
它调用的是我的类中的如下函数:
def setup(self):
    """Read the CSV at ``self.data_path``, flatten it, and map ``encode`` over it.

    Stores the flattened list of cells in ``self.corpus`` and the result of
    the pool map in ``self.texts_encodings``, then prints the difference
    between the current time and ``self.start``.
    """
    with open(self.data_path, newline='') as f:  # read every CSV row
        reader = csv.reader(f)
        data1 = list(reader)  # one list of cells per row
    self.corpus = [x for sublist in data1 for x in sublist]  # turn into 1D list
    #SemanticSearch.encode(self)
    # self.map is Pool().map; a map call invokes its function once per item
    # of the iterable and passes that item as the argument.
    # NOTE(review): encode() as written accepts only ``self``, so this call
    # shape does not match its signature.
    self.texts_encodings = self.map(self.encode, self.corpus)
    end = time.time()
    # NOTE(review): self.start is assigned only inside encode(); if encode
    # runs in worker processes, this process's self.start is presumably
    # still unset here -- confirm.
    print(end - self.start)
def encode(self):
    """Record the start time and encode ``self.corpus`` with ``self.model``.

    Returns:
        Whatever ``self.model.encode`` returns for the corpus, requested
        with ``convert_to_tensor=True`` and a progress bar.
    """
    self.start = time.time()
    # NOTE(review): the whole corpus is passed on every call, not one item.
    return self.model.encode(self.corpus, convert_to_tensor=True, show_progress_bar=True)
在我的 __init__ 函数中,我确实调用并设置了:
self.map = Pool().map
有什么提示吗?我是不是遗漏了什么?提前致谢。
编辑
class SemanticSearch(object):
    """Load a SentenceTransformer model and encode the cells of a CSV file."""

    def __init__(self, model, data, query):
        """Store the query, load the model, and create a process pool.

        Args:
            model: value passed straight to ``SentenceTransformer``.
            data: path to the CSV file opened later by ``setup``.
            query: stored on the instance; not used by the methods shown.
        """
        self.query = query
        self.model = SentenceTransformer(model)  # model location
        self.data_path = data  # path to csv
        self.corpus = None
        self.texts_encodings = None
        self.start = None
        # NOTE(review): a Pool is created as soon as an instance is built, so
        # instances must only be constructed under the __main__ guard.
        self.map = Pool().map

    def setup(self):
        """Read the CSV, flatten it into ``self.corpus``, and map ``encode`` over it."""
        print('here')
        with open(self.data_path, newline='') as f:  # read every CSV row
            reader = csv.reader(f)
            data1 = list(reader)  # one list of cells per row
        self.corpus = [x for sublist in data1 for x in sublist]  # turn into 1D list
        # SemanticSearch.encode(self)
        # A map call passes each corpus item to the function as an argument.
        # NOTE(review): encode() accepts only ``self``, so this call shape
        # does not match its signature.
        self.texts_encodings = self.map(self.encode, self.corpus)
        # SemanticSearch.encode(self)
        end = time.time()
        # NOTE(review): self.start is assigned only inside encode(); if that
        # runs in worker processes it is presumably still None here -- confirm.
        print(end - self.start)

    def encode(self):
        """Record the start time and return the model's encoding of ``self.corpus``."""
        self.start = time.time()
        # NOTE(review): the whole corpus is passed on every call, not one item.
        return self.model.encode(self.corpus, convert_to_tensor=True,
                                 show_progress_bar=True)  ##encode to invisible layer

    def search(self):
        """Run ``setup``; nothing is returned."""
        SemanticSearch.setup(self)
if __name__ == "__main__":
    # In this version the instance (and therefore its Pool) is created inside
    # the guard, so it is not re-created when the module is imported.
    model_path = r'data\BERT_MODELS\fine-tuned\multi-qa-MiniLM-L6-cos-v1'
    data_path = 'data/raw_data/Jira-2_14_2022.csv'
    query = 'query'
    Search = SemanticSearch(model_path, data_path, query)
    # NOTE(review): search() in the class listing has no return statement,
    # so it returns None and cannot be unpacked into three names.
    query, flat, top_results = Search.search()
I'm trying to use the Python multiprocessing library to speed up the encoding of a CSV file. However, I run into this error:
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
I did create
# NOTE(review): this instance is created at module level, outside the
# __main__ guard below. SemanticSearch.__init__ (listed later in this post)
# calls Pool(), so a process pool is started every time this module is
# imported. The RuntimeError quoted above describes exactly that situation
# when child processes are not started with fork (e.g. on Windows).
Search = SemanticSearch(model_path, data_path, query)
if __name__ == '__main__':
    # Only the search() call itself is protected by the guard.
    query, flat, top_results = Search.search()
That points to the function in my class,
def setup(self):
    """Read the CSV at ``self.data_path``, flatten it, and map ``encode`` over it.

    Stores the flattened list of cells in ``self.corpus`` and the result of
    the pool map in ``self.texts_encodings``, then prints the difference
    between the current time and ``self.start``.
    """
    with open(self.data_path, newline='') as f:  # read every CSV row
        reader = csv.reader(f)
        data1 = list(reader)  # one list of cells per row
    self.corpus = [x for sublist in data1 for x in sublist]  # turn into 1D list
    #SemanticSearch.encode(self)
    # self.map is Pool().map; a map call invokes its function once per item
    # of the iterable and passes that item as the argument.
    # NOTE(review): encode() as written accepts only ``self``, so this call
    # shape does not match its signature.
    self.texts_encodings = self.map(self.encode, self.corpus)
    end = time.time()
    # NOTE(review): self.start is assigned only inside encode(); if encode
    # runs in worker processes, this process's self.start is presumably
    # still unset here -- confirm.
    print(end - self.start)
def encode(self):
    """Record the start time and encode ``self.corpus`` with ``self.model``.

    Returns:
        Whatever ``self.model.encode`` returns for the corpus, requested
        with ``convert_to_tensor=True`` and a progress bar.
    """
    self.start = time.time()
    # NOTE(review): the whole corpus is passed on every call, not one item.
    return self.model.encode(self.corpus, convert_to_tensor=True, show_progress_bar=True)
In my `__init__` function I did call and set:
self.map = Pool().map
Any tips on something I'm missing? Thanks in advance.
EDIT
class SemanticSearch(object):
    """Load a SentenceTransformer model and encode the cells of a CSV file."""

    def __init__(self, model, data, query):
        """Store the query, load the model, and create a process pool.

        Args:
            model: value passed straight to ``SentenceTransformer``.
            data: path to the CSV file opened later by ``setup``.
            query: stored on the instance; not used by the methods shown.
        """
        self.query = query
        self.model = SentenceTransformer(model)  # model location
        self.data_path = data  # path to csv
        self.corpus = None
        self.texts_encodings = None
        self.start = None
        # NOTE(review): a Pool is created as soon as an instance is built, so
        # instances must only be constructed under the __main__ guard.
        self.map = Pool().map

    def setup(self):
        """Read the CSV, flatten it into ``self.corpus``, and map ``encode`` over it."""
        print('here')
        with open(self.data_path, newline='') as f:  # read every CSV row
            reader = csv.reader(f)
            data1 = list(reader)  # one list of cells per row
        self.corpus = [x for sublist in data1 for x in sublist]  # turn into 1D list
        # SemanticSearch.encode(self)
        # A map call passes each corpus item to the function as an argument.
        # NOTE(review): encode() accepts only ``self``, so this call shape
        # does not match its signature.
        self.texts_encodings = self.map(self.encode, self.corpus)
        # SemanticSearch.encode(self)
        end = time.time()
        # NOTE(review): self.start is assigned only inside encode(); if that
        # runs in worker processes it is presumably still None here -- confirm.
        print(end - self.start)

    def encode(self):
        """Record the start time and return the model's encoding of ``self.corpus``."""
        self.start = time.time()
        # NOTE(review): the whole corpus is passed on every call, not one item.
        return self.model.encode(self.corpus, convert_to_tensor=True,
                                 show_progress_bar=True)  ##encode to invisible layer

    def search(self):
        """Run ``setup``; nothing is returned."""
        SemanticSearch.setup(self)
if __name__ == "__main__":
    # In this version the instance (and therefore its Pool) is created inside
    # the guard, so it is not re-created when the module is imported.
    model_path = r'data\BERT_MODELS\fine-tuned\multi-qa-MiniLM-L6-cos-v1'
    data_path = 'data/raw_data/Jira-2_14_2022.csv'
    query = 'query'
    Search = SemanticSearch(model_path, data_path, query)
    # NOTE(review): search() in the class listing has no return statement,
    # so it returns None and cannot be unpacked into three names.
    query, flat, top_results = Search.search()
如果你对这篇内容有疑问,欢迎到本站社区发帖提问、参与讨论,获取更多帮助,或者扫描二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定您的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论