我有一个接受两个输入的模型。我想在多个GPU上运行推断,其中一个输入是固定的,而另一个是更改的。所以,假设我使用n个GPU,每个GPU都有模型的副本。第一个gpu处理输入对(a_1,b),第二个进程(a_2,b)等等。所有的输出都被保存为文件,所以我不需要对输出执行联接操作。我怎样才能用DDP或其他方式做到这一点呢?
发布于 2022-10-10 21:50:01
我已经想出了如何使用torch.multiprocessing.Queue来完成这个任务。
# Standard PyTorch + multiprocessing imports for spawning one worker per GPU.
import torch
import torch.multiprocessing as mp
# absl provides the command-line flag framework and the app entry point.
from absl import app, flags
# AlexNet is used here only as a stand-in for the user's real two-input model.
from torchvision.models import AlexNet
FLAGS = flags.FLAGS
# Number of worker subprocesses; each one is pinned to the CUDA device of the same index.
flags.DEFINE_integer("num_processes", 2, "Number of subprocesses to use")
def infer(rank, queue):
    """Inference worker: runs on GPU `rank`, consuming (a, b) pairs from `queue`.

    Each subprocess owns its own copy of the model on device ``cuda:{rank}``.
    It loops forever pulling input pairs off the shared queue and stops when it
    receives the ``(None, None)`` sentinel.

    Args:
        rank: Index of this worker; also selects the CUDA device.
        queue: ``torch.multiprocessing.Queue`` of ``(a, b)`` tensor pairs.
    """
    model = AlexNet()
    device = torch.device(f"cuda:{rank}")
    model.to(device)
    # Inference mode: disables Dropout/BatchNorm training behavior (AlexNet
    # contains Dropout, so without this the outputs are nondeterministic).
    model.eval()
    while True:
        a, b = queue.get()
        if a is None:  # check for sentinel value
            break
        x = a + b
        x = x.to(device)
        # no_grad: autograd bookkeeping is pure overhead for inference.
        with torch.no_grad():
            model(x)
        del a, b  # free memory
        print(f"Inference on process {rank}")
def main(argv):
    """Spawn one inference worker per GPU and feed each its own (a_i, b) pair.

    The original version hard-coded exactly two work items (``a_1``, ``a_2``)
    per round even though the worker count is a flag; this generalizes to
    enqueue one pair per worker so any ``--num_processes`` value gets a
    matching amount of work.

    Args:
        argv: Unused positional CLI arguments passed through by ``app.run``.
    """
    queue = mp.Queue()
    processes = []
    for rank in range(FLAGS.num_processes):
        p = mp.Process(target=infer, args=(rank, queue))
        p.start()
        processes.append(p)
    for _ in range(10):
        # `b` is the fixed input shared across all workers this round;
        # each worker receives its own fresh `a`.
        b = torch.randn(1, 3, 224, 224)
        for _ in range(FLAGS.num_processes):
            a = torch.randn(1, 3, 224, 224)
            queue.put((a, b))
    for _ in range(FLAGS.num_processes):
        queue.put((None, None))  # sentinel value to signal subprocesses to exit
    for p in processes:
        p.join()  # wait for all subprocesses to finish
if __name__ == "__main__":
    app.run(main)
    # Source: https://stackoverflow.com/questions/73999265
复制相似问题