Trying Out PyTorch's C++ API

PyTorch provides a C++ API.

pytorch.org

Multilayer Perceptron

  • Build (a CMakeLists.txt sketch follows the commands below)
$ mkdir build
$ cd build
$ cmake -DCMAKE_PREFIX_PATH=/absolute/path/to/libtorch ..
$ make
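
The cmake invocation above assumes a CMakeLists.txt at the project root. A minimal sketch, following the pattern in the official LibTorch documentation (the target name simple_mnist matches the binary run below; the source file name simple_mnist.cpp is an assumption):

cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
project(simple_mnist)

find_package(Torch REQUIRED)

add_executable(simple_mnist simple_mnist.cpp)
target_link_libraries(simple_mnist "${TORCH_LIBRARIES}")
set_property(TARGET simple_mnist PROPERTY CXX_STANDARD 14)

The training program itself: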
#include <torch/torch.h>
#include <iostream>

struct Net : torch::nn::Module {
  Net() {
    fc1 = register_module("fc1", torch::nn::Linear(784, 64));
    fc2 = register_module("fc2", torch::nn::Linear(64, 32));
    fc3 = register_module("fc3", torch::nn::Linear(32, 10));
  }

  torch::Tensor forward(torch::Tensor x) {
    x = torch::relu(fc1->forward(x.reshape({x.size(0), 784})));
    x = torch::dropout(x, /*p=*/0.5, /*train=*/is_training());
    x = torch::relu(fc2->forward(x));
    x = torch::log_softmax(fc3->forward(x), /*dim=*/1);
    return x;
  }

  torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr};
};

int main() {
  auto net = std::make_shared<Net>();

  // Multi-threaded data loader for MNIST; expects the dataset files in ./data.
  auto data_loader = torch::data::make_data_loader(
      torch::data::datasets::MNIST("./data").map(
          torch::data::transforms::Stack<>()),
      /*batch_size=*/64);

  torch::optim::SGD optimizer(net->parameters(), /*lr=*/0.01);

  for (size_t epoch = 1; epoch <= 10; ++epoch) {
    torch::Tensor loss;
    for (auto& batch : *data_loader) {
      optimizer.zero_grad();

      torch::Tensor prediction = net->forward(batch.data);
      loss = torch::nll_loss(prediction, batch.target);

      loss.backward();
      optimizer.step();
    }
    std::cout << "Epoch: " << epoch << " | Loss: " << loss.item<float>() << std::endl;
  }
}
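
Since forward applies dropout conditioned on is_training(), the module should be switched to eval mode before inference. A minimal sketch with a dummy input (the torch::randn tensor is just a stand-in for a real MNIST image):

// Inference sketch: eval() disables dropout, NoGradGuard disables autograd.
net->eval();
torch::NoGradGuard no_grad;
auto logits = net->forward(torch::randn({1, 784}));
std::cout << "Predicted class: " << logits.argmax(1).item<int64_t>() << std::endl;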

Experiment

$ time ./simple_mnist
Epoch: 1 | Loss: 1.0654
Epoch: 2 | Loss: 0.912221
Epoch: 3 | Loss: 0.585899
Epoch: 4 | Loss: 0.733495
Epoch: 5 | Loss: 0.408964
Epoch: 6 | Loss: 0.52878
Epoch: 7 | Loss: 0.245786
Epoch: 8 | Loss: 0.161901
Epoch: 9 | Loss: 0.138979
Epoch: 10 | Loss: 0.178226

real  0m11.544s
user  0m11.723s
sys   0m0.579s

That's fast...

Comparison with the Python API

  • Experiment with the same network as the C++ version
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.fc1(x))
        # Match the C++ version: dropout must be disabled in eval mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
$ time python mnist.py
Train Epoch: 1  Loss: 0.553110
Train Epoch: 2  Loss: 0.268289
Train Epoch: 3  Loss: 0.469516
Train Epoch: 4  Loss: 0.286759
Train Epoch: 5  Loss: 0.391750
Train Epoch: 6  Loss: 0.113655
Train Epoch: 7  Loss: 0.208460
Train Epoch: 8  Loss: 0.171030
Train Epoch: 9  Loss: 0.165328
Train Epoch: 10     Loss: 0.194463

real  1m18.185s
user  1m15.111s
sys   0m1.610s
  • The gap is substantial

Convolutional Neural Network

  • Let's try a CNN as well

C++ API

struct Net : torch::nn::Module {
  Net()
      : conv1(torch::nn::Conv2dOptions(1, 10, /*kernel_size=*/5)),
        conv2(torch::nn::Conv2dOptions(10, 20, /*kernel_size=*/5)),
        fc1(320, 50),
        fc2(50, 10) {
    register_module("conv1", conv1);
    register_module("conv2", conv2);
    register_module("conv2_drop", conv2_drop);
    register_module("fc1", fc1);
    register_module("fc2", fc2);
  }

  torch::Tensor forward(torch::Tensor x) {
    x = torch::relu(torch::max_pool2d(conv1->forward(x), 2));
    x = torch::relu(
        torch::max_pool2d(conv2_drop->forward(conv2->forward(x)), 2));
    x = x.view({-1, 320});
    x = torch::relu(fc1->forward(x));
    x = torch::dropout(x, /*p=*/0.5, /*train=*/is_training());
    x = fc2->forward(x);
    return torch::log_softmax(x, /*dim=*/1);
  }

  torch::nn::Conv2d conv1;
  torch::nn::Conv2d conv2;
  torch::nn::FeatureDropout conv2_drop;  // channel-wise dropout (Dropout2d)
  torch::nn::Linear fc1;
  torch::nn::Linear fc2;
};
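
The results below report test-set numbers, but no evaluation loop appears in the snippets above. It would look roughly like the following (a sketch, assuming net is the CNN instantiated as a std::shared_ptr<Net> and the data directory is ./data as before):

// Test data loader over the MNIST test split (10,000 images).
auto test_loader = torch::data::make_data_loader(
    torch::data::datasets::MNIST("./data",
                                 torch::data::datasets::MNIST::Mode::kTest)
        .map(torch::data::transforms::Stack<>()),
    /*batch_size=*/1000);

torch::NoGradGuard no_grad;  // no gradients needed during evaluation
net->eval();                 // disables dropout
double test_loss = 0;
int64_t correct = 0;
for (const auto& batch : *test_loader) {
  auto output = net->forward(batch.data);
  // Sum per-sample losses so we can average over the whole test set.
  test_loss += torch::nll_loss(output, batch.target,
                               /*weight=*/{}, torch::Reduction::Sum)
                   .item<double>();
  correct += output.argmax(1).eq(batch.target).sum().item<int64_t>();
}
const double dataset_size = 10000;  // size of the MNIST test set
std::cout << "Test set: Average loss: " << test_loss / dataset_size
          << ", Accuracy: " << static_cast<double>(correct) / dataset_size
          << std::endl;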

Python API

# Note: unlike the C++ network above, this version applies no dropout.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, 5, 1)
        self.conv2 = nn.Conv2d(10, 20, 5, 1)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

Results

C++ API

Training on CPU
Train Epoch: 1  Loss: 0.482753
Test set: Average loss: 0.210824, Accuracy: 0.9351
Train Epoch: 2  Loss: 0.303543
Test set: Average loss: 0.12888, Accuracy: 0.9595
Train Epoch: 3  Loss: 0.268524
Test set: Average loss: 0.105587, Accuracy: 0.9659
Train Epoch: 4  Loss: 0.0948553
Test set: Average loss: 0.0887088, Accuracy: 0.9722
Train Epoch: 5  Loss: 0.0923207
Test set: Average loss: 0.0826953, Accuracy: 0.9738
Train Epoch: 6  Loss: 0.0482286
Test set: Average loss: 0.0713827, Accuracy: 0.977
Train Epoch: 7  Loss: 0.0770162
Test set: Average loss: 0.0682198, Accuracy: 0.9788
Train Epoch: 8  Loss: 0.168226
Test set: Average loss: 0.0625995, Accuracy: 0.98
Train Epoch: 9  Loss: 0.0598378
Test set: Average loss: 0.0611931, Accuracy: 0.9803
Train Epoch: 10 Loss: 0.106873
Test set: Average loss: 0.0558695, Accuracy: 0.9833

real    2m35.904s
user    6m44.229s
sys     0m5.343s
Python API

Train Epoch: 1  Loss: 0.079955
Test set: Average loss: 0.1384, Accuracy: 9585/10000 (96%)
Train Epoch: 2  Loss: 0.081791
Test set: Average loss: 0.0920, Accuracy: 9701/10000 (97%)
Train Epoch: 3  Loss: 0.390446
Test set: Average loss: 0.0689, Accuracy: 9789/10000 (98%)
Train Epoch: 4  Loss: 0.155608
Test set: Average loss: 0.0529, Accuracy: 9831/10000 (98%)
Train Epoch: 5  Loss: 0.009452
Test set: Average loss: 0.0469, Accuracy: 9846/10000 (98%)
Train Epoch: 6  Loss: 0.179386
Test set: Average loss: 0.0418, Accuracy: 9857/10000 (99%)
Train Epoch: 7  Loss: 0.013835
Test set: Average loss: 0.0414, Accuracy: 9861/10000 (99%)
Train Epoch: 8  Loss: 0.015339
Test set: Average loss: 0.0416, Accuracy: 9866/10000 (99%)
Train Epoch: 9  Loss: 0.014495
Test set: Average loss: 0.0432, Accuracy: 9867/10000 (99%)
Train Epoch: 10 Loss: 0.007735
Test set: Average loss: 0.0370, Accuracy: 9887/10000 (99%)


real    3m36.431s
user    9m46.867s
sys     0m6.731s
  • The C++ frontend turned out to be quite a pleasant API to write, so I'd like to get proficient with it