# 1. 前言

最近在學習 PyTorch，先照着官方的“60分鐘教程”學習了一下，然後在 GitHub 上找了兩個 star 比較多的項目，自己寫了一下，學習一下別人的寫法。

# 2. Deep Learning with PyTorch: A 60 Minute Blitz

## 2.1 base operations

``````python
# base operations
import torch

# Uninitialised 5x3 tensor: the printed values are arbitrary.
x = torch.empty(5, 3)
print(x)

# 1x2 tensor of uniform random numbers.
x = torch.rand(1, 2)
print(x)

# 4x5 tensor of zeros with an integer dtype.
x = torch.zeros(4, 5, dtype=torch.long)
print(x)

# Tensor built directly from Python data.
x = torch.tensor([5.5, 3])
print(x)

# new_ones creates a tensor from an existing one, here overriding its dtype.
x = x.new_ones(5, 3, dtype=torch.double)
print(x)

# Same shape as x, normally distributed values, dtype overridden to float.
x = torch.randn_like(x, dtype=torch.float)
print(x)
print(x.size())

# Addition, operator form.
y = torch.rand(5, 3)
print(x + y)

# Pre-allocated output buffer (contents are arbitrary until written).
result = torch.empty(5, 3)
print(result)

# Operator form: allocates a new tensor and rebinds the name.
result = x + y
print('result = ', result)

# Function form with out=: writes the sum into the existing tensor instead.
# Without this call the second print was a verbatim repeat of the first.
torch.add(x, y, out=result)
print('result2 = ', result)

print(y)
# In-place transpose (the trailing underscore marks an in-place operation).
y.t_()
print(y)
# Reshape with view; -1 lets PyTorch infer that dimension.
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8)
print(x.size(), y.size(), z.size())
print(y)
# Single-element tensor:
# If you have a one element tensor, use .item() to get the value as a Python number
x = torch.randn(1)
print(x)
print(x.item())

# NumPy bridge: convert between torch tensors and NumPy arrays.
import numpy as np

a = torch.ones(5)
b = a.numpy()
print(a)
print(b)

# Update the tensor in place and print both again: the array returned by
# .numpy() reflects the change. (The original printed the pair twice with no
# operation in between, so the second pair of prints showed nothing new.)
a.add_(1)
print(a)
print(b)

# Reverse direction: a tensor created with from_numpy follows in-place changes
# made to the source array.
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

# CUDA: run the addition on the GPU when one is available.
# The body of this `if` had lost its indentation, which is a syntax error.
if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(x, device=device)  # create the tensor on the GPU
    x = x.to(device)                       # move the existing tensor there
    z = x + y
    print(z)
    # .to() can change device and dtype in a single call
    print(z.to("cpu", torch.double))

# Neural Networks
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """LeNet-style CNN: two conv + max-pool stages, then three linear layers.

    With the layer sizes below, a (N, 1, 32, 32) input reaches ``fc1`` as
    16*5*5 = 400 features per sample, and the network returns (N, 10) scores.
    """

    def __init__(self):
        # Initialise nn.Module first so the layers assigned below are
        # registered as sub-modules (and their weights as parameters).
        super().__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 output scores for every sample in batch ``x``."""
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the window is square a single number is enough
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        # torch.Size.numel() multiplies the remaining dimensions (1 if none).
        return x.size()[1:].numel()

net = Net()
print(net)

'''
Net(
(conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(fc1): Linear(in_features=400, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)
'''

'''
You just have to define the forward function, and the backward function (where gradients are computed) is
automatically defined for you using autograd. You can use any of the Tensor operations in the forward function. x
'''

# The learnable parameters of a model are returned by net.parameters()
params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight

# output
'''
10
torch.Size([6, 1, 5, 5])
'''
# Let's try a random 32x32 input. Note: the expected input size of this net (LeNet) is 32x32.
# To use this net on the MNIST dataset, resize the images from the dataset to 32x32.

input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)
# Zero the gradient buffers of all parameters, then backprop with random gradients.
# The zeroing call was described here but missing from the code.
net.zero_grad()
out.backward(torch.randn(1, 10))

'''
torch.nn only supports mini-batches. The entire torch.nn package only supports inputs that are
a mini-batch of samples, and not a single sample.
For example, nn.Conv2d will take in a 4D Tensor of nSamples x nChannels x Height x Width.
If you have a single sample, just use input.unsqueeze(0) to add a fake batch dimension.
'''
# Recap
'''
Recap:
torch.Tensor - A multi-dimensional array with support for autograd operations like backward(). Also holds the gradient w.r.t. the tensor.
nn.Module - Neural network module. Convenient way of encapsulating parameters, with helpers for moving them to GPU, exporting, loading, etc.
nn.Parameter - A kind of Tensor, that is automatically registered as a parameter when assigned as an attribute to a Module.
autograd.Function - Implements forward and backward definitions of an autograd operation. Every Tensor operation, creates at least a single Function node,
that connects to functions that created a Tensor and encodes its history.
'''

output = net(input)
target = torch.randn(10)     # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)
# So, when we call loss.backward(), the whole graph is differentiated w.r.t. the loss,
# and all Tensors in the graph that have requires_grad=True will have their .grad Tensor
# accumulated with the gradient.

# For illustration, let us follow a few steps backward:

# BACKPROP
# To backpropagate the error all we have to do is call loss.backward(). You need to clear the
# existing gradients first, otherwise the new gradients are accumulated onto the existing ones.
# Now we call loss.backward() and look at conv1's bias gradients before and after the backward.

net.zero_grad()  # clear the gradients left by the earlier backward pass

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

# Observe how gradient buffers had to be manually set to zero using optimizer.zero_grad().
# This is because gradients are accumulated, as explained in the Backprop section.
import torch.optim as optim
Optimizer = optim.SGD(net.parameters(), lr=0.01)
Optimizer.zero_grad()  # zero the gradient buffers before this step's backward pass
output = net(input)
loss = criterion(output, target)
loss.backward()
Optimizer.step() ## Does the update
``````

## 2.2 train a classifier

``````python
# 1. load and normalize
import torch
import torchvision
import torchvision.transforms as transforms

# The output of torchvision datasets are PILImage images of range [0, 1]. We transform them to
# Tensors of normalized range [-1, 1].
# PIL: Python Imaging Library, the standard image-processing library on Python.
transform = transforms.Compose([
    transforms.ToTensor(),  # ToTensor must run before Normalize
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# NOTE(review): the loader definitions were missing from this listing although
# `trainloader` is used by the training loop below. They are reconstructed here
# as CIFAR-10 with batch size 4, which matches the class names, the 3-channel
# normalisation and the "4 is batch_size" remark further down. The root
# directory './data' is an assumption - adjust as needed.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# show training images (kept disabled inside a string, as in the original notes)
'''
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image

def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    # draw with imshow (channels last), then display the figure with show()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

'''
# 2.define a foreward network

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN for 3-channel images: two conv + max-pool stages, three linear layers.

    With the layer sizes below, a (N, 3, 32, 32) input reaches ``fc1`` as
    16*5*5 = 400 features per sample, and the network returns (N, 10) scores.
    """

    def __init__(self):
        super().__init__()
        # first, define the convolutional filters and the fully connected weights
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.max_pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 class scores for every sample in batch ``x``."""
        x = self.max_pool(F.relu(self.conv1(x)))
        x = self.max_pool(F.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, 400), this keeps the batch
        # dimension fixed, so a wrongly sized input fails at fc1 instead of
        # being silently regrouped into a different number of rows.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()

# 3. choose an optimizer
import torch.optim as optim

criterion = nn.CrossEntropyLoss()  # loss
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# 4. train the network
# NOTE(review): `trainloader` is expected to be a DataLoader yielding
# (inputs, labels) batches; confirm it is defined before this point.

for epoch in range(2):  # loop over the whole dataset multiple times
    total_loss = 0.0
    for i, data in enumerate(trainloader, 0):  # start from 0
        inputs, labels = data  # get a batch of data

        # Gradients accumulate across backward() calls, so reset them for
        # every batch. This call was missing from the loop.
        optimizer.zero_grad()

        outputs = net(inputs)              # forward pass
        loss = criterion(outputs, labels)  # loss of this batch
        loss.backward()                    # compute every parameter's gradient
        optimizer.step()                   # update the parameters
        total_loss += loss.item()

        # print the running average loss every 2000 mini-batches
        if i % 2000 == 1999:
            print('[%d %5d] loss: %.3f' % (epoch+1, i+1, total_loss / 2000))
            total_loss = 0.0

print('Finished Training')

# 5.test the network on the test data
'''
images, labels = dataiter.next()

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

outputs = net(images)
'''
# get the accurate

# NOTE(review): `testloader` is expected to be a DataLoader over the test split
# yielding (images, labels) batches; confirm it is defined before this point.
# The original code reused `data`, the last *training* batch, so the reported
# "test" accuracy was computed on a single training batch.

total = 0
correct = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for data in testloader:
        images, labels = data
        outputs = net(images)
        # torch.max(t, 1) returns (values, indices) per row; only the index
        # of the largest score is needed, so the values go to `_`.
        _, prediction = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (prediction == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

# per-class accuracy
class_correct = [0.] * 10  # one counter per class
class_total = [0.] * 10

with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, prediction = torch.max(outputs, 1)
        c = (prediction == labels)  # one boolean per sample in the batch
        # iterate over the actual batch length instead of a hard-coded 4
        for i in range(labels.size(0)):
            label = labels[i].item()
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    if class_total[i] == 0:
        # avoid dividing by zero for a class that never appeared
        print('Accuracy of %5s : N/A (no samples)' % classes[i])
        continue
    print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

# train on single GPU
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Assume that we are on a CUDA machine, then this should print a CUDA device:
print(device)
net.to(device)
# Send the batch to the same device as the network.
inputs = inputs.to(device)
labels = labels.to(device)

# train on multiple GPU
'''
DataParallel splits your data automatically and sends job orders to multiple models on several GPUs. After
each model finishes their job, DataParallel collects and merges the results before returning it to you.
'''

import torch
import torch.nn as nn

# Dataset / DataLoader are used below but were never imported in this listing.
from torch.utils.data import Dataset, DataLoader

input_size = 5
output_size = 2

batch_size = 30
data_size = 100

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


# dummy dataset
class RandomDataset(Dataset):
    """Dataset of ``length`` random vectors with ``size`` features each."""

    def __init__(self, size, length):
        self.len = length
        self.data = torch.randn(length, size)

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return self.len


# The opening half of this call was missing; only its trailing
# `batch_size=batch_size, shuffle=True)` survived in the original listing.
rand_loader = DataLoader(dataset=RandomDataset(input_size, data_size),
                         batch_size=batch_size, shuffle=True)


class Model(nn.Module):
    """Single linear layer that prints the batch sizes it sees."""

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, input):
        output = self.fc(input)
        # Printed from inside the model; under DataParallel this line runs once
        # per replica (see the sample outputs quoted further down).
        print("\tIn Model: input size", input.size(),
              "output size", output.size())

        return output


model = Model(input_size, output_size)
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model = nn.DataParallel(model)

model.to(device)

# Feed every batch through the model. The loop header was missing, which left
# `data` undefined in this section.
for data in rand_loader:
    input = data.to(device)
    output = model(input)
    print("Outside: input size", input.size(),
          "output_size", output.size())
'''
If you have 2, you will see:
# on 2 GPUs
Let's use 2 GPUs!
In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
In Model: input size torch.Size([5, 5]) output size torch.Size([5, 2])
In Model: input size torch.Size([5, 5]) output size torch.Size([5, 2])
Outside: input size torch.Size([10, 5]) output_size torch.Size([10, 2])
'''

'''If you have 3 GPUs, you will see:

Let's use 3 GPUs!
In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
Outside: input size torch.Size([10, 5]) output_size torch.Size([10, 2])'''

'''If you have 8, you will see:

Let's use 8 GPUs!
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
Outside: input size torch.Size([10, 5]) output_size torch.Size([10, 2])'''
``````

# 3. 規範化 PyTorch 訓練 MNIST 數據集

``````python
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
import argparse

class Net(nn.Module):
    """CNN for single-channel images: two conv stages and two linear layers.

    With the layer sizes below, a (N, 1, 28, 28) input reaches ``fc1`` as
    20*4*4 = 320 features per sample. The output is (N, 10) log-probabilities.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.conv2 = nn.Conv2d(10, 20, 5)
        self.conv2_dropout = nn.Dropout2d()
        self.max_pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities for every sample in ``x``."""
        x = F.relu(self.max_pool(self.conv1(x)))
        # dropout is applied before pooling and the relu activation
        x = F.relu(self.max_pool(self.conv2_dropout(self.conv2(x))))
        # Flatten per sample; keeps the batch dimension fixed so a wrongly
        # sized input fails at fc1 instead of being silently regrouped.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        # functional dropout must be told whether the module is training
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # dim=1 normalises across the class scores; the result is a
        # log-probability per class, as expected by F.nll_loss
        return F.log_softmax(x, dim=1)

def train(args, model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        args: namespace providing ``log_interval`` (log every N batches).
        model: network returning log-probabilities (the loss is ``F.nll_loss``).
        device: device the batches are moved to before the forward pass.
        train_loader: iterable of ``(images, labels)`` batches that also
            exposes ``.dataset`` (used for the progress line).
        optimizer: optimizer that updates ``model``'s parameters.
        epoch: epoch number, used only in the log output.
    """
    # Enable training-mode behaviour of layers such as dropout / batch norm.
    model.train()
    # The batch loop header was missing, leaving `data` and `i` undefined.
    for i, data in enumerate(train_loader):
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)

        # Gradients accumulate across backward() calls; reset them per batch.
        optimizer.zero_grad()
        outputs = model(images)
        loss = F.nll_loss(outputs, labels)
        loss.backward()
        optimizer.step()

        # Progress line. The loss shown is the loss of this single batch, not
        # an average over the last log_interval batches.
        if i % args.log_interval == 0:
            # The format string has five placeholders; the first three
            # arguments (epoch, samples seen, dataset size) were missing.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i * len(images), len(train_loader.dataset),
                100. * i / len(train_loader), loss.item()))

def evaluate(model, device, test_loader):
    """Print and return the average NLL loss and number of correct predictions.

    Args:
        model: network returning log-probabilities.
        device: device the batches are moved to before the forward pass.
        test_loader: iterable of ``(images, labels)`` batches.

    Returns:
        ``(average_loss, correct_num)`` over all samples seen.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    model.eval()  # switch dropout etc. to inference behaviour
    correct_num = 0
    total_num = 0
    test_loss = 0.
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            prediction = outputs.argmax(dim=1)  # predicted class per sample
            correct_num += (prediction == labels).sum().item()  # correct in this batch
            test_loss += F.nll_loss(outputs, labels, reduction='sum').item()  # sum up batch loss
            total_num += labels.size(0)

    if total_num == 0:
        raise ValueError('test_loader yielded no samples')

    test_loss /= total_num
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct_num, total_num,
        100. * correct_num / total_num))
    return test_loss, correct_num


def main():
    """Parse the command line, build the MNIST loaders, then train and evaluate."""
    # NOTE(review): every `parser.add_argument(` opener was missing from this
    # listing; only the help strings survived. The attribute names come from
    # the `args.*` uses below and the defaults from the help texts. The flag
    # spellings and the log-interval default (10) are assumptions to confirm.
    parser = argparse.ArgumentParser(description='PyTorch MNIST')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    # NOTE(review): both `DataLoader(` openers were truncated. The training one
    # is rebuilt to mirror the surviving test-set call; `download=True` is an
    # assumption so that the data is fetched on first run.
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        evaluate(model, device, test_loader)


if __name__ == '__main__':
    main()
``````