import chainer
import cupy
import matplotlib
# Report the versions of Chainer and its backends, followed by matplotlib's.
chainer.print_runtime_info()
print('matplotlib: {}'.format(matplotlib.__version__))
Platform: Linux-4.14.137+-x86_64-with-Ubuntu-18.04-bionic
Chainer: 6.5.0
ChainerX: Not Available
NumPy: 1.17.3
CuPy:
CuPy Version : 6.5.0
CUDA Root : /usr/local/cuda
CUDA Build Version : 10000
CUDA Driver Version : 10010
CUDA Runtime Version : 10000
cuDNN Build Version : 7603
cuDNN Version : 7603
NCCL Build Version : 2402
NCCL Runtime Version : 2402
iDeep: 2.0.0.post3
matplotlib: 3.1.1
!wget https://github.com/japan-medical-ai/medical-ai-course-materials/releases/download/v0.1/seq.h5
seq.h5 100%[===================>] 566.60M 71.1MB/s in 8.5s
2018-12-16 04:41:43 (66.7 MB/s) - ‘seq.h5’ saved [594118876/594118876]
!ls -lh
import h5py
import numpy as np
# List every entry stored in the HDF5 file together with its shape and dtype.
with h5py.File('seq.h5', 'r') as hf:
    for name, dataset in hf.items():
        print(name, dataset.shape, dataset.dtype)
(u'target_labels', (10,), dtype('S29'))
(u'test_in', (500, 131072, 4), dtype('bool'))
(u'test_out', (500, 1024, 10), dtype('<f2'))
(u'train_in', (5000, 131072, 4), dtype('bool'))
(u'train_out', (5000, 1024, 10), dtype('<f2'))
(u'valid_in', (500, 131072, 4), dtype('bool'))
(u'valid_out', (500, 1024, 10), dtype('<f2'))
%matplotlib inline
import matplotlib.pyplot as plt
# Plot the first training sample's target track for the first three targets.
# The file is opened explicitly read-only: without a mode, h5py 2.x opens the
# file read/write (and may create it), which is not wanted for inspection.
with h5py.File('seq.h5', 'r') as hf:
    y = hf['train_out'][:100]

# Use a wide, short figure (width 20, height 5) for the bar plots below.
plt.rcParams["figure.figsize"] = [20, 5]

# One bar series per target index, drawn over the same axes.
for i in range(3):
    plt.bar(range(y.shape[1]), y[0, :, i])
import chainer
import chainer.functions as F
import chainer.links as L
import cupy as cp
bc = 24  # base channel count that every layer width below is derived from

# One row per squeeze (strided) block: [out_ch, kernel, stride, dropout].
# The strides multiply to 2 * 4 * 4 * 4 = 128; the trailing notes are the
# original author's per-layer annotations.
default_squeeze_params = [
    [2 * bc, 21, 2, 0],            # 1: 128 -> 64
    [int(2.5 * bc), 7, 4, 0.05],   # 2: 64 -> 16
    [int(3.2 * bc), 7, 4, 0.05],   # 3: 16 -> 4
    [4 * bc, 7, 4, 0.05],          # 4: 4 -> 1
]

# One row per dilated block: [out_ch, kernel, dilation, dropout].
# The dilation doubles at every layer: 1, 2, 4, ..., 64.
default_dilated_params = [[bc, 3, 2 ** power, 0.1] for power in range(7)]
class Net(chainer.Chain):
    """Stack of squeeze blocks followed by densely connected dilated blocks.

    The squeeze blocks are applied one after another. Each dilated block then
    receives the list of all feature maps produced so far (the last squeeze
    output plus every earlier dilated output), and a final kernel-size-1
    convolution maps the concatenation of all of them to ``n_targets``
    channels.

    Args:
        squeeze_params: Rows of ``[out_ch, kernel, stride, dropout]``, one per
            squeeze block. The default list is only read, never modified.
        dilated_params: Rows of ``[out_ch, kernel, dilation, dropout]``, one
            per dilated block. The default list is only read, never modified.
        n_targets: Number of output channels of the final convolution.
    """

    def __init__(self, squeeze_params=default_squeeze_params,
                 dilated_params=default_dilated_params, n_targets=10):
        super(Net, self).__init__()
        self._n_squeeze = len(squeeze_params)
        self._n_dilated = len(dilated_params)
        with self.init_scope():
            # The input has 4 channels (see the (B, X, 4) layout in forward).
            in_ch = 4
            # Child links are registered as s_0, s_1, ... and d_0, d_1, ...;
            # these names are also the keys used when the model is serialized.
            for i, (out_ch, kernel, stride, do_rate) in enumerate(squeeze_params):
                setattr(self, "s_{}".format(i),
                        SqueezeBlock(in_ch, out_ch, kernel, stride, do_rate))
                in_ch = out_ch
            for i, (out_ch, kernel, dilated, do_rate) in enumerate(dilated_params):
                setattr(self, "d_{}".format(i),
                        DilatedBlock(in_ch, out_ch, kernel, dilated, do_rate))
                # Every dilated block sees all previous outputs concatenated,
                # so the input width grows by each block's output width.
                in_ch += out_ch
            self.l = L.ConvolutionND(1, None, n_targets, 1)

    def forward(self, x):
        """Run the network.

        Args:
            x: Input array laid out as (B, X, 4). It is converted to float32
                here, so any dtype supporting ``astype`` is accepted.

        Returns:
            A ``chainer.Variable`` with the channel axis moved last again,
            i.e. (B, X', n_targets).
        """
        # x : (B, X, 4) -> (B, 4, X), the channel-first layout convolutions use.
        xp = cp.get_array_module(x)
        h = xp.transpose(x, (0, 2, 1)).astype(xp.float32)
        for i in range(self._n_squeeze):
            h = self["s_{}".format(i)](h)
        hs = [h]
        for i in range(self._n_dilated):
            hs.append(self["d_{}".format(i)](hs))
        h = self.l(F.concat(hs, axis=1))
        # h is a Variable at this point, so use the Chainer function instead of
        # relying on the array module duck-typing Variable.transpose.
        return F.transpose(h, (0, 2, 1))
import chainer
import chainer.functions as F
import chainer.links as L
import cupy as cp
class WNConvolutionND(L.ConvolutionND):
    """``L.ConvolutionND`` with weight normalization.

    The effective kernel is ``g * W / ||W||``, where the norm is taken per
    output channel and ``g`` is a learnable per-output-channel scale that is
    initialized to the initial norm of ``W`` (so the effective kernel starts
    out equal to ``W``).

    All constructor arguments are forwarded to ``L.ConvolutionND``. The weight
    must be initialized at construction time, i.e. ``in_channels`` must be
    given explicitly.

    Raises:
        ValueError: If ``W`` is still uninitialized after construction.
    """

    def __init__(self, *args, **kwargs):
        super(WNConvolutionND, self).__init__(*args, **kwargs)
        w = self.W.data
        if w is None:
            raise ValueError(
                'WNConvolutionND requires in_channels to be specified so that '
                'W is initialized at construction time')
        # Per-output-channel L2 norm of the freshly initialized kernel.
        norm = np.linalg.norm(w.reshape(w.shape[0], -1), axis=1)
        # Register g through init_scope (add_param is deprecated).
        with self.init_scope():
            self.g = chainer.Parameter(norm.astype(w.dtype))

    def forward(self, x):
        """Apply the convolution using the weight-normalized kernel."""
        w_shape = self.W.shape
        # (out_ch, 1, ..., 1): broadcastable against W for any kernel rank.
        per_channel = (w_shape[0],) + (1,) * (len(w_shape) - 1)
        norm = F.batch_l2_norm_squared(self.W) ** 0.5
        norm_broadcasted = F.broadcast_to(F.reshape(norm, per_channel), w_shape)
        g_broadcasted = F.broadcast_to(F.reshape(self.g, per_channel), w_shape)
        return F.convolution_nd(
            x, g_broadcasted * self.W / norm_broadcasted, self.b, self.stride,
            self.pad, self.cover_all, self.dilate)
class SqueezeBlock(chainer.Chain):
    """Strided weight-normalized 1-D convolution with a gated activation.

    The convolution produces ``2 * out_ch`` channels. They are split in half
    along the channel axis; one half is multiplied by the sigmoid of the other
    half, and dropout with ratio ``do_rate`` is applied to the product.
    """

    def __init__(self, in_ch, out_ch, kernel, stride, do_rate):
        super(SqueezeBlock, self).__init__()
        self.do_rate = do_rate
        half_kernel = kernel // 2
        with self.init_scope():
            self.conv = WNConvolutionND(
                1, in_ch, 2 * out_ch, kernel, stride=stride, pad=half_kernel)

    def forward(self, x):
        """Return the gated, dropout-regularized convolution of ``x``."""
        value, gate = F.split_axis(self.conv(x), 2, 1)
        gated = value * F.sigmoid(gate)
        return F.dropout(gated, self.do_rate)
class DilatedBlock(chainer.Chain):
    """Dilated weight-normalized 1-D convolution with a gated activation.

    The block takes a list of feature maps, concatenates them along the
    channel axis and applies a stride-1 dilated convolution with
    ``2 * out_ch`` output channels. The result is split in half along the
    channel axis, one half is multiplied by the sigmoid of the other half,
    and dropout with ratio ``do_rate`` is applied.

    Args:
        in_ch: Total channel count of the concatenated inputs.
        out_ch: Number of output channels after gating.
        kernel: Kernel size. Length is preserved for odd kernel sizes.
        dilate: Dilation factor.
        do_rate: Dropout ratio.
    """

    def __init__(self, in_ch, out_ch, kernel, dilate, do_rate):
        super(DilatedBlock, self).__init__()
        self.do_rate = do_rate
        # Padding that keeps the output as long as the input for an odd
        # kernel. For kernel == 3 this equals ``dilate``, the value that was
        # previously hard-coded.
        pad = dilate * (kernel // 2)
        with self.init_scope():
            self.conv = WNConvolutionND(
                1, in_ch, out_ch * 2, kernel, pad=pad, dilate=dilate)

    def forward(self, xs):
        """Return the gated convolution of the channel-wise concat of ``xs``."""
        x = F.concat(xs, axis=1)
        h, g = F.split_axis(self.conv(x), 2, 1)
        return F.dropout(h * F.sigmoid(g), self.do_rate)
WNConvolutionND
が定義されています.1
を指定しています.
また,活性化関数では $h = Wx \otimes \sigma(Ux)$ と表されるGated Linear Unit[3]を利用しています.計算では効率化のため,$Wx$ と $Ux$ を別々に計算するのではなく2倍の出力チャンネル数を持つConvolutionを適用した後に出力結果をチャンネル方向に2つに分割し,片方にsigmoid関数を適用した後,それらを要素毎にかけ合わせます.concat
がそれに対応).これはニューラルネットワークで多くのスキップ接続を作ることで,層が増えても勾配が減衰せず,学習がしやすくなることを利用したものです.
import numpy as np
# Smoke test: push a dummy batch through a freshly built network and print
# the output shape.
n = Net()
size = 131072  # 128 * 1024
batchsize = 4
# The builtin ``bool`` replaces the deprecated ``np.bool`` alias; zeros give a
# deterministic input instead of uninitialized memory.
x = np.zeros((batchsize, size, 4), dtype=bool)
# Call the link itself (which dispatches to forward) rather than forward().
y = n(x)
print(y.shape)
(4, 1024, 10)
import chainer.functions as F
import math
import sklearn
import numpy as np
def log_poisson_loss(log_x, t):
    """Poisson negative log-likelihood for log-rate predictions, with offset.

    Computes ``mean(exp(log_x) - t * log_x) - mean(t - t * log(t))``. The
    second term does not depend on the prediction; subtracting it makes the
    loss zero when ``exp(log_x) == t``. Entries with ``t <= 0`` use
    ``log(1) = 0`` in the offset, so they contribute ``t`` to it.

    Args:
        log_x: Predicted log rates (``chainer.Variable`` or array).
        t: Target array (NumPy or CuPy); it is cast to float32 here.

    Returns:
        A scalar ``chainer.Variable``.
    """
    # Stay on whichever device t lives on: no host round trip and no hard
    # dependency on CuPy when everything runs on the CPU.
    xp = chainer.backend.get_array_module(t)
    t = t.astype(xp.float32)
    loss = F.mean(F.exp(log_x) - t * log_x)
    # Replace non-positive targets by 1 before the log so it stays finite.
    safe_t = xp.where(t > 0, t, xp.ones_like(t))
    offset = F.mean(t - t * xp.log(safe_t))
    return loss - offset
def log_r2_score(log_x, t):
    """Return ``F.r2_score`` between ``exp(log_x)`` and the targets ``t``."""
    predicted = F.exp(log_x)
    return F.r2_score(predicted, t)
from chainer import training
import numpy as np
import math
class CosineScheduler(training.Extension):
    """Trainer extension: linear warm-up followed by cosine decay.

    The scheduled value is a function of the updater's epoch counter:

    * ``epoch < n_warmups``:
      ``init_val / (n_warmups + 1) * (epoch + 1)``
    * otherwise:
      ``0.5 * init_val * (1 + cos(pi * (epoch - n_warmups) / span))`` with
      ``span = n_decays - n_warmups``; once ``span`` epochs have elapsed the
      value stays at 0 instead of rising again.

    Args:
        attr: Name of the optimizer attribute to set (e.g. ``'lr'``, ``'alpha'``).
        init_val: Peak value, reached when the warm-up ends.
        n_decays: Epoch at which the cosine reaches zero (warm-up included).
        n_warmups: Number of warm-up epochs.
        target: Stored but not used by this extension.
        optimizer: Optimizer to update; defaults to the trainer's ``'main'``
            optimizer.
    """

    def __init__(self, attr='lr', init_val=0.0001, n_decays=200, n_warmups=3,
                 target=None, optimizer=None):
        self._attr = attr
        self._target = target
        self._optimizer = optimizer
        self._min_loss = None
        self._last_value = None
        self._init_val = init_val
        # Guard against a zero-length (or negative) decay span, which would
        # otherwise divide by zero in the cosine term.
        self._n_decays = max(n_decays - n_warmups, 1)
        self._decay_count = 0
        self._n_warmups = n_warmups

    def initialize(self, trainer):
        """Apply the scheduled value before the first update.

        Without this, training up to the first invocation would run with
        whatever value the optimizer was constructed with, skipping the first
        warm-up step.
        """
        self(trainer)

    def __call__(self, trainer):
        """Set the optimizer attribute to the value for the current epoch."""
        optimizer = self._get_optimizer(trainer)
        self._update_value(optimizer, self._value_at(trainer.updater.epoch))

    def _value_at(self, epoch):
        # Scheduled value for a given epoch count.
        if epoch < self._n_warmups:
            return self._init_val / (self._n_warmups + 1) * (epoch + 1)
        elapsed = min(epoch - self._n_warmups, self._n_decays)
        return 0.5 * self._init_val * (
            1 + math.cos(math.pi * elapsed / self._n_decays))

    def _get_optimizer(self, trainer):
        return self._optimizer or trainer.updater.get_optimizer('main')

    def _update_value(self, optimizer, value):
        setattr(optimizer, self._attr, value)
        self._last_value = value
import chainer
import random
class PreprocessedDataset(chainer.dataset.DatasetMixin):
    """Dataset wrapper that randomly shifts each input along its first axis.

    Args:
        xs: Indexable collection of inputs.
        ys: Indexable collection of targets, aligned with ``xs``.
        max_shift: Largest shift (in positions, either direction) to apply.
    """

    def __init__(self, xs, ys, max_shift):
        self.xs = xs
        self.ys = ys
        self.max_shift = max_shift

    def __len__(self):
        return len(self.xs)

    def get_example(self, i):
        """Return ``(x, y)`` for index ``i`` with ``x`` circularly shifted.

        The shift is drawn uniformly from ``[-max_shift, max_shift]`` and
        applied with ``np.roll`` along axis 0; the target is returned as is.
        """
        sample, target = self.xs[i], self.ys[i]
        shift = random.randint(-self.max_shift, self.max_shift)
        return np.roll(sample, shift, axis=0), target
`ratio` 分だけを学習,検証用データとして利用します.今回 `ratio` は1に設定されています.この場合30分程度で学習が完了します.短い時間で試したい方は `ratio=1` を `ratio=10` や `ratio=20` として実験してみてください.
import chainer
import chainer.functions as F
import chainer.links as L
import numpy as np
from chainer.training import extensions
from chainer import training
import h5py
# Open the dataset explicitly read-only (without a mode, h5py 2.x opens the
# file read/write). The handle is intentionally left open: test_x / test_y
# are not sliced here and are indexed later in the file.
ml_h5 = h5py.File('seq.h5', 'r')

train_x = ml_h5['train_in']
train_y = ml_h5['train_out']

valid_x = ml_h5['valid_in']
valid_y = ml_h5['valid_out']

test_x = ml_h5['test_in']
test_y = ml_h5['test_out']

# Keep only the first 1/ratio of the training and validation samples.
# Slicing an h5py dataset reads the selected rows into memory.
ratio = 1
train_x = train_x[:len(train_x)//ratio]
train_y = train_y[:len(train_y)//ratio]
valid_x = valid_x[:len(valid_x)//ratio]
valid_y = valid_y[:len(valid_y)//ratio]

# Training inputs are randomly shifted by up to this many positions.
max_shift_for_data_augmentation = 5
train = PreprocessedDataset(train_x, train_y, max_shift_for_data_augmentation)
val = chainer.datasets.TupleDataset(valid_x, valid_y)

batchsize = 8

train_iter = chainer.iterators.SerialIterator(train, batchsize)
val_iter = chainer.iterators.SerialIterator(val, batchsize, repeat=False, shuffle=False)

# Classifier is used only as a loss/metric wrapper around the network.
model = L.Classifier(Net(), lossfun=log_poisson_loss, accfun=log_r2_score)

lr = 0.001
optimizer = chainer.optimizers.Adam(alpha=lr, beta1=0.97, beta2=0.98)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer_hooks.GradientClipping(threshold=0.01))

# Single place to choose the GPU used for both training and evaluation.
device = 0
updater = training.updaters.StandardUpdater(
    train_iter, optimizer, device=device)

n_epochs = 10
n_warmups = 0
out = "out"
trainer = training.Trainer(updater, (n_epochs, 'epoch'), out=out)

# Once per epoch: update Adam's alpha, and snapshot the model as
# out/model_epoch_<epoch>.
trainer.extend(CosineScheduler(attr='alpha', init_val=lr, n_decays=n_epochs, n_warmups=n_warmups), trigger=(1, 'epoch'))
trainer.extend(extensions.Evaluator(val_iter, model, device=device))
trainer.extend(extensions.LogReport(trigger=(0.2, 'epoch')))
trainer.extend(extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'), trigger=(1, 'epoch'))
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']), trigger=(0.1, 'epoch'))
# A live progress bar can be enabled with:
# trainer.extend(extensions.ProgressBar())
trainer.run()
!ls -l out/
import chainer
import chainer.links as L
%matplotlib inline
import matplotlib.pyplot as plt
# Load the snapshot written after the given epoch and compare the prediction
# for the first test sample with the observed values.
model_n_epoch = 10
out_dir = 'out'
model = L.Classifier(Net())
chainer.serializers.load_npz('{}/model_epoch_{}'.format(out_dir, model_n_epoch), model)

predictor = model.predictor

print(len(test_x))
# Inference must run with train=False: the network's blocks call F.dropout,
# which would otherwise randomly zero activations and make the prediction
# noisy. no_backprop_mode additionally skips building the graph.
with chainer.using_config('train', False), chainer.no_backprop_mode():
    # The network outputs log values; exponentiate to compare with test_y.
    test_y_estimated = F.exp(predictor(test_x[:1]))
test_y = test_y[:1]

print(test_y_estimated.shape)
print(test_y_estimated[0,:,0])
y = test_y_estimated.data

# Use a 20 x 10 figure for the comparison plot.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0] = 20
fig_size[1] = 10

# Overlay estimated and observed values for target index i.
i = 0
b1 = plt.bar(range(y.shape[1]), y[0,:,i])
b2 = plt.bar(range(y.shape[1]), test_y[0,:,i])
plt.legend((b1, b2), ('estimated', 'observed'))