
import chainer
import cupy
import matplotlib
chainer.print_runtime_info()
print('matplotlib:', matplotlib.__version__)
Platform: Linux-4.14.137+-x86_64-with-Ubuntu-18.04-bionic
Chainer: 6.5.0
ChainerX: Not Available
NumPy: 1.17.3
CuPy:
  CuPy Version          : 6.5.0
  CUDA Root             : /usr/local/cuda
  CUDA Build Version    : 10000
  CUDA Driver Version   : 10010
  CUDA Runtime Version  : 10000
  cuDNN Build Version   : 7603
  cuDNN Version         : 7603
  NCCL Build Version    : 2402
  NCCL Runtime Version  : 2402
iDeep: 2.0.0.post3
matplotlib: 3.1.1


!wget https://github.com/japan-medical-ai/medical-ai-course-materials/releases/download/v0.1/seq.h5
seq.h5              100%[===================>] 566.60M  71.1MB/s    in 8.5s    
2018-12-16 04:41:43 (66.7 MB/s) - ‘seq.h5’ saved [594118876/594118876]

!ls -lh
import h5py
import numpy as np
with h5py.File('seq.h5', 'r') as hf:
    for key in hf.keys():
        print(key, hf[key].shape, hf[key].dtype)
(u'target_labels', (10,), dtype('S29'))
(u'test_in', (500, 131072, 4), dtype('bool'))
(u'test_out', (500, 1024, 10), dtype('<f2'))
(u'train_in', (5000, 131072, 4), dtype('bool'))
(u'train_out', (5000, 1024, 10), dtype('<f2'))
(u'valid_in', (500, 131072, 4), dtype('bool'))
(u'valid_out', (500, 1024, 10), dtype('<f2'))
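From the shapes above, each input is a one-hot encoded DNA sequence (131,072 positions over 4 bases, stored as bool), and each output is 10 target coverage tracks over 1,024 bins, so one bin summarizes 128 positions. A quick check of that arithmetic (an inference from the printed shapes, not stated in the file itself):

# 131072 input positions / 1024 output bins = 128 positions per bin
print(131072 // 1024)  # 128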

%matplotlib inline
import matplotlib.pyplot as plt
with h5py.File('seq.h5', 'r') as hf:
    y = hf['train_out'][:100]
    fig_size = plt.rcParams["figure.figsize"]
    fig_size[0] = 20
    fig_size[1] = 5
    for i in range(3):
        plt.bar(range(y.shape[1]), y[0,:,i])



import chainer
import chainer.functions as F
import chainer.links as L
import cupy as cp
bc = 24 # base channel
default_squeeze_params = [
    # out_ch, kernel, stride, dropout
    [bc*2, 21, 2, 0], #1 128 -> 64
    [int(bc*2.5), 7, 4, 0.05], #2  64 -> 16
    [int(bc*3.2), 7, 4, 0.05], #3  16 -> 4
    [bc*4, 7, 4, 0.05]  #4  4 -> 1
]
default_dilated_params = [
# out_ch, kernel, dilated, dropout
  [bc, 3, 1, 0.1],
  [bc, 3, 2, 0.1], 
  [bc, 3, 4, 0.1], 
  [bc, 3, 8, 0.1], 
  [bc, 3, 16, 0.1], 
  [bc, 3, 32, 0.1],
  [bc, 3, 64, 0.1]
]
class Net(chainer.Chain):
    def __init__(self, squeeze_params=default_squeeze_params, dilated_params=default_dilated_params, n_targets=10):
        super(Net, self).__init__()
        self._n_squeeze = len(squeeze_params)
        self._n_dilated = len(dilated_params)
        with self.init_scope():
            in_ch = 4
            for i, param in enumerate(squeeze_params):
                out_ch, kernel, stride, do_rate = param
                setattr(self, "s_{}".format(i), SqueezeBlock(in_ch, out_ch, kernel, stride, do_rate))
                in_ch = out_ch
            for i, param in enumerate(dilated_params):
                out_ch, kernel, dilated, do_rate = param
                setattr(self, "d_{}".format(i), DilatedBlock(in_ch, out_ch, kernel, dilated, do_rate))
                in_ch += out_ch
            self.l = L.ConvolutionND(1, None, n_targets, 1)
    
    def forward(self, x):
        # x : (B, X, 4)
        xp = cp.get_array_module(x)
        h = xp.transpose(x, (0, 2, 1))
        h = h.astype(xp.float32)
                
        for i in range(self._n_squeeze):
            h = self["s_{}".format(i)](h)
    
        hs = [h]
        for i in range(self._n_dilated):
            h = self["d_{}".format(i)](hs)
            hs.append(h)
        h = self.l(F.concat(hs, axis=1))
        h = F.transpose(h, (0, 2, 1))  # h is a chainer.Variable here, so use F.transpose
        return h
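The four squeeze blocks downsample the sequence by strides 2 × 4 × 4 × 4 = 128, reducing the 131,072 input positions to 1,024, which matches the number of target bins. The seven dilated blocks then enlarge the receptive field without further downsampling, and all of their outputs are concatenated before the final 1 × 1 convolution. A quick standalone check of the downsampling arithmetic (not part of the model):

# Product of the squeeze-block strides determines the total downsampling.
strides = [p[2] for p in default_squeeze_params]  # [2, 4, 4, 4]
factor = 1
for s in strides:
    factor *= s
print(factor, 131072 // factor)  # 128 1024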
import chainer
import chainer.functions as F
import chainer.links as L
import cupy as cp
import numpy as np  # needed by WNConvolutionND below (np.linalg.norm)
class WNConvolutionND(L.ConvolutionND):
    def __init__(self, *args, **kwargs):
        super(WNConvolutionND, self).__init__(*args, **kwargs)
        self.add_param('g', self.W.data.shape[0])
        norm = np.linalg.norm(self.W.data.reshape(
            self.W.data.shape[0], -1), axis=1)
        self.g.data[...] = norm
    def __call__(self, x):
        norm = F.batch_l2_norm_squared(self.W) ** 0.5
        channel_size = self.W.data.shape[0]
        norm_broadcasted = F.broadcast_to(
            F.reshape(norm, (channel_size, 1, 1)), self.W.data.shape)
        g_broadcasted = F.broadcast_to(
            F.reshape(self.g, (channel_size, 1, 1)), self.W.data.shape)
        return F.convolution_nd(
            x, g_broadcasted * self.W / norm_broadcasted, self.b, self.stride,
            self.pad, self.cover_all, self.dilate)
class SqueezeBlock(chainer.Chain):  
    def __init__(self, in_ch, out_ch, kernel, stride, do_rate):
        super(SqueezeBlock, self).__init__()
        
        self.do_rate = do_rate
        with self.init_scope():
            pad = kernel // 2
            self.conv = WNConvolutionND(1, in_ch, out_ch*2, kernel, pad=pad, stride=stride)
      
    def forward(self, x):
        h = self.conv(x)
        h, g = F.split_axis(h, 2, 1)
        h = F.dropout(h * F.sigmoid(g), self.do_rate)
        return h
class DilatedBlock(chainer.Chain):
    def __init__(self, in_ch, out_ch, kernel, dilate, do_rate):
        super(DilatedBlock, self).__init__()
        self.do_rate = do_rate
        with self.init_scope():
            # pad=dilate keeps the sequence length unchanged for kernel size 3
            self.conv = WNConvolutionND(1, in_ch, out_ch*2, kernel, pad=dilate, dilate=dilate)

    def forward(self, xs):
        # Dense connection: the input is the concatenation of the outputs
        # of all previous blocks.
        x = F.concat(xs, axis=1)
        h = self.conv(x)
        h, g = F.split_axis(h, 2, 1)
        h = F.dropout(h * F.sigmoid(g), self.do_rate)
        return h
WNConvolutionND defines a ConvolutionND with weight normalization: each output filter w is reparameterized as w = g · v/‖v‖, with a learnable per-channel scale g initialized to the norm of the initial weights (the first argument 1 specifies a one-dimensional convolution).
For the activation function we use the Gated Linear Unit [3], expressed as h = (W∗x) ⊙ σ(U∗x). For efficiency, instead of computing W∗x and U∗x with two separate convolutions, we apply a single convolution with twice the output channels, split the result into two along the channel axis, apply the sigmoid to one half, and multiply the two halves element-wise (F.split_axis and the product h * F.sigmoid(g) above correspond to this). In DilatedBlock, F.concat gathers the outputs of all previous blocks as the input; creating many skip connections in this way keeps the gradient from decaying as layers are added, which makes the network easier to train.
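A minimal sketch of this trick with toy sizes (it assumes the WNConvolutionND class above is defined): one convolution with doubled output channels, split in two along the channel axis, yields the value half and the gate half of the GLU.

import numpy as np
import chainer.functions as F
x = np.random.rand(1, 4, 16).astype(np.float32)   # (batch, channels, length)
conv = WNConvolutionND(1, 4, 2 * 8, 3, pad=1)     # 2x channels: value half + gate half
h, g = F.split_axis(conv(x), 2, axis=1)
y = h * F.sigmoid(g)                              # GLU: value ⊙ sigmoid(gate)
print(y.shape)  # (1, 8, 16)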
import numpy as np
n = Net()
size = 131072 # 128 * 1024
batchsize = 4
x = np.empty((batchsize, size, 4), dtype=np.bool)
y = n.forward(x)
print(y.shape)
(4, 1024, 10)

import chainer.functions as F
import math
import sklearn
import numpy as np
def log_poisson_loss(log_x, t):
    # Poisson negative log-likelihood (the constant log(t!) term is dropped).
    loss = F.mean(F.exp(log_x) - t * log_x)
    # Subtract the loss value at the optimum log_x = log(t) so that a
    # perfect prediction gives zero; np.ma.log masks the t == 0 entries.
    t = chainer.cuda.to_cpu(t.astype(np.float32))
    offset = F.mean(cp.array(t - t * np.ma.log(t)))
    return loss - offset
def log_r2_score(log_x, t):
    return F.r2_score(F.exp(log_x), t)
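A note on the offset term (a short derivation added for clarity, not from the original text): the first term, mean(exp(log_x) − t · log_x), is the Poisson negative log-likelihood with the constant log(t!) dropped. Its derivative with respect to log_x is exp(log_x) − t, which vanishes at log_x = log(t), where the first term equals mean(t − t · log(t)). Subtracting this offset therefore shifts the loss so that a perfect prediction scores exactly zero, which makes the training curves easier to read; entries with t = 0, for which t · log(t) is treated as 0, are handled by the masked log.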
from chainer import training
import numpy as np
import math
class CosineScheduler(training.Extension):
    def __init__(self, attr='lr', init_val=0.0001, n_decays=200, n_warmups=3, target=None, optimizer=None):
        self._attr = attr
        self._target = target
        self._optimizer = optimizer
        self._min_loss = None
        self._last_value = None
        self._init_val = init_val
        self._n_decays = n_decays - n_warmups
        self._decay_count = 0
        self._n_warmups = n_warmups
    def __call__(self, trainer):
        updater = trainer.updater
        optimizer = self._get_optimizer(trainer)
        epoch = updater.epoch
        if epoch < self._n_warmups:
            value = self._init_val / (self._n_warmups + 1) * (epoch + 1)
        else:
            value = 0.5 * self._init_val * (1 + math.cos(math.pi * (epoch - self._n_warmups) / self._n_decays))
        self._update_value(optimizer, value)
    def _get_optimizer(self, trainer):
        return self._optimizer or trainer.updater.get_optimizer('main')
    def _update_value(self, optimizer, value):
        setattr(optimizer, self._attr, value)
        self._last_value = value
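To see the schedule this extension produces, here is a standalone sketch (with hypothetical values init_val=0.001, n_decays=10, n_warmups=3) reproducing its per-epoch rule: a linear warmup over the first n_warmups epochs, then a cosine decay toward zero.

import math
init_val, n_decays, n_warmups = 0.001, 10, 3
for epoch in range(n_decays):
    if epoch < n_warmups:
        value = init_val / (n_warmups + 1) * (epoch + 1)
    else:
        value = 0.5 * init_val * (1 + math.cos(math.pi * (epoch - n_warmups) / (n_decays - n_warmups)))
    print(epoch, '{:.6f}'.format(value))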
import chainer
import numpy as np  # used by get_example (np.roll)
import random
class PreprocessedDataset(chainer.dataset.DatasetMixin):
    def __init__(self, xs, ys, max_shift):
        self.xs = xs
        self.ys = ys
        self.max_shift = max_shift
    def __len__(self):
        return len(self.xs)
    def get_example(self, i):
        # Data augmentation: randomly shift the sequence along the
        # position axis by up to max_shift (np.roll wraps around).
        x = self.xs[i]
        y = self.ys[i]
        s = random.randint(-self.max_shift, self.max_shift)
        x = np.roll(x, s, axis=0)
        return x, y
Only a 1/ratio fraction of the data is used for training and validation. Here ratio is set to 1, in which case training finishes in roughly 30 minutes. If you want a quicker run, try changing ratio=1 to ratio=10 or ratio=20.
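A tiny sketch of what the shift augmentation does (toy one-hot data, not from the dataset): np.roll moves the whole sequence along the position axis, wrapping around at the ends.

import numpy as np
x = np.eye(4, dtype=bool)[[0, 1, 2, 3, 0, 1]]    # toy one-hot sequence, shape (6, 4)
print(x.argmax(axis=1))                          # [0 1 2 3 0 1]
print(np.roll(x, 2, axis=0).argmax(axis=1))      # [0 1 0 1 2 3]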
import chainer
import chainer.functions as F
import chainer.links as L
import numpy as np
from chainer.training import extensions
from chainer import training
import h5py
ml_h5 = h5py.File('seq.h5', 'r')
train_x = ml_h5['train_in']
train_y = ml_h5['train_out']
valid_x = ml_h5['valid_in']
valid_y = ml_h5['valid_out']
test_x = ml_h5['test_in']
test_y = ml_h5['test_out']
ratio = 1
train_x = train_x[:len(train_x)//ratio]
train_y = train_y[:len(train_y)//ratio]
valid_x = valid_x[:len(valid_x)//ratio]
valid_y = valid_y[:len(valid_y)//ratio]
max_shift_for_data_augmentation = 5
train = PreprocessedDataset(train_x, train_y, max_shift_for_data_augmentation)
val = chainer.datasets.TupleDataset(valid_x, valid_y)
batchsize = 8
train_iter = chainer.iterators.SerialIterator(train, batchsize)
val_iter = chainer.iterators.SerialIterator(val, batchsize, repeat=False, shuffle=False)
model = L.Classifier(Net(), lossfun=log_poisson_loss, accfun=log_r2_score)
lr = 0.001
optimizer = chainer.optimizers.Adam(alpha=lr, beta1=0.97, beta2=0.98)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer_hooks.GradientClipping(threshold=0.01))
updater = training.updaters.StandardUpdater(
     train_iter, optimizer, device=0)
n_epochs = 10
n_warmups = 0
out = "out"
trainer = training.Trainer(updater, (n_epochs, 'epoch'), out=out)
trainer.extend(CosineScheduler(attr='alpha', init_val=lr, n_decays=n_epochs, n_warmups=n_warmups), trigger=(1, 'epoch'))
trainer.extend(extensions.Evaluator(val_iter, model, device=0))
trainer.extend(extensions.LogReport(trigger=(0.2, 'epoch')))
trainer.extend(extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'), trigger=(1, 'epoch'))
trainer.extend(extensions.PrintReport(
          ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']), trigger=(0.1, 'epoch'))
# trainer.extend(extensions.ProgressBar())
           
trainer.run()
!ls -l out/
import chainer
import chainer.links as L
%matplotlib inline
import matplotlib.pyplot as plt
model_n_epoch = 10
out_dir = 'out'
model = L.Classifier(Net())
chainer.serializers.load_npz('{}/model_epoch_{}'.format(out_dir, model_n_epoch), model)
predictor = model.predictor
print(len(test_x))
with chainer.no_backprop_mode():
    test_y_estimated = F.exp(predictor(test_x[:1]))
test_y = test_y[:1]
print(test_y_estimated.shape)     
print(test_y_estimated[0,:,0])
y = test_y_estimated.data
fig_size = plt.rcParams["figure.figsize"]
fig_size[0] = 20
fig_size[1] = 10
i = 0
b1 = plt.bar(range(y.shape[1]), y[0,:,i])
b2 = plt.bar(range(y.shape[1]), test_y[0,:,i])
plt.legend((b1, b2), ('estimated', 'observed'))
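To put a number on this fit, one option (a sketch, assuming the variables above are still in scope) is to reuse the same R² metric that served as the accuracy during training:

import numpy as np
import chainer.functions as F
pred = test_y_estimated.data.reshape(-1, 10)
obs = np.asarray(test_y, dtype=np.float32).reshape(-1, 10)
print(F.r2_score(pred, obs))  # coefficient of determination over all bins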