chainerのimagenetサンプルをopencvを使って動画に適用するサンプルコード

Windows環境にchainerが入っている前提で説明します。
特に難しい処理をしている訳ではないので、コードをなるべく書かずに気軽に試したい方向けです。

まず、opencvを普段使っていない方は、OpenCVの公式サイト（https://opencv.org/releases/）からPCにダウンロードしてください。

上記リンクのOpenCV for Windowsをダウンロードし、exeファイルを実行して好きな場所に解凍してください。C直下などがおすすめです。
そして、解凍したフォルダを開き、「opencv」→「build」→「python」→「2.7」→「x64」→「cv2.pyd」というファイルをコピーします。
コピーしたcv2.pydを、「Python27」→「Lib」→「site-packages」の中に入れてください。Python27のフォルダはPythonをインストールした場所にあります。（基本はCドライブ）

これでpythonでopencvが使えるようになりました。
動画への拡張は、https://github.com/shi3z/chainer_imagenet_tools にあるinspection.pyのコードをベースにして作成しました。

画像を引数で渡す形式から、動画のパスを指定して実行する形式に変更しています。
サンプルプログラムでは、動画の１フレーム中の一つの注目領域だけをCNNに入力しているので、用途によって注目領域をループでずらして使うなどしたら良いと思います。

以下サンプルプログラム

#!/usr/bin/env python
# coding: UTF-8
"""Classify a region of each video frame with a Chainer NIN model.

Adapted from inspection.py in https://github.com/shi3z/chainer_imagenet_tools:
instead of taking a single image as an argument, this script reads frames
from ``./movie.avi`` with OpenCV, crops one region of interest per frame,
feeds it to the network and prints the top-ranked label.

Inputs: the model file (``--model``), the mean image file (``--mean``),
the GPU ID (``--gpu``, negative for CPU) and a tab-delimited ``labels.txt``
in the working directory.

"""
from __future__ import print_function
import argparse
import datetime
import json
import multiprocessing
import random
import sys
import threading
import time

import numpy as np
from PIL import Image


import six
import cPickle as pickle
from six.moves import queue

import chainer
import matplotlib.pyplot as plt
import numpy as np
import math
import chainer.functions as F
import chainer.links as L
from chainer.links import caffe
from matplotlib.ticker import * 
from chainer import serializers
from chainer import cuda

import cv2
import time


# Command-line options: model file, mean image file and the GPU to use.
parser = argparse.ArgumentParser(description='Image inspection using chainer')
parser.add_argument('--model', '-m', default='model',
                    help='Path to model file')
parser.add_argument('--mean', default='mean.npy',
                    help='Path to the mean file (computed by compute_mean.py)')
parser.add_argument('--gpu', '-g', type=int, default=-1,
                    help='GPU ID (negative value indicates CPU)')
args = parser.parse_args()

# Select the array module: NumPy on CPU, CuPy when a GPU ID was requested.
if args.gpu < 0:
    xp = np
else:
    cuda.check_cuda_available()
    xp = cuda.cupy

# Crop (and optionally flip) an image, then normalize it for the network.
def read_image(input_img, center=False, flip=False):
    """Return a mean-subtracted, scaled crop of ``input_img``.

    The image is converted to an array, its axes are reordered with
    ``transpose(2, 0, 1)`` and a ``model.insize`` x ``model.insize`` window
    is cut out.  The same window of the module-level ``mean_image`` is
    subtracted and the result is divided by 255.

    Args:
        input_img: Anything ``np.asarray`` turns into a 3-axis array
            (the caller passes a PIL image; presumably height x width x
            channel -- verify against other callers).
        center: If true, crop at offset ``cropwidth // 2`` on both axes;
            otherwise pick both offsets at random in ``[0, cropwidth - 1]``.
        flip: If true, reverse the last axis of the crop with
            probability 1/2.

    Returns:
        A float32 array of shape ``(channels, model.insize, model.insize)``.

    Relies on the module-level names ``cropwidth``, ``model`` and
    ``mean_image``.
    """
    image = np.asarray(input_img).transpose(2, 0, 1)
    if center:
        # Floor division: identical to "/" for ints on Python 2, and keeps
        # the slice offsets integral on Python 3 (a float offset raises
        # TypeError when slicing).
        top = left = cropwidth // 2
    else:
        top = random.randint(0, cropwidth - 1)
        left = random.randint(0, cropwidth - 1)
    bottom = model.insize + top
    right = model.insize + left

    # astype() copies, so the in-place arithmetic below never touches the
    # caller's data.
    image = image[:, top:bottom, left:right].astype(np.float32)
    image -= mean_image[:, top:bottom, left:right]
    image /= 255

    if flip and random.randint(0, 1) == 0:
        return image[:, :, ::-1]
    return image

import nin

# Load the mean image that read_image() subtracts from every crop.
# The file is opened in a with-block so the handle is closed right away.
# NOTE(review): pickle.load can execute arbitrary code; only load mean files
# you generated yourself.
with open(args.mean, 'rb') as mean_file:
    mean_image = pickle.load(mean_file)


# Build the NIN network and restore its trained parameters.
model = nin.NIN()
serializers.load_npz(args.model, model)

# Margin between the 256-pixel source images and the network input size;
# read_image() picks its crop offsets inside this margin.
cropwidth = 256 - model.insize

# Place the model on the requested device.
if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
else:
    model.to_cpu()



def predict(net, x):
    """Run ``x`` through ``net`` and return the softmax of its output.

    The input passes through ``mlpconv1``..``mlpconv3`` (each followed by
    ReLU and 3x3 max pooling with stride 2), dropout, ``mlpconv4`` and a
    6x6 average pooling; the result is reshaped to ``(batch, 1000)``
    before the softmax.

    Args:
        net: Network object exposing ``mlpconv1``..``mlpconv4`` and ``train``.
        x: Input variable; ``x.data.shape[0]`` is used as the batch size.

    Returns:
        The softmax output with shape ``(batch, 1000)``.
    """
    feat = F.max_pooling_2d(F.relu(net.mlpconv1(x)), 3, stride=2)
    feat = F.max_pooling_2d(F.relu(net.mlpconv2(feat)), 3, stride=2)
    feat = F.max_pooling_2d(F.relu(net.mlpconv3(feat)), 3, stride=2)
    logits = net.mlpconv4(F.dropout(feat, train=net.train))
    pooled = F.average_pooling_2d(logits, 6)
    batch_size = x.data.shape[0]
    return F.softmax(F.reshape(pooled, (batch_size, 1000)))

# Open the input video.
cap = cv2.VideoCapture('./movie.avi')
fps = cap.get(cv2.CAP_PROP_FPS)

# Region of interest inside each frame that is fed to the CNN
# (raster-scan it inside the loop if your application needs that).
# NOTE(review): read_image() crops against cropwidth = 256 - model.insize,
# so the region is presumably expected to be 256x256 -- confirm before
# changing h / w.
x = 0
y = 0
h = 256
w = 256

# Loop invariants: the label table and the number of ranks to inspect.
categories = np.loadtxt("labels.txt", str, delimiter="\t")
top_k = 20

# Process frames for as long as the video is open; always release the capture.
try:
    while cap.isOpened():
        # Grab one frame; ret is False once no frame could be read
        # (e.g. at the end of the video), in which case frame is None.
        ret, frame = cap.read()
        if not ret:
            break

        # Cut out the region of interest.
        roi = frame[y:y + h, x:x + w]

        # Convert from OpenCV's BGR channel order to RGB for PIL.  Only the
        # channel axis is reversed: reversing the row axis too would turn
        # the image upside down.
        cv_rgb = roi[:, :, ::-1].copy()
        CV2PIL_img = Image.fromarray(cv_rgb)

        # Crop / normalize.  NOTE: read_image() defaults to a random crop;
        # pass center=True if you need deterministic results.
        img = read_image(CV2PIL_img)

        # Pack the crop into a batch of one.  A separate name is used so the
        # ROI coordinate x is not overwritten between frames, and the batch
        # is moved to the same device as the model via xp.
        batch = np.empty(
            (1, 3, model.insize, model.insize), dtype=np.float32)
        batch[0] = img
        batch = chainer.Variable(xp.asarray(batch), volatile='on')

        score = predict(model, batch)

        # Bring the scores back to host memory and rank labels by score,
        # highest first (key-based sort works on Python 2 and 3).
        probs = cuda.to_cpu(score.data)[0].tolist()
        prediction = sorted(zip(probs, categories),
                            key=lambda pair: pair[0], reverse=True)

        for rank, (prob, name) in enumerate(prediction[:top_k], start=1):
            if rank == 1:
                # Handle the top-ranked label here.
                print('#%d | %s | %4.1f%%' % (rank, name, prob * 100))
                if name == '設定したラベル':
                    # Handle the case where the top-ranked label is the
                    # one you are looking for.
                    pass
finally:
    cap.release()

画像処理とか機械学習とか

画像処理や機械学習関連の事について気まぐれで書いていきます。歩行者検出関係が多いと思います。ハリネズミもたまに出現します。

chainerのimagenetサンプルをopencvを使って動画に適用するサンプルコード