SDK2.0βのColorBasics-WPFサンプルに音声認識部分を追加しています。
今回もKinectAudioStream.csをプロジェクトに追加しています。
変更はnamespaceとMainWindow.xaml.csだけです。
MainWindow.xaml.cs
//------------------------------------------------------------------------------
// <copyright file="MainWindow.xaml.cs" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//------------------------------------------------------------------------------
namespace Microsoft.Samples.Kinect.ColorBasics.speech
{
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Windows;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using Microsoft.Kinect;
using Microsoft.Speech.AudioFormat;
using Microsoft.Speech.Recognition;
/// <summary>
/// Interaction logic for MainWindow
/// </summary>
public partial class MainWindow : Window, INotifyPropertyChanged
{
/// <summary>
/// Size of the RGB pixel in the bitmap
/// </summary>
private readonly uint bytesPerPixel = 0;
/// <summary>
/// Active Kinect sensor
/// </summary>
private KinectSensor kinectSensor = null;
/// <summary>
/// Reader for color frames
/// </summary>
private ColorFrameReader colorFrameReader = null;
/// <summary>
/// Bitmap to display
/// </summary>
private WriteableBitmap colorBitmap = null;
/// <summary>
/// Intermediate storage for receiving frame data from the sensor
/// </summary>
private byte[] colorPixels = null;
/// <summary>
/// Current status text to display
/// </summary>
private string statusText = null;
/// <summary>
/// 音声ストリーム
/// </summary>
KinectAudioStream convertStream = null;
/// <summary>
/// 音声認識エンジン
/// </summary>
SpeechRecognitionEngine engine=null;
/// <summary>
/// 日本語音声認識エンジンのID
/// </summary>
const string engin_id = "SR_MS_ja-JP_Kinect_11.0";
/// <summary>
/// Initializes a new instance of the MainWindow class.
/// </summary>
public MainWindow()
{
// get the kinectSensor object
this.kinectSensor = KinectSensor.GetDefault();
// open the reader for the color frames
this.colorFrameReader = this.kinectSensor.ColorFrameSource.OpenReader();
// wire handler for frame arrival
this.colorFrameReader.FrameArrived += this.Reader_ColorFrameArrived;
// create the colorFrameDescription from the ColorFrameSource using Bgra format
FrameDescription colorFrameDescription = this.kinectSensor.ColorFrameSource.CreateFrameDescription(ColorImageFormat.Bgra);
// rgba is 4 bytes per pixel
this.bytesPerPixel = colorFrameDescription.BytesPerPixel;
// allocate space to put the pixels to be rendered
this.colorPixels = new byte[colorFrameDescription.Width * colorFrameDescription.Height * this.bytesPerPixel];
// create the bitmap to display
this.colorBitmap = new WriteableBitmap(colorFrameDescription.Width, colorFrameDescription.Height, 96.0, 96.0, PixelFormats.Bgr32, null);
// set IsAvailableChanged event notifier
this.kinectSensor.IsAvailableChanged += this.Sensor_IsAvailableChanged;
// open the sensor
this.kinectSensor.Open();
// set the status text
this.StatusText = this.kinectSensor.IsAvailable ? Properties.Resources.RunningStatusText
: Properties.Resources.NoSensorStatusText;
// use the window object as the view model in this simple example
this.DataContext = this;
// initialize the components (controls) of the window
this.InitializeComponent();
////////////////////////////////////////////////////////////
//音声認識関連
////////////////////////////////////////////////////////////
//音声入力設定
IReadOnlyList<AudioBeam> audioBeamList = kinectSensor.AudioSource.AudioBeams;
Stream audioStream = audioBeamList[0].OpenInputStream();
//audioStreamのビット変換
convertStream = new KinectAudioStream(audioStream);
//音声認識エンジン設定
engine = new SpeechRecognitionEngine(engin_id);
//認識するワード
var word = new Choices();
word.Add("チーズ");
word.Add("キャプチャ");
word.Add("シャッター");
//グラマービルダー
var gb = new GrammarBuilder();
//言語設定
gb.Culture = engine.RecognizerInfo.Culture;
//グラマービルダーに単語を登録
gb.Append(word);
//グラマー作成
var g = new Grammar(gb);
//認識エンジンにグラマーを登録
engine.LoadGrammar(g);
//イベントの登録
engine.SpeechRecognized += engine_SpeechRecognized;
engine.SpeechRecognitionRejected += engine_SpeechRecognitionRejected;
//オーディオストリームの変換をアクティブにする
convertStream.SpeechActive = true;
//認識エンジンに入力設定
engine.SetInputToAudioStream(convertStream, new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));
//非同期で連続認識の開始
engine.RecognizeAsync(RecognizeMode.Multiple);
}
/// <summary>
///
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
void engine_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
{
throw new NotImplementedException();
}
/// <summary>
///
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
void engine_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
//認識信頼度の閾値
//(認識信頼度 -1<0<1 -1が低信頼度 0が標準 1が高信頼度 数字が大きいほど認識率が厳しくなる)
const double ConfidenceThreshold = 0.6;
if (e.Result.Confidence >= ConfidenceThreshold)
{
//認識した単語を表示
if (this.colorBitmap != null)
{
// create a png bitmap encoder which knows how to save a .png file
BitmapEncoder encoder = new PngBitmapEncoder();
// create frame from the writable bitmap and add to encoder
encoder.Frames.Add(BitmapFrame.Create(this.colorBitmap));
string time = System.DateTime.Now.ToString("hh'-'mm'-'ss", CultureInfo.CurrentUICulture.DateTimeFormat);
string myPhotos = Environment.GetFolderPath(Environment.SpecialFolder.MyPictures);
string path = Path.Combine(myPhotos, "KinectScreenshot-Color-" + time + ".png");
// write the new file to disk
try
{
// FileStream is IDisposable
using (FileStream fs = new FileStream(path, FileMode.Create))
{
encoder.Save(fs);
}
this.StatusText = string.Format(Properties.Resources.SavedScreenshotStatusTextFormat, path);
}
catch (IOException)
{
this.StatusText = string.Format(Properties.Resources.FailedScreenshotStatusTextFormat, path);
}
}
}
}
/// <summary>
/// INotifyPropertyChangedPropertyChanged event to allow window controls to bind to changeable data
/// </summary>
public event PropertyChangedEventHandler PropertyChanged;
/// <summary>
/// Gets the bitmap to display
/// </summary>
public ImageSource ImageSource
{
get
{
return this.colorBitmap;
}
}
/// <summary>
/// Gets or sets the current status text to display
/// </summary>
public string StatusText
{
get
{
return this.statusText;
}
set
{
if (this.statusText != value)
{
this.statusText = value;
// notify any bound elements that the text has changed
if (this.PropertyChanged != null)
{
this.PropertyChanged(this, new PropertyChangedEventArgs("StatusText"));
}
}
}
}
/// <summary>
/// Execute shutdown tasks
/// </summary>
/// <param name="sender">object sending the event</param>
/// <param name="e">event arguments</param>
private void MainWindow_Closing(object sender, CancelEventArgs e)
{
if (this.colorFrameReader != null)
{
// ColorFrameReder is IDisposable
this.colorFrameReader.Dispose();
this.colorFrameReader = null;
}
if (this.kinectSensor != null)
{
this.kinectSensor.Close();
this.kinectSensor = null;
}
}
/// <summary>
/// Handles the user clicking on the screenshot button
/// </summary>
/// <param name="sender">object sending the event</param>
/// <param name="e">event arguments</param>
private void ScreenshotButton_Click(object sender, RoutedEventArgs e)
{
if (this.colorBitmap != null)
{
// create a png bitmap encoder which knows how to save a .png file
BitmapEncoder encoder = new PngBitmapEncoder();
// create frame from the writable bitmap and add to encoder
encoder.Frames.Add(BitmapFrame.Create(this.colorBitmap));
string time = System.DateTime.Now.ToString("hh'-'mm'-'ss", CultureInfo.CurrentUICulture.DateTimeFormat);
string myPhotos = Environment.GetFolderPath(Environment.SpecialFolder.MyPictures);
string path = Path.Combine(myPhotos, "KinectScreenshot-Color-" + time + ".png");
// write the new file to disk
try
{
// FileStream is IDisposable
using (FileStream fs = new FileStream(path, FileMode.Create))
{
encoder.Save(fs);
}
this.StatusText = string.Format(Properties.Resources.SavedScreenshotStatusTextFormat, path);
}
catch (IOException)
{
this.StatusText = string.Format(Properties.Resources.FailedScreenshotStatusTextFormat, path);
}
}
}
/// <summary>
/// Handles the color frame data arriving from the sensor
/// </summary>
/// <param name="sender">object sending the event</param>
/// <param name="e">event arguments</param>
private void Reader_ColorFrameArrived(object sender, ColorFrameArrivedEventArgs e)
{
bool colorFrameProcessed = false;
// ColorFrame is IDisposable
using (ColorFrame colorFrame = e.FrameReference.AcquireFrame())
{
if (colorFrame != null)
{
FrameDescription colorFrameDescription = colorFrame.FrameDescription;
// verify data and write the new color frame data to the display bitmap
if ((colorFrameDescription.Width == this.colorBitmap.PixelWidth) && (colorFrameDescription.Height == this.colorBitmap.PixelHeight))
{
if (colorFrame.RawColorImageFormat == ColorImageFormat.Bgra)
{
colorFrame.CopyRawFrameDataToArray(this.colorPixels);
}
else
{
colorFrame.CopyConvertedFrameDataToArray(this.colorPixels, ColorImageFormat.Bgra);
}
colorFrameProcessed = true;
}
}
}
// we got a frame, render
if (colorFrameProcessed)
{
this.RenderColorPixels();
}
}
/// <summary>
/// Renders color pixels into the writeableBitmap.
/// </summary>
private void RenderColorPixels()
{
this.colorBitmap.WritePixels(
new Int32Rect(0, 0, this.colorBitmap.PixelWidth, this.colorBitmap.PixelHeight),
this.colorPixels,
this.colorBitmap.PixelWidth * (int)this.bytesPerPixel,
0);
}
/// <summary>
/// Handles the event which the sensor becomes unavailable (E.g. paused, closed, unplugged).
/// </summary>
/// <param name="sender">object sending the event</param>
/// <param name="e">event arguments</param>
private void Sensor_IsAvailableChanged(object sender, IsAvailableChangedEventArgs e)
{
// on failure, set the status text
this.StatusText = this.kinectSensor.IsAvailable ? Properties.Resources.RunningStatusText
: Properties.Resources.SensorNotAvailableStatusText;
}
}
}
説明:
前回の音声認識サンプルとほぼ同じです。スナップショットを撮るルーチンもClickイベント部分と同じです。
慣れてくれば簡単にイベント部分を音声認識イベントに変更できそうです。