Dictates App shows how to create a simple application that can recognise dictated speech and input it into a TextBox.
Step 1
If you have not already done so, follow Setup and Start to install and get started with Visual Studio 2017, or in Windows 10 choose Start and then, from the Start Menu, find and select Visual Studio 2017.
Step 2
Once Visual Studio Community 2017 has started, from the Menu choose File, then New, then Project…
Step 3
From New Project choose Visual C# under Installed, Templates, then choose Blank App (Universal Windows), type in a Name, select a Location and then select Ok to create the Project.
Step 4
Then in New Universal Windows Project you need to select the Target Version; this should be at least Windows 10, version 1803 (10.0; Build 17134), which is the April 2018 Update, and the Minimum Version should be set to the same.
The Target Version controls which features your application can use in Windows 10, so by picking the most recent version you’ll be able to take advantage of those features. To make sure you always have the most recent version, in Visual Studio 2017 select Tools, then Extensions and Updates…, and then see if there are any Updates.
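For reference, the Target and Minimum Versions chosen in this dialog end up in the Project file. A minimal sketch of the relevant part of the .csproj, assuming both were set to the April 2018 Update, might look like the following, although the exact values and layout on your machine may differ:

<PropertyGroup>
  <!-- Assumed values for the April 2018 Update (Build 17134); yours may differ -->
  <TargetPlatformVersion>10.0.17134.0</TargetPlatformVersion>
  <TargetPlatformMinVersion>10.0.17134.0</TargetPlatformMinVersion>
</PropertyGroup>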
Step 5
Once done, from the Menu select Project, then Add New Item…
Step 6
From the Add New Item window select Visual C#, then Code under Installed, then select Code File from the list, type in the Name as Library.cs, and select Add to add the file to the Project.
Step 7
Once in the Code View for Library.cs the following should be entered:
using System.Threading.Tasks;
using Windows.Storage;
using Windows.Storage.Pickers;
using System;
using System.Collections.Generic;
using Windows.UI.Xaml.Controls;
using Windows.Foundation;
using Windows.UI.Popups;
using Windows.System;
using Windows.Media.SpeechRecognition;
using System.Text;
using Windows.UI.Core;
using Windows.Globalization;
using Windows.UI.Xaml.Media;

public class Library
{
    private const string app_title = "Dictates App";
    private const string extension_txt = ".txt";
    private const string label_dictate = "Dictate";
    private const string label_stop = "Stop";
    private const uint privacy_statement_declined = 0x80045509;

    private IAsyncOperation<IUICommand> _dialogCommand;
    private SpeechRecognizer _recogniser = new SpeechRecognizer();
    private StringBuilder _builder = new StringBuilder();
    private CoreDispatcher _dispatcher;
    private bool _listening;

    public delegate void ResultHandler(string value);
    public event ResultHandler Result;

    public delegate void CompletedHandler();
    public event CompletedHandler Completed;

    private async Task<bool> ShowDialogAsync(string content, string title = app_title)
    {
        try
        {
            if (_dialogCommand != null)
            {
                _dialogCommand.Cancel();
                _dialogCommand = null;
            }
            _dialogCommand = new MessageDialog(content, title).ShowAsync();
            await _dialogCommand;
            return true;
        }
        catch (TaskCanceledException)
        {
            return false;
        }
    }

    private async void ShowPrivacy()
    {
        await Launcher.LaunchUriAsync(new Uri("ms-settings:privacy-speechtyping"));
    }

    private async Task<string> OpenAsync()
    {
        try
        {
            FileOpenPicker picker = new FileOpenPicker()
            {
                SuggestedStartLocation = PickerLocationId.ComputerFolder
            };
            picker.FileTypeFilter.Add(extension_txt);
            StorageFile open = await picker.PickSingleFileAsync();
            if (open != null) return await FileIO.ReadTextAsync(open);
        }
        finally { }
        return null;
    }

    private async void SaveAsync(string contents)
    {
        try
        {
            FileSavePicker picker = new FileSavePicker()
            {
                SuggestedStartLocation = PickerLocationId.DocumentsLibrary,
                DefaultFileExtension = extension_txt,
                SuggestedFileName = "Document"
            };
            picker.FileTypeChoices.Add("Text File", new List<string>() { extension_txt });
            StorageFile save = await picker.PickSaveFileAsync();
            if (save != null) await FileIO.WriteTextAsync(save, contents);
        }
        finally { }
    }

    private async void Recogniser_Completed(
        SpeechContinuousRecognitionSession sender,
        SpeechContinuousRecognitionCompletedEventArgs args)
    {
        if (args.Status != SpeechRecognitionResultStatus.Success)
        {
            if (args.Status == SpeechRecognitionResultStatus.TimeoutExceeded)
            {
                await _dispatcher.RunAsync(CoreDispatcherPriority.Normal, () =>
                {
                    Result?.Invoke(_builder.ToString());
                    Completed?.Invoke();
                    _listening = false;
                });
            }
            else
            {
                await _dispatcher.RunAsync(CoreDispatcherPriority.Normal, () =>
                {
                    Completed?.Invoke();
                    _listening = false;
                });
            }
        }
    }

    private async void Recogniser_ResultGenerated(
        SpeechContinuousRecognitionSession sender,
        SpeechContinuousRecognitionResultGeneratedEventArgs args)
    {
        if (args.Result.Confidence == SpeechRecognitionConfidence.Medium ||
            args.Result.Confidence == SpeechRecognitionConfidence.High)
        {
            _builder.Append($"{args.Result.Text} ");
            await _dispatcher.RunAsync(CoreDispatcherPriority.Normal, () =>
            {
                Result?.Invoke(_builder.ToString());
            });
        }
    }

    private async void SpeechRecognizer_HypothesisGenerated(
        SpeechRecognizer sender,
        SpeechRecognitionHypothesisGeneratedEventArgs args)
    {
        string hypothesis = args.Hypothesis.Text;
        string content = $"{_builder.ToString()} {hypothesis} ...";
        await _dispatcher.RunAsync(CoreDispatcherPriority.Normal, () =>
        {
            Result?.Invoke(content);
        });
    }

    private async void Setup(Language language)
    {
        if (_recogniser != null)
        {
            _recogniser.ContinuousRecognitionSession.Completed -= Recogniser_Completed;
            _recogniser.ContinuousRecognitionSession.ResultGenerated -= Recogniser_ResultGenerated;
            _recogniser.HypothesisGenerated -= SpeechRecognizer_HypothesisGenerated;
            _recogniser.Dispose();
            _recogniser = null;
        }
        _recogniser = new SpeechRecognizer(language);
        SpeechRecognitionTopicConstraint constraint = new SpeechRecognitionTopicConstraint(
            SpeechRecognitionScenario.Dictation, "dictation");
        _recogniser.Constraints.Add(constraint);
        SpeechRecognitionCompilationResult result = await _recogniser.CompileConstraintsAsync();
        if (result.Status != SpeechRecognitionResultStatus.Success)
        {
            await ShowDialogAsync($"Grammar Compilation Failed: {result.Status.ToString()}");
        }
        _recogniser.ContinuousRecognitionSession.Completed += Recogniser_Completed;
        _recogniser.ContinuousRecognitionSession.ResultGenerated += Recogniser_ResultGenerated;
        _recogniser.HypothesisGenerated += SpeechRecognizer_HypothesisGenerated;
    }

    public Dictionary<Language, string> Languages()
    {
        Dictionary<Language, string> results = new Dictionary<Language, string>();
        foreach (Language language in SpeechRecognizer.SupportedTopicLanguages)
        {
            results.Add(language, language.DisplayName);
        }
        return results;
    }

    public async void Language(object value)
    {
        if (_recogniser != null)
        {
            Language language = (Language)value;
            if (_recogniser.CurrentLanguage != language)
            {
                try
                {
                    Setup(language);
                }
                catch (Exception exception)
                {
                    await ShowDialogAsync(exception.Message);
                }
            }
        }
    }

    private void Content_TextChanged(object sender, TextChangedEventArgs e)
    {
        var grid = (Grid)VisualTreeHelper.GetChild((TextBox)sender, 0);
        for (var i = 0; i <= VisualTreeHelper.GetChildrenCount(grid) - 1; i++)
        {
            object obj = VisualTreeHelper.GetChild(grid, i);
            if (!(obj is ScrollViewer)) continue;
            ((ScrollViewer)obj).ChangeView(0.0f, ((ScrollViewer)obj).ExtentHeight, 1.0f);
            break;
        }
    }

    public void Init(AppBarButton microphone, ComboBox languages, TextBox content)
    {
        _dispatcher = CoreWindow.GetForCurrentThread().Dispatcher;
        Completed += () =>
        {
            microphone.Label = label_dictate;
            languages.IsEnabled = true;
        };
        Result += (string value) =>
        {
            content.Text = value;
        };
        content.TextChanged += Content_TextChanged;
    }

    public async void Dictate(AppBarButton dictate, ComboBox languages, TextBox content)
    {
        dictate.IsEnabled = false;
        if (_listening == false)
        {
            if (_recogniser.State == SpeechRecognizerState.Idle)
            {
                dictate.Label = label_stop;
                languages.IsEnabled = false;
                try
                {
                    _listening = true;
                    await _recogniser.ContinuousRecognitionSession.StartAsync();
                }
                catch (Exception ex)
                {
                    if ((uint)ex.HResult == privacy_statement_declined)
                    {
                        ShowPrivacy();
                    }
                    else
                    {
                        await ShowDialogAsync(ex.Message);
                    }
                    _listening = false;
                    dictate.Label = label_dictate;
                    languages.IsEnabled = true;
                }
            }
        }
        else
        {
            _listening = false;
            dictate.Label = label_dictate;
            languages.IsEnabled = true;
            if (_recogniser.State != SpeechRecognizerState.Idle)
            {
                try
                {
                    await _recogniser.ContinuousRecognitionSession.StopAsync();
                    content.Text = _builder.ToString();
                }
                catch (Exception ex)
                {
                    await ShowDialogAsync(ex.Message);
                }
            }
        }
        dictate.IsEnabled = true;
    }

    public async void New(TextBox text)
    {
        if (await ShowDialogAsync("Create New Document?"))
        {
            _builder.Clear();
            text.Text = string.Empty;
        }
    }

    public async void Open(TextBox text)
    {
        string content = await OpenAsync();
        if (content != null)
        {
            text.Text = content;
        }
    }

    public void Save(ref TextBox text)
    {
        SaveAsync(text.Text);
    }
}
In the Code File for Library there are using statements to include the necessary functionality. The Library Class has const Values along with Members such as an IAsyncOperation of IUICommand for use with the ShowDialogAsync Method, which will display a MessageDialog, a SpeechRecognizer and a CoreDispatcher. It also has Events for the ResultHandler and CompletedHandler Delegates.
Also within the Library Class there is a ShowPrivacy Method to display the Speech Typing Privacy Settings page. OpenAsync uses a FileOpenPicker and the ReadTextAsync Method of FileIO to read a Text File, and SaveAsync uses a FileSavePicker and the WriteTextAsync Method of FileIO to write one.
While still in the Library Class there is a Recogniser_Completed Event Handler which will be triggered when the SpeechRecognizer has finished Recognising a Dictation and will raise the relevant Event based on the SpeechRecognitionResultStatus. Recogniser_ResultGenerated will be triggered when there is a result from the SpeechRecognizer; based on the SpeechRecognitionConfidence it will Append the recognised Text to a StringBuilder and raise the Result Event with its contents. SpeechRecognizer_HypothesisGenerated responds when a Hypothesis from the SpeechRecognizer is detected and raises the Result Event with it.
Again in the Library Class there is a Setup Method which takes a Language Parameter and will configure the relevant Event Handlers and set up the SpeechRecognitionTopicConstraint and Language of the SpeechRecognizer. The Languages Method returns all the supported Languages of the SpeechRecognizer as a Dictionary of Language and string, and the Language Method sets the CurrentLanguage of the SpeechRecognizer. Content_TextChanged is an Event Handler that helps scroll the content as it is Dictated within a TextBox, and the Init Method is used to initialise the Event Handlers and the CoreDispatcher. The Dictate Method is used to begin a Dictation with StartAsync or end one with StopAsync of the ContinuousRecognitionSession of the SpeechRecognizer, and will also show a message should there be any Errors. The New Method is used to clear the TextBox, Open is used with OpenAsync to read a Text File, and Save is used to write the Dictated content to a Text File.
Step 8
In the Solution Explorer select MainPage.xaml
Step 9
From the Menu choose View and then Designer
Step 10
The Design View will be displayed along with the XAML View, and in this, between the Grid and /Grid elements, enter the following XAML:
<Grid Margin="50">
    <Grid.RowDefinitions>
        <RowDefinition Height="Auto"/>
        <RowDefinition Height="*"/>
    </Grid.RowDefinitions>
    <ComboBox Grid.Row="0" Name="Language" HorizontalAlignment="Stretch"
        SelectedValuePath="Key" DisplayMemberPath="Value"
        SelectionChanged="Language_SelectionChanged"/>
    <TextBox Grid.Row="1" Name="Display" AcceptsReturn="True" TextWrapping="Wrap"/>
</Grid>
<CommandBar VerticalAlignment="Bottom">
    <AppBarButton Name="Dictate" Icon="Microphone" Label="Dictate" Click="Dictate_Click"/>
    <AppBarButton Icon="Page2" Label="New" Click="New_Click"/>
    <AppBarButton Icon="OpenFile" Label="Open" Click="Open_Click"/>
    <AppBarButton Icon="Save" Label="Save" Click="Save_Click"/>
</CommandBar>
Within the main Grid Element, the first block of XAML is a Grid Control which has two Rows: in the first Row is a ComboBox which will display the Language options available, and in the second Row is a TextBox Control. The second block of XAML is a CommandBar with an AppBarButton for Dictate, New, Open and Save, which call Dictate_Click, New_Click, Open_Click and Save_Click respectively.
Step 11
From the Menu choose View and then Code
Step 12
Once in the Code View, below the end of public MainPage() { … } the following Code should be entered:
Library library = new Library();

protected override void OnNavigatedTo(NavigationEventArgs e)
{
    library.Init(Dictate, Language, Display);
    Language.ItemsSource = library.Languages();
    Language.SelectedIndex = 0;
}

private void Language_SelectionChanged(object sender, SelectionChangedEventArgs e)
{
    library.Language(Language.SelectedValue);
}

private void Dictate_Click(object sender, RoutedEventArgs e)
{
    library.Dictate(Dictate, Language, Display);
}

private void New_Click(object sender, RoutedEventArgs e)
{
    library.New(Display);
}

private void Open_Click(object sender, RoutedEventArgs e)
{
    library.Open(Display);
}

private void Save_Click(object sender, RoutedEventArgs e)
{
    library.Save(ref Display);
}
There is an OnNavigatedTo Event Handler which calls the Init Method in the Library Class and sets the ItemsSource of the ComboBox. The Language_SelectionChanged Event Handler calls the Language Method in the Library Class, Dictate_Click calls Dictate, New_Click calls New, Open_Click calls Open and Save_Click calls Save in the Library Class.
Step 13
In the Solution Explorer select Package.appxmanifest
Step 14
From the Menu choose View and then Designer
Step 15
Finally in the Package.appxmanifest select Capabilities and then make sure the Microphone option is checked
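If you prefer to check the underlying file, ticking the Microphone option adds a DeviceCapability entry to the Capabilities element of Package.appxmanifest. As a rough sketch, it should contain something like the following; other Capabilities such as internetClient may also be present by default:

<Capabilities>
  <Capability Name="internetClient" />
  <!-- Added when the Microphone option is checked -->
  <DeviceCapability Name="microphone" />
</Capabilities>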
Step 16
That completes the Universal Windows Platform Application, so Save the Project, then in Visual Studio select the Local Machine to run the Application.
Step 17
Once the Application has started running you can select the Dictate Button to start or stop a Dictation. The first time you do this a question similar to Let DictatesApp access your Microphone will be displayed; select Yes, then you can speak and what you are saying will be recognised and entered into the TextBox. You can then use Save to preserve what you said so that you can Open it later, or start again with New. You may also need to allow the Application to Dictate speech via the Privacy link.
Step 18
To Exit the Application select the Close button in the top right of the Application
This example shows how to use the SpeechRecognizer, along with any supported Language on your Local Machine, to Dictate anything spoken, which will be transcribed and entered into a TextBox. It is a simple example that shows how you can build Speech recognition into an Application to implement continuous dictation.
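As a point of comparison, and separate from the tutorial's Library Class, the same SpeechRecognizer can also perform a single one-shot recognition rather than a continuous session. The following is a minimal sketch assuming the default dictation constraints and that Microphone access has already been granted; the class and method names here are illustrative only:

using System.Threading.Tasks;
using Windows.Media.SpeechRecognition;

public static class SingleShotExample
{
    // Recognise a single spoken phrase and return the recognised text
    public static async Task<string> RecogniseOnceAsync()
    {
        using (SpeechRecognizer recogniser = new SpeechRecognizer())
        {
            // Constraints must be compiled before recognition can start
            await recogniser.CompileConstraintsAsync();
            // Listen for one utterance, then return what was recognised
            SpeechRecognitionResult result = await recogniser.RecognizeAsync();
            return result.Text;
        }
    }
}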