Skip to main content

Using Microphone Input

The example below will help you transcribe your voice by using the official Speechmatics Python library and CLI.

You can also integrate with Speechmatics using our official JavaScript SDK, or from the programming language of your choice by following the Real-Time API Reference.

Setup

The Speechmatics Python library and CLI can be installed using pip:

pip3 install speechmatics-python

Transcribe your voice in real-time using the Speechmatics Python library. Just copy in your API key to get started!

In order to use this script, you may also need to install PyAudio by running:

pip3 install pyaudio
Note: on a Mac with an M1/M2 chip, use the following commands to install PyAudio:
brew install portaudio
brew link portaudio
BREW_PREFIX=$(brew --prefix)
CFLAGS="-I$BREW_PREFIX/include -L$BREW_PREFIX/lib" python3 -m pip install pyaudio

Code example

import asyncio

import pyaudio
import speechmatics
from httpx import HTTPStatusError

# Speechmatics API key; replace the placeholder with your own key before running.
API_KEY = "YOUR_API_KEY"
# Language code, used both in the connection URL and in the transcription config.
LANGUAGE = "en"
CONNECTION_URL = f"wss://eu2.rt.speechmatics.com/v2/{LANGUAGE}"
# -1 means "use the default input device"; otherwise a PyAudio device index.
DEVICE_INDEX = -1
# Passed to PyAudio as frames_per_buffer and to the audio settings as chunk_size.
CHUNK_SIZE = 1024


class AudioProcessor:
    """In-memory byte buffer with a synchronous writer and an async reader.

    ``write_audio`` appends raw audio bytes; ``read`` is a coroutine that
    waits until enough bytes are available and then hands them out in order.
    Bytes that have been handed out are removed from the buffer, so memory
    use stays proportional to the unread backlog instead of growing for the
    whole session.
    """

    def __init__(self):
        # Audio bytes that have been written but not yet read.
        self.wave_data = bytearray()
        # Running total of bytes returned by read() so far.
        self.read_offset = 0

    async def read(self, chunk_size):
        """Return the next ``chunk_size`` bytes, waiting until they exist.

        Args:
            chunk_size: Number of bytes to return.

        Returns:
            A ``bytearray`` holding exactly ``chunk_size`` bytes, in the
            order they were written.
        """
        # Poll with a short sleep so the event loop can run other tasks
        # while the writer fills the buffer.
        while len(self.wave_data) < chunk_size:
            await asyncio.sleep(0.001)
        data = self.wave_data[:chunk_size]
        # Drop what was just consumed. write_audio() only appends at the
        # end, so the prefix removed here is exactly the data returned.
        del self.wave_data[:chunk_size]
        self.read_offset += chunk_size
        return data

    def write_audio(self, data):
        """Append ``data`` (a bytes-like object) to the end of the buffer."""
        self.wave_data.extend(data)


# Shared buffer: filled by the PyAudio callback, drained by the transcription client.
audio_processor = AudioProcessor()


# PyAudio callback
def stream_callback(in_data, frame_count, time_info, status):
    """Store each captured audio buffer and tell PyAudio to keep streaming.

    Only ``in_data`` is used; the other parameters are required by the
    PyAudio callback signature.

    Returns:
        A ``(data, flag)`` tuple where ``flag`` is ``pyaudio.paContinue``.
    """
    audio_processor.write_audio(in_data)
    return in_data, pyaudio.paContinue

# Set up PyAudio
p = pyaudio.PyAudio()
if DEVICE_INDEX == -1:
    # No device chosen: fall back to the default input device and list the
    # alternatives so the user can pick one by index.
    default_device = p.get_default_input_device_info()
    DEVICE_INDEX = default_device['index']
    DEF_SAMPLE_RATE = int(default_device['defaultSampleRate'])
    print("***\nIf you want to use a different microphone, update DEVICE_INDEX at the start of the code to one of the following:")
    # Filter out duplicates that are reported on some systems
    device_seen = set()
    for i in range(p.get_device_count()):
        candidate_name = p.get_device_info_by_index(i)['name']
        if candidate_name in device_seen:
            continue
        device_seen.add(candidate_name)
        try:
            supports_input = p.is_format_supported(DEF_SAMPLE_RATE, input_device=i, input_channels=1, input_format=pyaudio.paFloat32)
        except Exception:
            # Best effort: a device that cannot be probed is simply not listed.
            supports_input = False
        if supports_input:
            print(f"-- To use << {candidate_name} >>, set DEVICE_INDEX to {i}")
    print("***\n")

# Look the selected device up once and take both values from the same record.
selected_device = p.get_device_info_by_index(DEVICE_INDEX)
SAMPLE_RATE = int(selected_device['defaultSampleRate'])
device_name = selected_device['name']

print(f"\nUsing << {device_name} >> which is DEVICE_INDEX {DEVICE_INDEX}")

# Open a mono float32 input stream; captured buffers are delivered to stream_callback.
stream = p.open(
    format=pyaudio.paFloat32,
    channels=1,
    rate=SAMPLE_RATE,
    input=True,
    frames_per_buffer=CHUNK_SIZE,
    input_device_index=DEVICE_INDEX,
    stream_callback=stream_callback,
)

# Define connection parameters
conn = speechmatics.models.ConnectionSettings(
    url=CONNECTION_URL,
    auth_token=API_KEY,
)

# Create a transcription client
ws = speechmatics.client.WebsocketClient(conn)

# Define transcription parameters
# Full list of parameters described here: https://speechmatics.github.io/speechmatics-python/models
conf = speechmatics.models.TranscriptionConfig(
    language=LANGUAGE,
    # Partials are needed for the "[partial]" output printed by the partial-transcript handler.
    enable_partials=True,
    max_delay=5,
)

# Define an event handler to print the partial transcript
def print_partial_transcript(msg):
    """Print the transcript text of a partial-transcript message.

    Args:
        msg: Mapping with the text under ``msg['metadata']['transcript']``.
    """
    print(f"[partial] {msg['metadata']['transcript']}")

# Define an event handler to print the full transcript
def print_transcript(msg):
    """Print the transcript text of a final-transcript message.

    Args:
        msg: Mapping with the text under ``msg['metadata']['transcript']``.
    """
    print(f"[  FINAL] {msg['metadata']['transcript']}")

# Register the event handler for partial transcript
ws.add_event_handler(
    event_name=speechmatics.models.ServerMessageType.AddPartialTranscript,
    event_handler=print_partial_transcript,
)

# Register the event handler for full transcript
ws.add_event_handler(
    event_name=speechmatics.models.ServerMessageType.AddTranscript,
    event_handler=print_transcript,
)

# Describe the audio being sent: float32 little-endian PCM at the device's sample rate,
# matching the paFloat32 format the input stream was opened with.
settings = speechmatics.models.AudioSettings()
settings.encoding = "pcm_f32le"
settings.sample_rate = SAMPLE_RATE
settings.chunk_size = CHUNK_SIZE

print("Starting transcription (type Ctrl-C to stop):")
try:
    ws.run_synchronously(audio_processor, conf, settings)
except KeyboardInterrupt:
    print("\nTranscription stopped.")
except HTTPStatusError as e:
    if e.response.status_code == 401:
        print('Invalid API key - Check your API_KEY at the top of the code!')
    else:
        # Bare raise keeps the original traceback intact.
        raise
finally:
    # Release the microphone and PortAudio resources however the run ended.
    stream.stop_stream()
    stream.close()
    p.terminate()