voice-interface-builder

Expert in building voice interfaces, speech recognition, and text-to-speech systems

SKILL.md
name: voice-interface-builder
description: Expert in building voice interfaces, speech recognition, and text-to-speech systems
version: 1.0.0

Voice Interface Builder Skill

I help you build voice-enabled interfaces using the Web Speech API and modern voice technologies.
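Browser support varies (Firefox, for instance, ships speech synthesis but not recognition), so it is worth feature-detecting before rendering any voice UI. A minimal sketch — the function name and the injectable parameter are my own, chosen so the check stays unit-testable rather than tied to `window`:

```typescript
// Detect which Web Speech features the current environment exposes.
interface SpeechSupport {
  recognition: boolean
  synthesis: boolean
}

function detectSpeechSupport(w: any): SpeechSupport {
  return {
    // some browsers ship recognition only under the webkit prefix
    recognition: Boolean(w && (w.SpeechRecognition || w.webkitSpeechRecognition)),
    synthesis: Boolean(w && w.speechSynthesis)
  }
}
```

Call it with `window` on the client and hide or disable voice controls for features that come back `false`.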

What I Do

Speech Recognition:

  • Voice commands and controls
  • Voice-to-text input
  • Continuous dictation
  • Command detection

Text-to-Speech:

  • Reading content aloud
  • Voice feedback and notifications
  • Multilingual speech output
  • Voice selection and customization

Voice UI:

  • Voice-first interfaces
  • Accessibility features
  • Hands-free controls
  • Voice search

Web Speech API Basics

Speech Recognition

// hooks/useSpeechRecognition.ts
'use client'
import { useState, useEffect, useRef } from 'react'

interface SpeechRecognitionOptions {
  continuous?: boolean
  language?: string
  onResult?: (transcript: string) => void
  onError?: (error: string) => void
}

export function useSpeechRecognition({
  continuous = false,
  language = 'en-US',
  onResult,
  onError
}: SpeechRecognitionOptions = {}) {
  const [isListening, setIsListening] = useState(false)
  const [transcript, setTranscript] = useState('')
  const recognitionRef = useRef<SpeechRecognition | null>(null)

  useEffect(() => {
    if (typeof window === 'undefined') return

    const SpeechRecognition =
      window.SpeechRecognition ?? (window as any).webkitSpeechRecognition // some browsers only ship the webkit-prefixed constructor

    if (!SpeechRecognition) {
      console.warn('Speech recognition not supported')
      return
    }

    const recognition = new SpeechRecognition()
    recognition.continuous = continuous
    recognition.lang = language
    recognition.interimResults = true

    recognition.onresult = event => {
      const transcript = Array.from(event.results)
        .map(result => result[0].transcript)
        .join('')

      setTranscript(transcript)
      onResult?.(transcript)
    }

    recognition.onerror = event => {
      console.error('Speech recognition error:', event.error)
      onError?.(event.error)
      setIsListening(false)
    }

    recognition.onend = () => {
      setIsListening(false)
    }

    recognitionRef.current = recognition

    return () => {
      recognition.stop()
    }
  }, [continuous, language, onResult, onError]) // wrap callbacks in useCallback so the recognizer isn't rebuilt every render

  const start = () => {
    if (recognitionRef.current && !isListening) {
      recognitionRef.current.start()
      setIsListening(true)
    }
  }

  const stop = () => {
    if (recognitionRef.current && isListening) {
      recognitionRef.current.stop()
      setIsListening(false)
    }
  }

  return { isListening, transcript, start, stop }
}

Usage:

'use client'
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition'

export function VoiceInput() {
  const { isListening, transcript, start, stop } = useSpeechRecognition({
    onResult: (text) => console.log('Recognized:', text)
  })

  return (
    <div>
      <button onClick={isListening ? stop : start}>
        {isListening ? '🔴 Stop' : '🎤 Start Listening'}
      </button>
      <p>{transcript}</p>
    </div>
  )
}

Text-to-Speech

// hooks/useSpeechSynthesis.ts
'use client'
import { useState, useEffect } from 'react'

export function useSpeechSynthesis() {
  const [voices, setVoices] = useState<SpeechSynthesisVoice[]>([])
  const [speaking, setSpeaking] = useState(false)

  useEffect(() => {
    if (typeof window === 'undefined') return

    const loadVoices = () => {
      setVoices(window.speechSynthesis.getVoices())
    }

    loadVoices()
    // Chrome populates the voice list asynchronously, so refresh when it arrives
    window.speechSynthesis.onvoiceschanged = loadVoices

    return () => {
      window.speechSynthesis.onvoiceschanged = null
    }
  }, [])

  const speak = (
    text: string,
    options?: {
      voice?: SpeechSynthesisVoice
      rate?: number
      pitch?: number
      volume?: number
    }
  ) => {
    if (typeof window === 'undefined') return

    const utterance = new SpeechSynthesisUtterance(text)

    if (options?.voice) utterance.voice = options.voice
    if (options?.rate) utterance.rate = options.rate // 0.1 - 10
    if (options?.pitch) utterance.pitch = options.pitch // 0 - 2
    if (options?.volume) utterance.volume = options.volume // 0 - 1

    utterance.onstart = () => setSpeaking(true)
    utterance.onend = () => setSpeaking(false)
    utterance.onerror = () => setSpeaking(false)

    window.speechSynthesis.speak(utterance)
  }

  const cancel = () => {
    if (typeof window !== 'undefined') {
      window.speechSynthesis.cancel()
      setSpeaking(false)
    }
  }

  return { speak, cancel, speaking, voices }
}

Usage:

'use client'
import { useState } from 'react'
import { useSpeechSynthesis } from '@/hooks/useSpeechSynthesis'

export function TextToSpeech() {
  const { speak, cancel, speaking, voices } = useSpeechSynthesis()
  const [text, setText] = useState('Hello, how can I help you today?')

  return (
    <div>
      <textarea
        value={text}
        onChange={(e) => setText(e.target.value)}
        rows={4}
      />

      <button
        onClick={() => speak(text)}
        disabled={speaking}
      >
        {speaking ? 'Speaking...' : '🔊 Speak'}
      </button>

      <button onClick={cancel} disabled={!speaking}>
        ⏹️ Stop
      </button>

      <select onChange={(e) => {
        const voice = voices.find(v => v.name === e.target.value)
        if (voice) speak(text, { voice })
      }}>
        {voices.map((voice) => (
          <option key={voice.name} value={voice.name}>
            {voice.name} ({voice.lang})
          </option>
        ))}
      </select>
    </div>
  )
}

Voice Commands

Command Detection

'use client'
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition'
import { useEffect } from 'react'

interface Command {
  phrase: string | RegExp
  action: () => void
}

export function VoiceCommands({ commands }: { commands: Command[] }) {
  const { isListening, transcript, start, stop } = useSpeechRecognition({
    continuous: true
  })

  useEffect(() => {
    if (!transcript) return

    // Note: in continuous mode the transcript accumulates, so a matched
    // command can fire again on later results; track handled phrases if
    // your actions aren't idempotent.
    const lowerTranscript = transcript.toLowerCase()

    for (const command of commands) {
      if (typeof command.phrase === 'string') {
        if (lowerTranscript.includes(command.phrase.toLowerCase())) {
          command.action()
        }
      } else {
        if (command.phrase.test(lowerTranscript)) {
          command.action()
        }
      }
    }
  }, [transcript, commands])

  return (
    <button onClick={isListening ? stop : start}>
      {isListening ? '🔴 Stop Voice Commands' : '🎤 Start Voice Commands'}
    </button>
  )
}

Usage:

'use client'
import { useState } from 'react'
import { useRouter } from 'next/navigation'

export function App() {
  const router = useRouter()
  const [menuOpen, setMenuOpen] = useState(false)
  const commands = [
    {
      phrase: 'go home',
      action: () => router.push('/')
    },
    {
      phrase: 'open menu',
      action: () => setMenuOpen(true)
    },
    {
      phrase: 'search for',
      action: () => {
        // Extract search query after "search for"
      }
    }
  ]

  return <VoiceCommands commands={commands} />
}
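The "search for" command above still needs to pull out the words that follow the phrase. One way to sketch that as a pure helper (the name `extractAfterPhrase` is hypothetical, and the phrase is used verbatim in a regex, so keep it free of regex metacharacters):

```typescript
// Extract the argument that follows a command phrase,
// e.g. the query after "search for".
function extractAfterPhrase(transcript: string, phrase: string): string | null {
  const match = transcript.toLowerCase().match(new RegExp(`${phrase}\\s+(.+)`))
  return match ? match[1].trim() : null
}
```

Inside the `'search for'` action you could then call `extractAfterPhrase(transcript, 'search for')` and route the result to your search handler.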

Voice Search

'use client'
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition'
import { useState } from 'react'

export function VoiceSearch() {
  const [query, setQuery] = useState('')

  const { isListening, start, stop } = useSpeechRecognition({
    onResult: (transcript) => {
      setQuery(transcript)
    }
  })

  const handleSearch = () => {
    if (query) {
      // Perform search with query
      console.log('Searching for:', query)
    }
  }

  return (
    <div className="flex gap-2">
      <input
        type="text"
        value={query}
        onChange={(e) => setQuery(e.target.value)}
        placeholder="Search or speak..."
        className="flex-1 px-4 py-2 border rounded"
      />

      <button
        onClick={isListening ? stop : start}
        className={`px-4 py-2 rounded ${
          isListening ? 'bg-red-600 text-white' : 'bg-gray-200'
        }`}
      >
        {isListening ? '🔴' : '🎤'}
      </button>

      <button
        onClick={handleSearch}
        className="px-4 py-2 bg-blue-600 text-white rounded"
      >
        Search
      </button>
    </div>
  )
}
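Recognized text often arrives with stray casing or trailing punctuation, so before running the search it can help to normalize the spoken query. A small sketch under that assumption (the helper name is my own):

```typescript
// Clean up a spoken query before searching.
function normalizeSpokenQuery(raw: string): string {
  return raw
    .trim()
    .replace(/[.,!?]+$/, '') // drop trailing punctuation the recognizer may add
    .toLowerCase()
}
```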

Accessibility Features

Read Article Aloud

'use client'
import { useSpeechSynthesis } from '@/hooks/useSpeechSynthesis'

export function ReadAloudButton({ content }: { content: string }) {
  const { speak, cancel, speaking } = useSpeechSynthesis()

  return (
    <button
      onClick={() => speaking ? cancel() : speak(content)}
      className="flex items-center gap-2 px-4 py-2 bg-blue-600 text-white rounded"
    >
      {speaking ? '⏹️ Stop Reading' : '🔊 Read Aloud'}
    </button>
  )
}

Voice Navigation

'use client'
import { useRouter } from 'next/navigation'
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition'
import { useEffect } from 'react'

export function VoiceNavigation() {
  const router = useRouter()
  const { isListening, transcript, start, stop } = useSpeechRecognition()

  useEffect(() => {
    if (!transcript) return

    const lower = transcript.toLowerCase()

    if (lower.includes('go to home')) router.push('/')
    else if (lower.includes('go to about')) router.push('/about')
    else if (lower.includes('go to contact')) router.push('/contact')
    else if (lower.includes('go back')) router.back()
  }, [transcript, router])

  return (
    <button onClick={isListening ? stop : start}>
      {isListening ? 'Disable Voice Nav' : 'Enable Voice Nav'}
    </button>
  )
}
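The if/else chain above can also be written as a data-driven lookup, so adding a page means adding one entry rather than another branch. A sketch with the same phrases and paths as the component:

```typescript
// Map spoken phrases to routes; first match wins.
const voiceRoutes: Record<string, string> = {
  'go to home': '/',
  'go to about': '/about',
  'go to contact': '/contact'
}

function matchVoiceRoute(transcript: string): string | null {
  const lower = transcript.toLowerCase()
  for (const [phrase, path] of Object.entries(voiceRoutes)) {
    if (lower.includes(phrase)) return path
  }
  return null
}
```

In the effect you would then call `router.push` only when `matchVoiceRoute(transcript)` returns a path (with `router.back()` still handled separately).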

Voice Dictation

'use client'
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition'
import { useState } from 'react'

export function VoiceDictation() {
  const [text, setText] = useState('')

  const { isListening, start, stop } = useSpeechRecognition({
    continuous: true,
    onResult: (transcript) => {
      setText(transcript)
    }
  })

  return (
    <div>
      <textarea
        value={text}
        onChange={(e) => setText(e.target.value)}
        rows={10}
        className="w-full p-4 border rounded"
        placeholder="Start dictating..."
      />

      <div className="mt-4 flex gap-2">
        <button
          onClick={isListening ? stop : start}
          className={`px-4 py-2 rounded ${
            isListening ? 'bg-red-600 text-white' : 'bg-blue-600 text-white'
          }`}
        >
          {isListening ? '🔴 Stop Dictation' : '🎤 Start Dictation'}
        </button>

        <button
          onClick={() => setText('')}
          className="px-4 py-2 bg-gray-200 rounded"
        >
          Clear
        </button>
      </div>
    </div>
  )
}
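The recognizer returns punctuation words literally ("comma", "period"), it does not insert the marks for you. If you want dictation-style punctuation, one optional post-processing sketch (the helper and its word list are assumptions, not browser behavior):

```typescript
// Turn spoken punctuation words into the punctuation itself.
function applySpokenPunctuation(text: string): string {
  return text
    .replace(/\bcomma\b/gi, ',')
    .replace(/\bperiod\b/gi, '.')
    .replace(/\bquestion mark\b/gi, '?')
    .replace(/\s+([.,?])/g, '$1') // drop the space the spoken word left behind
}
```

You could run this inside `onResult` before calling `setText`.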

Multilingual Support

'use client'
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition'
import { useSpeechSynthesis } from '@/hooks/useSpeechSynthesis'
import { useState } from 'react'

const languages = [
  { code: 'en-US', name: 'English (US)' },
  { code: 'es-ES', name: 'Spanish (Spain)' },
  { code: 'fr-FR', name: 'French (France)' },
  { code: 'de-DE', name: 'German (Germany)' },
  { code: 'ja-JP', name: 'Japanese' },
  { code: 'zh-CN', name: 'Chinese (Simplified)' }
]

export function MultilingualVoice() {
  const [language, setLanguage] = useState('en-US')
  const [text, setText] = useState('')

  const { isListening, start, stop } = useSpeechRecognition({
    language,
    onResult: (t) => setText(t)
  })

  const { speak } = useSpeechSynthesis()

  return (
    <div>
      <select
        value={language}
        onChange={(e) => setLanguage(e.target.value)}
        className="px-4 py-2 border rounded"
      >
        {languages.map((lang) => (
          <option key={lang.code} value={lang.code}>
            {lang.name}
          </option>
        ))}
      </select>

      <div className="mt-4">
        <button onClick={isListening ? stop : start}>
          {isListening ? '🔴 Stop' : '🎤 Speak'}
        </button>

        <button onClick={() => speak(text)}>
          🔊 Read Aloud
        </button>
      </div>

      <textarea
        value={text}
        onChange={(e) => setText(e.target.value)}
        rows={4}
        className="w-full mt-4 p-4 border rounded"
      />
    </div>
  )
}
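For output, the selected recognition language and the synthesis voice should agree. A hedged sketch of picking a voice for a BCP-47 tag — exact match first ('fr-FR'), then any voice sharing the primary language ('fr'); the helper name and the `VoiceLike` shape are my own, kept structural so it works with `SpeechSynthesisVoice` (which has `name` and `lang`):

```typescript
// Choose a synthesis voice for a BCP-47 language tag.
interface VoiceLike {
  name: string
  lang: string
}

function pickVoiceForLanguage<T extends VoiceLike>(voices: T[], lang: string): T | null {
  return (
    voices.find(v => v.lang === lang) ??
    voices.find(v => v.lang.split('-')[0] === lang.split('-')[0]) ??
    null
  )
}
```

In `MultilingualVoice` you could pass the result as `speak(text, { voice })` so read-aloud follows the selected language.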

Voice Feedback

'use client'
import { useSpeechSynthesis } from '@/hooks/useSpeechSynthesis'

export function useVoiceFeedback() {
  const { speak } = useSpeechSynthesis()

  const announce = (message: string) => {
    speak(message, { rate: 1.1, volume: 0.8 })
  }

  return {
    success: (message: string) => announce(`Success: ${message}`),
    error: (message: string) => announce(`Error: ${message}`),
    info: (message: string) => announce(message),
    confirm: (action: string) => announce(`${action} confirmed`)
  }
}

// Usage
export function Form() {
  const voice = useVoiceFeedback()

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault() // keep the page from reloading before feedback plays
    try {
      await submitForm() // submitForm stands in for your own submit logic
      voice.success('Form submitted successfully')
    } catch (error) {
      voice.error('Failed to submit form')
    }
  }

  return <form onSubmit={handleSubmit}>...</form>
}

When to Use Me

Perfect for:

  • Building voice-controlled interfaces
  • Adding accessibility features
  • Creating hands-free experiences
  • Implementing voice search
  • Building voice assistants

I'll help you:

  • Implement speech recognition
  • Add text-to-speech
  • Build voice commands
  • Support multiple languages
  • Create accessible voice UI

What I'll Create

🎤 Speech Recognition
🔊 Text-to-Speech
🗣️ Voice Commands
🔍 Voice Search
🌍 Multilingual Support
♿ Accessibility Features

Let's make your app voice-enabled!
