Compare commits
3 Commits
c5956d3115
...
aafa36ad61
Author | SHA1 | Date | |
---|---|---|---|
aafa36ad61 | |||
dabe5b0fe2 | |||
ffec012ca0 |
@ -8,13 +8,15 @@ Personal voice assistant written in go using DeepSpeech. This was mainly created
|
||||
|
||||
The prerequisites for trident are:
|
||||
- libdeepspeech and DeepSpeech models (speech to text)
|
||||
- mimic (text to speech)
|
||||
- flite (text to speech)
|
||||
- go
|
||||
|
||||
`libdeepspeech` along with its models can be found in [its GitHub releases](https://github.com/mozilla/DeepSpeech/releases/). Be sure to download the `native_client` tarball for your platform and the `.pbmm` and `.scorer` files.
|
||||
|
||||
`mimic` can be installed via an [install script](https://github.com/MycroftAI/mycroft-core/blob/dev/scripts/install-mimic.sh) or its AUR package if using Arch Linux or its derivatives.
|
||||
|
||||
`flite` can be installed via your distribution's repositories:
|
||||
- Debian/Ubuntu: `sudo apt install flite-dev`
|
||||
- Fedora: `sudo dnf install flite-devel`
|
||||
- Arch: `sudo pacman -S flite festival-us`
|
||||
### Installation
|
||||
|
||||
Move the previously downloaded models (`.pbmm` and `.scorer`) into this repo as `deepspeech.pbmm` and `deepspeech.scorer`. Then, follow the next steps.
|
||||
|
7
audio.go
7
audio.go
@ -84,11 +84,16 @@ func playActivationTone(ctx *malgo.AllocatedContext) error {
|
||||
|
||||
// Create new channel waiting for completion
|
||||
done := make(chan bool)
|
||||
doneVar := false
|
||||
onSamples := func(output, _ []byte, _ uint32) {
|
||||
// Read as much audio into output as will fit
|
||||
n, err := io.ReadFull(wavReader, output)
|
||||
// If error occurred or no bytes read
|
||||
if err != nil || n == 0 {
|
||||
if !doneVar && (err != nil || n == 0) {
|
||||
if *verbose {
|
||||
log.Debug().Msg("Sample output complete")
|
||||
}
|
||||
doneVar = true
|
||||
// Signal completion
|
||||
done <- true
|
||||
}
|
||||
|
@ -93,4 +93,4 @@ func configEnv() (gopath, configDir, execDir, confPath string) {
|
||||
}
|
||||
// Return all variables
|
||||
return
|
||||
}
|
||||
}
|
||||
|
1
go.mod
1
go.mod
@ -4,6 +4,7 @@ go 1.16
|
||||
|
||||
require (
|
||||
github.com/asticode/go-astideepspeech v0.10.0
|
||||
github.com/gen2brain/flite-go v0.0.0-20170519100317-f4df2119132c
|
||||
github.com/gen2brain/malgo v0.10.29
|
||||
github.com/pelletier/go-toml v1.9.0
|
||||
github.com/rs/zerolog v1.21.0
|
||||
|
2
go.sum
2
go.sum
@ -5,6 +5,8 @@ github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7
|
||||
github.com/cryptix/wav v0.0.0-20180415113528-8bdace674401/go.mod h1:knK8fd+KPlGGqSUWogv1DQzGTwnfUvAi0cIoWyOG7+U=
|
||||
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/gen2brain/flite-go v0.0.0-20170519100317-f4df2119132c h1:JBlwZJSYopoPXh0dLN9GGw750uhU08VjKKpl+uX5pE4=
|
||||
github.com/gen2brain/flite-go v0.0.0-20170519100317-f4df2119132c/go.mod h1:Wv0H30ZpZPf4CrBNqgiG2S4G0CDtZWS2i87JnPtv9LI=
|
||||
github.com/gen2brain/malgo v0.10.29 h1:bTYiUTUKJsEomNby+W0hgyLrOttUXIk4lTEnKA54iqM=
|
||||
github.com/gen2brain/malgo v0.10.29/go.mod h1:zHSUNZAXfCeNsZou0RtQ6Zk7gDYLIcKOrUWtAdksnEs=
|
||||
github.com/pelletier/go-toml v1.9.0 h1:NOd0BRdOKpPf0SxkL3HxSQOG7rNh+4kl6PHcBPFs7Q0=
|
||||
|
8
main.go
8
main.go
@ -45,6 +45,7 @@ func main() {
|
||||
// Define and parse command line flags
|
||||
tfLogLevel := flag.Int("tf-log-level", 2, "Log level for TensorFlow")
|
||||
verbose = flag.BoolP("verbose", "v", false, "Log more events")
|
||||
showDecode := flag.BoolP("show-decode", "d", false, "Show text to speech decodes")
|
||||
configPath := flag.StringP("config", "c", confPath, "Location of trident TOML config")
|
||||
modelPath := flag.StringP("model", "m", filepath.Join(execDir, "deepspeech.pbmm"), "Path to DeepSpeech model")
|
||||
scorerPath := flag.StringP("scorer", "s", filepath.Join(execDir, "deepspeech.scorer"), "Path to DeepSpeech scorer")
|
||||
@ -207,7 +208,7 @@ func main() {
|
||||
// Create goroutine to periodically clean stream (every minute before this change, every 20 seconds after)
|
||||
go func() {
|
||||
for {
|
||||
time.Sleep(time.Minute)
|
||||
time.Sleep(20 * time.Second)
|
||||
// Lock mutex of stream
|
||||
safeStream.Lock()
|
||||
// Reset stream and buffer
|
||||
@ -223,7 +224,7 @@ func main() {
|
||||
var tts string
|
||||
listenForActivation := true
|
||||
for {
|
||||
time.Sleep(time.Second)
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
// Convert captured raw audio to slice of int16
|
||||
slice, err := convToInt16Slice(captured)
|
||||
if err != nil {
|
||||
@ -240,6 +241,9 @@ func main() {
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msg("Error intermediate decoding stream")
|
||||
}
|
||||
if *showDecode {
|
||||
log.Debug().Msg("TTS Decode: " + tts)
|
||||
}
|
||||
// If decoded string contains activation phrase and listenForActivation is true
|
||||
if strings.Contains(tts, config.ActivationPhrase) && listenForActivation {
|
||||
// Play activation tone
|
||||
|
@ -100,4 +100,3 @@ func initPlugins(gopath string) map[string]pluginFunc {
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
|
@ -21,18 +21,21 @@ package shell
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"trident"
|
||||
)
|
||||
|
||||
func RunPlugin(program string, data map[string]interface{}) {
|
||||
var shell string
|
||||
var ok bool
|
||||
// Attempt to get shell from config, asserting as string
|
||||
shell, ok = data["shell"].(string)
|
||||
shell, ok := data["shell"].(string)
|
||||
// If unsuccessful
|
||||
if !ok {
|
||||
// Set shell to default (/bin/sh)
|
||||
shell = "/bin/sh"
|
||||
}
|
||||
sayOutput, ok := data["sayOutput"].(bool)
|
||||
if !ok {
|
||||
sayOutput = false
|
||||
}
|
||||
// Create command using configured shell or default (/bin/sh)
|
||||
cmd := exec.Command(shell, "-c", program)
|
||||
// Set command environment to system environment
|
||||
@ -40,5 +43,8 @@ func RunPlugin(program string, data map[string]interface{}) {
|
||||
// Set command's standard error to system standard error
|
||||
cmd.Stderr = os.Stderr
|
||||
// Run command, ignoring error
|
||||
_ = cmd.Run()
|
||||
output, _ := cmd.Output()
|
||||
if sayOutput {
|
||||
trident.Say(string(output))
|
||||
}
|
||||
}
|
||||
|
30
symbols.go
30
symbols.go
@ -18,28 +18,28 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/gen2brain/flite-go"
|
||||
"github.com/traefik/yaegi/interp"
|
||||
"os/exec"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// Create custom package for trident
|
||||
var tridentSymbols = interp.Exports{"trident": {
|
||||
"Say": reflect.ValueOf(Say),
|
||||
"Say": reflect.ValueOf(Say),
|
||||
"SayWithVoice": reflect.ValueOf(SayWithVoice),
|
||||
}}
|
||||
|
||||
// Say speaks the given text using text-to-speech (mimic before this change, flite after)
|
||||
func Say(text string, args ...string) {
|
||||
// If mimic exists in PATH
|
||||
if _, err := exec.LookPath("mimic"); err == nil {
|
||||
// Set initial argument slice to contain text
|
||||
argSlice := []string{"-t", text}
|
||||
// Add any additional arguments to slice
|
||||
argSlice = append(argSlice, args...)
|
||||
// Create and run command
|
||||
exec.Command("mimic", argSlice...).Run()
|
||||
} else {
|
||||
// If mimic does not exist in PATH, warn user
|
||||
log.Warn().Err(err).Str("text", text).Msg("Cannot perform text to speech")
|
||||
func Say(text string) {
|
||||
fliteVoice, _ := flite.VoiceSelect("slt")
|
||||
flite.TextToSpeech(text, fliteVoice, "play")
|
||||
}
|
||||
|
||||
func SayWithVoice(text, voice string) error {
|
||||
fliteVoice, err := flite.VoiceSelect(voice)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
flite.TextToSpeech(text, fliteVoice, "play")
|
||||
return nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user