Browse Source

implemented text generation with markov chains

master
arnaucode 7 years ago
parent
commit
0068854d5f
4 changed files with 194 additions and 0 deletions
  1. +27
    -0
      .gitignore
  2. +8
    -0
      README.md
  3. +48
    -0
      main.go
  4. +111
    -0
      markov.go

+ 27
- 0
.gitignore

@ -0,0 +1,27 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
text.txt

+ 8
- 0
README.md

@ -1,2 +1,10 @@
# goMarkov
A Markov chain text generator written in Go from scratch.
```go
states := markov.train(text)
generatedText := markov.generateText(states, firstWord, count)
fmt.Println(generatedText)
```
(The `text` variable holds the training text, which can be loaded from a .txt file.)

+ 48
- 0
main.go

@ -0,0 +1,48 @@
package main
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"strconv"
"strings"
)
// readText loads the file at path and returns its content as a single line,
// with every newline character replaced by a space.
//
// When the file cannot be read it returns an empty string together with the
// error reported by ioutil.ReadFile, so callers never receive partially read
// content alongside a non-nil error.
func readText(path string) (string, error) {
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return "", err
	}
	return strings.Replace(string(data), "\n", " ", -1), nil
}
// main interactively asks for a starting word and a word count, trains the
// Markov model on the content of text.txt and prints the generated text.
// It exits with status 1 when the input is unusable or text.txt cannot be read.
func main() {
	// Both prompts must share one buffered reader: a bufio.Reader may read
	// ahead past the first newline, so a second reader created on os.Stdin
	// would never see input the first one already buffered (e.g. piped input).
	stdin := bufio.NewReader(os.Stdin)

	fmt.Print("entry the first word: ")
	firstWord, err := stdin.ReadString('\n')
	firstWord = strings.TrimSpace(firstWord)
	if err != nil && firstWord == "" {
		fmt.Println("could not read the first word:", err)
		os.Exit(1)
	}
	fmt.Print("first word: ")
	fmt.Println(firstWord)

	fmt.Println("how many words you want on the text?")
	// A read error here (typically EOF without a trailing newline) can still
	// come with usable input; an empty or malformed answer is rejected by the
	// Atoi check below, so the error itself is not inspected.
	answer, _ := stdin.ReadString('\n')
	answer = strings.TrimSpace(answer)
	fmt.Print("Number of words on text to generate: ")
	fmt.Println(answer)
	count, err := strconv.Atoi(answer)
	if err != nil || count < 0 {
		// Stop here instead of generating text with a meaningless count.
		fmt.Println("incorrect entry, need a positive number")
		os.Exit(1)
	}

	text, err := readText("text.txt")
	if err != nil {
		fmt.Println("could not read text.txt:", err)
		os.Exit(1)
	}

	fmt.Println("generating text")
	states := markov.train(text)
	generatedText := markov.generateText(states, firstWord, count)
	fmt.Println("")
	fmt.Println(generatedText)
}

+ 111
- 0
markov.go

@ -0,0 +1,111 @@
package main
import (
"fmt"
"math/rand"
"strconv"
"strings"
)
// Markov is an empty type whose only role is to carry the train and
// generateText methods.
type Markov struct{}
// State is one node of the Markov chain: a distinct word together with the
// words that were recorded directly after it.
type State struct {
	// Word is the text of the word this state stands for.
	Word string
	// Count is how many times the word was recorded by addWordToStates.
	Count int
	// Prob is filled in by calcMarkovStates as Count divided by the total
	// number of words, multiplied by 100.
	Prob float64
	// NextStates holds one entry per distinct word recorded right after Word;
	// each entry carries its own Count and Prob.
	NextStates []State
}
// markov is the package-level Markov value through which main calls train
// and generateText.
var markov Markov
// addWordToStates records one occurrence of word in states.
//
// If a state for word already exists its Count is incremented in place;
// otherwise a new state with Count 1 is appended. It returns the (possibly
// grown) slice and the index of the state that holds word.
func addWordToStates(states []State, word string) ([]State, int) {
	// Scan from the back so that, should the slice ever hold the same word
	// twice, the entry with the highest index is the one that gets updated.
	for idx := len(states) - 1; idx >= 0; idx-- {
		if states[idx].Word == word {
			states[idx].Count++
			return states, idx
		}
	}
	// Not seen before: the new state lands at the old length of the slice.
	return append(states, State{Word: word, Count: 1}), len(states)
}
// calcMarkovStates builds the chain states for a sequence of words.
//
// Every word except the last one is recorded as a state, and the word that
// follows it is recorded among that state's NextStates. Afterwards Prob is set
// on each state and on each of its successors to Count / len(words) * 100;
// successor probabilities are therefore relative to the whole text, not to the
// owning state's own count. The total number of words is printed to stdout.
func calcMarkovStates(words []string) []State {
	var states []State

	// Tally each word together with its immediate successor.
	for i := 0; i+1 < len(words); i++ {
		var at int
		states, at = addWordToStates(states, words[i])
		states[at].NextStates, _ = addWordToStates(states[at].NextStates, words[i+1])
	}

	// Turn the raw counts into percentages of the total word count.
	total := float64(len(words))
	for i := range states {
		st := &states[i]
		st.Prob = float64(st.Count) / total * 100
		for j := range st.NextStates {
			succ := &st.NextStates[j]
			succ.Prob = float64(succ.Count) / total * 100
		}
	}

	fmt.Println("total words computed: " + strconv.Itoa(len(words)))
	return states
}
// textToWords splits text into its words.
//
// Words are separated by any run of whitespace (spaces, tabs, newlines,
// carriage returns), so consecutive separators never produce empty-string
// entries. An empty or all-whitespace text yields no words.
func textToWords(text string) []string {
	return strings.Fields(text)
}
// train turns text into chain states: the text is split into words with
// textToWords and the result is handed to calcMarkovStates.
func (markov Markov) train(text string) []State {
	return calcMarkovStates(textToWords(text))
}
// getNextMarkovState picks the word that follows word, using the successors
// recorded in states and a randomised comparison of their Prob values.
//
// It returns the fixed message "word no exist on the memory" when word has no
// state, or when its state has no recorded successors to choose from.
func getNextMarkovState(states []State, word string) string {
	const unknownWord = "word no exist on the memory"

	iState := -1
	for i := range states {
		if states[i].Word == word {
			iState = i
		}
	}
	// Guard both the missing-word case and a state without successors, which
	// would otherwise make the NextStates[0] access below panic.
	if iState < 0 || len(states[iState].NextStates) == 0 {
		return unknownWord
	}

	current := states[iState]
	// Start from the first successor with a random threshold derived from the
	// current state's own probability.
	next := current.NextStates[0]
	next.Prob = rand.Float64() * current.Prob
	for _, candidate := range current.NextStates {
		// A candidate equal to the word of the state stored just before the
		// current one is never selected. The first state has no such
		// neighbour, so the exclusion is skipped there instead of indexing
		// states[-1].
		if rand.Float64()*candidate.Prob > next.Prob &&
			(iState == 0 || states[iState-1].Word != candidate.Word) {
			next = candidate
		}
	}
	return next.Word
}
// generateText produces a text that starts with initWord and is followed by
// count words, each obtained by passing the previous word to
// getNextMarkovState. The words are joined with single spaces and a final "."
// element is appended as the last item of the join.
func (markov Markov) generateText(states []State, initWord string, count int) string {
	words := []string{initWord}
	for current, n := initWord, 0; n < count; n++ {
		current = getNextMarkovState(states, current)
		words = append(words, current)
	}
	return strings.Join(append(words, "."), " ")
}

Loading…
Cancel
Save