mirror of
https://github.com/arnaucube/goMarkov.git
synced 2026-02-06 19:06:45 +01:00
implemented text generation with markov chains
This commit is contained in:
27
.gitignore
vendored
Normal file
27
.gitignore
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
||||
|
||||
|
||||
text.txt
|
||||
@@ -1,2 +1,10 @@
|
||||
# goMarkov
|
||||
A Markov chain text generator written in Go from scratch.
|
||||
|
||||
|
||||
```go
|
||||
states := markov.train(text)
|
||||
generatedText := markov.generateText(states, firstWord, count)
|
||||
fmt.Println(generatedText)
|
||||
```
|
||||
(The `text` variable holds the training text; it can be loaded from a .txt file.)
|
||||
|
||||
48
main.go
Normal file
48
main.go
Normal file
@@ -0,0 +1,48 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// readText loads the file at path and returns its content flattened to a
// single line: every line break is replaced by one space so the result can be
// split into words.
//
// Both Unix ("\n") and Windows ("\r\n") line endings are handled, so no stray
// "\r" stays attached to the last word of a line. If the file cannot be read,
// the returned string is empty and the read error is returned unchanged.
func readText(path string) (string, error) {
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return "", err
	}
	// "\r\n" is listed first so a Windows line break becomes one space
	// instead of "\r" followed by a space.
	flatten := strings.NewReplacer("\r\n", " ", "\n", " ")
	return flatten.Replace(string(data)), nil
}
|
||||
|
||||
func main() {
|
||||
fmt.Print("entry the first word: ")
|
||||
newcommand := bufio.NewReader(os.Stdin)
|
||||
firstWord, _ := newcommand.ReadString('\n')
|
||||
firstWord = strings.TrimSpace(firstWord)
|
||||
fmt.Print("first word: ")
|
||||
fmt.Println(firstWord)
|
||||
|
||||
fmt.Println("how many words you want on the text?")
|
||||
newcommand = bufio.NewReader(os.Stdin)
|
||||
answer, _ := newcommand.ReadString('\n')
|
||||
answer = strings.TrimSpace(answer)
|
||||
fmt.Print("Number of words on text to generate: ")
|
||||
fmt.Println(answer)
|
||||
count, err := strconv.Atoi(answer)
|
||||
if err != nil {
|
||||
fmt.Println("incorrect entry, need a positive number")
|
||||
}
|
||||
|
||||
text, _ := readText("text.txt")
|
||||
|
||||
fmt.Println("generating text")
|
||||
states := markov.train(text)
|
||||
generatedText := markov.generateText(states, firstWord, count)
|
||||
fmt.Println("")
|
||||
fmt.Println(generatedText)
|
||||
}
|
||||
111
markov.go
Normal file
111
markov.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Markov is a field-less type that only serves as the receiver for the train
// and generateText methods.
type Markov struct{}
|
||||
|
||||
// State is one node of the chain: a word together with the statistics
// collected for it and the words observed right after it.
type State struct {
	// Word is the token this state stands for.
	Word string
	// Count is the number of times the word was recorded.
	Count int
	// Prob is Count expressed as a percentage of the total number of words
	// in the training text.
	Prob float64
	// NextStates lists the words seen immediately after Word, each with its
	// own Count and Prob.
	NextStates []State
}
|
||||
|
||||
// markov is the package-level Markov value through which main calls train
// and generateText.
var markov Markov
|
||||
|
||||
func addWordToStates(states []State, word string) ([]State, int) {
|
||||
iState := -1
|
||||
for i := 0; i < len(states); i++ {
|
||||
if states[i].Word == word {
|
||||
iState = i
|
||||
}
|
||||
}
|
||||
if iState >= 0 {
|
||||
states[iState].Count++
|
||||
} else {
|
||||
var tempState State
|
||||
tempState.Word = word
|
||||
tempState.Count = 1
|
||||
|
||||
states = append(states, tempState)
|
||||
iState = len(states) - 1
|
||||
|
||||
}
|
||||
return states, iState
|
||||
}
|
||||
|
||||
func calcMarkovStates(words []string) []State {
|
||||
var states []State
|
||||
//count words
|
||||
for i := 0; i < len(words)-1; i++ {
|
||||
var iState int
|
||||
states, iState = addWordToStates(states, words[i])
|
||||
if iState < len(words) {
|
||||
states[iState].NextStates, _ = addWordToStates(states[iState].NextStates, words[i+1])
|
||||
}
|
||||
}
|
||||
|
||||
//count prob
|
||||
for i := 0; i < len(states); i++ {
|
||||
states[i].Prob = (float64(states[i].Count) / float64(len(words)) * 100)
|
||||
for j := 0; j < len(states[i].NextStates); j++ {
|
||||
states[i].NextStates[j].Prob = (float64(states[i].NextStates[j].Count) / float64(len(words)) * 100)
|
||||
}
|
||||
}
|
||||
fmt.Println("total words computed: " + strconv.Itoa(len(words)))
|
||||
//fmt.Println(states)
|
||||
return states
|
||||
}
|
||||
|
||||
// textToWords splits text into its words.
//
// Words are separated by any run of whitespace, so repeated spaces, tabs or
// leading/trailing blanks never produce empty-string words. An empty or
// all-whitespace text yields an empty slice.
func textToWords(text string) []string {
	return strings.Fields(text)
}
|
||||
|
||||
func (markov Markov) train(text string) []State {
|
||||
|
||||
words := textToWords(text)
|
||||
states := calcMarkovStates(words)
|
||||
//fmt.Println(states)
|
||||
|
||||
return states
|
||||
}
|
||||
|
||||
func getNextMarkovState(states []State, word string) string {
|
||||
iState := -1
|
||||
for i := 0; i < len(states); i++ {
|
||||
if states[i].Word == word {
|
||||
iState = i
|
||||
}
|
||||
}
|
||||
if iState < 0 {
|
||||
return "word no exist on the memory"
|
||||
}
|
||||
var next State
|
||||
next = states[iState].NextStates[0]
|
||||
next.Prob = rand.Float64() * states[iState].Prob
|
||||
for i := 0; i < len(states[iState].NextStates); i++ {
|
||||
if (rand.Float64()*states[iState].NextStates[i].Prob) > next.Prob && states[iState-1].Word != states[iState].NextStates[i].Word {
|
||||
next = states[iState].NextStates[i]
|
||||
}
|
||||
}
|
||||
return next.Word
|
||||
}
|
||||
func (markov Markov) generateText(states []State, initWord string, count int) string {
|
||||
var generatedText []string
|
||||
word := initWord
|
||||
generatedText = append(generatedText, word)
|
||||
for i := 0; i < count; i++ {
|
||||
word = getNextMarkovState(states, word)
|
||||
generatedText = append(generatedText, word)
|
||||
}
|
||||
generatedText = append(generatedText, ".")
|
||||
text := strings.Join(generatedText, " ")
|
||||
return text
|
||||
}
|
||||
Reference in New Issue
Block a user