// goMarkov is a Markov-chain text generator written from scratch.
//
// Typical use:
//
//	states := markov.train(text)
//	generatedText := markov.generateText(states, firstWord, count)
//	fmt.Println(generatedText)
//
// The text variable holds the training corpus; main loads it from text.txt.

// unknownWordMsg is what getNextMarkovState returns when the chain cannot be
// continued from the given word.
const unknownWordMsg = "word no exist on the memory"

// Markov groups the training and generation entry points.
type Markov struct{}

// State is one word of the chain together with the words observed right
// after it.
type State struct {
	Word       string  // the word this state stands for
	Count      int     // how many times the word (or transition) was observed
	Prob       float64 // percentage: share of the corpus for a top-level state, share of the parent's transitions for a successor
	NextStates []State // successors of the word; their own NextStates are left empty
}

var markov Markov

// readText loads the file at path and returns its content with every newline
// replaced by a space. On failure it returns an empty string and the error.
func readText(path string) (string, error) {
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return "", err
	}
	return strings.Replace(string(data), "\n", " ", -1), nil
}

// readLine reads one line from r and returns it without surrounding
// whitespace. A final line that ends at EOF without a newline is still
// accepted; an error is reported only when nothing could be read.
func readLine(r *bufio.Reader) (string, error) {
	line, err := r.ReadString('\n')
	if err != nil && line == "" {
		return "", err
	}
	return strings.TrimSpace(line), nil
}

// fail prints msg to standard error and terminates the program with status 1.
func fail(msg string) {
	fmt.Fprintln(os.Stderr, msg)
	os.Exit(1)
}

// main asks for a starting word and a word count, trains the chain on
// text.txt and prints the generated text.
func main() {
	// One reader for the whole session: a second bufio.Reader on os.Stdin
	// would miss whatever the first one had already buffered (piped input).
	stdin := bufio.NewReader(os.Stdin)

	fmt.Print("entry the first word: ")
	firstWord, err := readLine(stdin)
	if err != nil {
		fail("reading the first word: " + err.Error())
	}
	fmt.Print("first word: ")
	fmt.Println(firstWord)

	fmt.Println("how many words you want on the text?")
	answer, err := readLine(stdin)
	if err != nil {
		fail("reading the number of words: " + err.Error())
	}
	fmt.Print("Number of words on text to generate: ")
	fmt.Println(answer)
	count, err := strconv.Atoi(answer)
	if err != nil || count < 0 {
		fail("incorrect entry, need a positive number")
	}

	text, err := readText("text.txt")
	if err != nil {
		fail(err.Error())
	}

	fmt.Println("generating text")
	states := markov.train(text)
	if findState(states, firstWord) < 0 {
		fail(fmt.Sprintf("the word %q does not appear in the training text", firstWord))
	}
	generatedText := markov.generateText(states, firstWord, count)
	fmt.Println("")
	fmt.Println(generatedText)
}

// addWordToStates records one occurrence of word in states. If the word is
// already present its Count is incremented; otherwise a new State with
// Count 1 is appended. It returns the (possibly grown) slice and the index
// of the word's entry.
func addWordToStates(states []State, word string) ([]State, int) {
	for i := range states {
		if states[i].Word == word {
			states[i].Count++
			return states, i
		}
	}
	states = append(states, State{Word: word, Count: 1})
	return states, len(states) - 1
}

// calcMarkovStates builds the chain for words. Every distinct word becomes a
// State, in order of first appearance, whose NextStates list the words that
// followed it. A word that only occurs at the very end of the input has no
// successors. It prints the number of words processed.
func calcMarkovStates(words []string) []State {
	var states []State
	index := make(map[string]int) // word -> position in states

	// Count words and transitions.
	for i, word := range words {
		pos, seen := index[word]
		if !seen {
			pos = len(states)
			index[word] = pos
			states = append(states, State{Word: word})
		}
		states[pos].Count++
		if i+1 < len(words) {
			states[pos].NextStates, _ = addWordToStates(states[pos].NextStates, words[i+1])
		}
	}

	// Turn counts into percentages.
	total := float64(len(words))
	for i := range states {
		s := &states[i]
		s.Prob = float64(s.Count) / total * 100

		// A successor's probability is relative to the transitions leaving
		// this state, not to the size of the corpus.
		outgoing := 0
		for j := range s.NextStates {
			outgoing += s.NextStates[j].Count
		}
		for j := range s.NextStates {
			s.NextStates[j].Prob = float64(s.NextStates[j].Count) / float64(outgoing) * 100
		}
	}
	fmt.Println("total words computed: " + strconv.Itoa(len(words)))
	return states
}

// textToWords splits text on runs of whitespace, so repeated spaces never
// produce empty words.
func textToWords(text string) []string {
	return strings.Fields(text)
}

// train builds the Markov states for text.
func (m Markov) train(text string) []State {
	return calcMarkovStates(textToWords(text))
}

// findState returns the index of word in states, or -1 if it is absent.
func findState(states []State, word string) int {
	for i := range states {
		if states[i].Word == word {
			return i
		}
	}
	return -1
}

// pickNext draws one word from candidates at random, weighted by Count.
// ok is false when there is nothing to draw from.
func pickNext(candidates []State) (word string, ok bool) {
	total := 0
	for i := range candidates {
		total += candidates[i].Count
	}
	if total <= 0 {
		return "", false
	}
	r := rand.Intn(total)
	for i := range candidates {
		r -= candidates[i].Count
		if r < 0 {
			return candidates[i].Word, true
		}
	}
	// Not reached as long as every Count is non-negative.
	return "", false
}

// step returns a random successor of word. ok is false when word is not a
// known state or has no recorded successors.
func step(states []State, word string) (next string, ok bool) {
	pos := findState(states, word)
	if pos < 0 {
		return "", false
	}
	return pickNext(states[pos].NextStates)
}

// getNextMarkovState returns a random successor of word, weighted by how
// often each successor followed it. When the chain cannot be continued from
// word it returns the message "word no exist on the memory".
func getNextMarkovState(states []State, word string) string {
	next, ok := step(states, word)
	if !ok {
		return unknownWordMsg
	}
	return next
}

// generateText walks the chain starting at initWord for at most count steps
// and returns the visited words joined by spaces, followed by " .". The walk
// stops early when it reaches a word it cannot continue from.
func (m Markov) generateText(states []State, initWord string, count int) string {
	generated := []string{initWord}
	word := initWord
	for i := 0; i < count; i++ {
		next, ok := step(states, word)
		if !ok {
			break
		}
		word = next
		generated = append(generated, word)
	}
	generated = append(generated, ".")
	return strings.Join(generated, " ")
}