The Algorithms logoThe Algorithms
About
package ahocorasick

import (
	"fmt"
	"time"
)

// User defined.
// Set to true to print various extra stuff out (slows down the execution)
// Set to false for quick and quiet execution.
// const debugMode bool = true

type Result struct {
	occurrences map[string][]int
}

// Implementation of Basic Aho-Corasick algorithm (Prefix based).
// Searches for a set of strings (patterns.txt) in text.txt.
// func main() {
// 	patFile, err := ioutil.ReadFile("../patterns.txt")
// 	if err != nil {
// 		log.Fatal(err)
// 	}
// 	textFile, err := ioutil.ReadFile("../text.txt")
// 	if err != nil {
// 		log.Fatal(err)
// 	}
// 	patterns := strings.Split(string(patFile), " ")
// 	fmt.Printf("\nRunning: Basic Aho-Corasick algorithm.\n\n")
// 	if debugMode == true {
// 		fmt.Printf("Searching for %d patterns/words:\n", len(patterns))
// 	}
// 	for i := 0; i < len(patterns); i++ {
// 		if len(patterns[i]) > len(textFile) {
// 			log.Fatal("There is a pattern that is longer than text! Pattern number:", i+1)
// 		}
// 		if debugMode == true {
// 			fmt.Printf("%q ", patterns[i])
// 		}
// 	}
// 	if debugMode == true {
// 		fmt.Printf("\n\nIn text (%d chars long): \n%q\n\n", len(textFile), textFile)
// 	}
// 	r := ahoCorasick(string(textFile), patterns)
// 	for key, value := range r.occurrences { //prints all occurrences of each pattern (if there was at least one)
// 		fmt.Printf("\nThere were %d occurrences for word: %q at positions: ", len(value), key)
// 		for i := range value {
// 			fmt.Printf("%d", value[i])
// 			if i != len(value)-1 {
// 				fmt.Printf(", ")
// 			}
// 		}
// 		fmt.Printf(".")
// 	}
// }

// AhoCorasick Function performing the Basic Aho-Corasick algorithm.
// Finds and prints occurrences of each pattern.
func AhoCorasick(t string, p []string) Result {
	startTime := time.Now()
	occurrences := make(map[int][]int)
	ac, f, s := buildAc(p)
	// if debugMode == true {
	// 	fmt.Printf("\n\nAC:\n\n")
	// }
	current := 0
	for pos := 0; pos < len(t); pos++ {
		// if debugMode == true {
		// 	fmt.Printf("Position: %d, we read: %c", pos, t[pos])
		// }
		for getTransition(current, t[pos], ac) == -1 && s[current] != -1 {
			current = s[current]
		}
		if getTransition(current, t[pos], ac) != -1 {
			current = getTransition(current, t[pos], ac)
			fmt.Printf(" (Continue) \n")
		} else {
			current = 0
			// if debugMode == true {
			// 	fmt.Printf(" (FAIL) \n")
			// }
		}
		_, ok := f[current]
		if ok {
			for i := range f[current] {
				if p[f[current][i]] == getWord(pos-len(p[f[current][i]])+1, pos, t) { //check for word match
					// if debugMode == true {
					// 	fmt.Printf("Occurrence at position %d, %q = %q\n", pos-len(p[f[current][i]])+1, p[f[current][i]], p[f[current][i]])
					// }
					newOccurrences := intArrayCapUp(occurrences[f[current][i]])
					occurrences[f[current][i]] = newOccurrences
					occurrences[f[current][i]][len(newOccurrences)-1] = pos - len(p[f[current][i]]) + 1
				}
			}
		}
	}
	elapsed := time.Since(startTime)
	fmt.Printf("\n\nElapsed %f secs\n", elapsed.Seconds())
	var resultOccurrences = make(map[string][]int)
	for key, value := range occurrences {
		resultOccurrences[p[key]] = value
	}

	return Result{
		resultOccurrences,
	}
}

// Functions that builds Aho Corasick automaton.
func buildAc(p []string) (acToReturn map[int]map[uint8]int, f map[int][]int, s []int) {
	acTrie, stateIsTerminal, f := constructTrie(p)
	s = make([]int, len(stateIsTerminal)) //supply function
	i := 0                                //root of acTrie
	acToReturn = acTrie
	s[i] = -1
	// if debugMode == true {
	// 	fmt.Printf("\n\nAC construction: \n")
	// }
	for current := 1; current < len(stateIsTerminal); current++ {
		o, parent := getParent(current, acTrie)
		down := s[parent]
		for stateExists(down, acToReturn) && getTransition(down, o, acToReturn) == -1 {
			down = s[down]
		}
		if stateExists(down, acToReturn) {
			s[current] = getTransition(down, o, acToReturn)
			if stateIsTerminal[s[current]] {
				stateIsTerminal[current] = true
				f[current] = arrayUnion(f[current], f[s[current]]) //F(Current) <- F(Current) union F(S(Current))
				// if debugMode == true {
				// 	fmt.Printf(" f[%d] set to: ", current)
				// 	for i := range f[current] {
				// 		fmt.Printf("%d\n", f[current][i])
				// 	}
				// }
			}
		} else {
			s[current] = i //initial state?
		}
	}
	// if debugMode == true {
	// 	fmt.Printf("\nsupply function: \n")
	// 	for i := range s {
	// 		fmt.Printf("\ns[%d]=%d", i, s[i])
	// 	}
	// 	fmt.Printf("\n\n")
	// 	for i, j := range f {
	// 		fmt.Printf("f[%d]=", i)
	// 		for k := range j {
	// 			fmt.Printf("%d\n", j[k])
	// 		}
	// 	}
	// }
	return acToReturn, f, s
}

// Function that constructs Trie as an automaton for a set of reversed & trimmed strings.
func constructTrie(p []string) (trie map[int]map[uint8]int, stateIsTerminal []bool, f map[int][]int) {
	trie = make(map[int]map[uint8]int)
	stateIsTerminal = make([]bool, 1)
	f = make(map[int][]int)
	state := 1
	// if debugMode == true {
	// 	fmt.Printf("\n\nTrie construction: \n")
	// }
	createNewState(0, trie)
	for i := 0; i < len(p); i++ {
		current := 0
		j := 0
		for j < len(p[i]) && getTransition(current, p[i][j], trie) != -1 {
			current = getTransition(current, p[i][j], trie)
			j++
		}
		for j < len(p[i]) {
			stateIsTerminal = boolArrayCapUp(stateIsTerminal)
			createNewState(state, trie)
			stateIsTerminal[state] = false
			createTransition(current, p[i][j], state, trie)
			current = state
			j++
			state++
		}
		if stateIsTerminal[current] {
			newArray := intArrayCapUp(f[current])
			newArray[len(newArray)-1] = i
			f[current] = newArray //F(Current) <- F(Current) union {i}
			// if debugMode == true {
			// 	fmt.Printf(" and %d", i)
			// }
		} else {
			stateIsTerminal[current] = true
			f[current] = []int{i} //F(Current) <- {i}
			// if debugMode == true {
			// 	fmt.Printf("\n%d is terminal for word number %d", current, i)
			// }
		}
	}
	return trie, stateIsTerminal, f
}

/**
Returns 'true' if array of int's 's' contains int 'e', 'false' otherwise.

@author Mostafa http://stackoverflow.com/a/10485970
*/
func contains(s []int, e int) bool {
	for _, a := range s {
		if a == e {
			return true
		}
	}
	return false
}

// Function that returns word found in text 't' at position range 'begin' to 'end'.
func getWord(begin, end int, t string) string {
	for end >= len(t) {
		return ""
	}
	d := make([]uint8, end-begin+1)
	for j, i := 0, begin; i <= end; i, j = i+1, j+1 {
		d[j] = t[i]
	}
	return string(d)
}

// Dynamically increases an array size of int's by 1.
func intArrayCapUp(old []int) (new []int) {
	new = make([]int, cap(old)+1)
	copy(new, old) //copy(dst,src)
	// old = new
	return new
}

// Dynamically increases an array size of bool's by 1.
func boolArrayCapUp(old []bool) (new []bool) {
	new = make([]bool, cap(old)+1)
	copy(new, old)
	// old = new
	return new
}

// Concats two arrays of int's into one.
func arrayUnion(to, from []int) (concat []int) {
	concat = to
	for i := range from {
		if !contains(concat, from[i]) {
			concat = intArrayCapUp(concat)
			concat[len(concat)-1] = from[i]
		}
	}
	return concat
}

// Function that finds the first previous state of a state and returns it.
// Used for trie where there is only one parent.
func getParent(state int, at map[int]map[uint8]int) (uint8, int) {
	for beginState, transitions := range at {
		for c, endState := range transitions {
			if endState == state {
				return c, beginState
			}
		}
	}
	return 0, 0 //unreachable
}

// Automaton function for creating a new state 'state'.
func createNewState(state int, at map[int]map[uint8]int) {
	at[state] = make(map[uint8]int)
	// if debugMode == true {
	// 	fmt.Printf("\ncreated state %d", state)
	// }
}

// Creates a transition for function σ(state,letter) = end.
func createTransition(fromState int, overChar uint8, toState int, at map[int]map[uint8]int) {
	at[fromState][overChar] = toState
	// if debugMode == true {
	// 	fmt.Printf("\n    σ(%d,%c)=%d;", fromState, overChar, toState)
	// }
}

// Returns ending state for transition σ(fromState,overChar), '-1' if there is none.
func getTransition(fromState int, overChar uint8, at map[int]map[uint8]int) (toState int) {
	if !stateExists(fromState, at) {
		return -1
	}
	toState, ok := at[fromState][overChar]
	if !ok {
		return -1
	}
	return toState
}

// Checks if state 'state' exists. Returns 'true' if it does, 'false' otherwise.
func stateExists(state int, at map[int]map[uint8]int) bool {
	_, ok := at[state]
	if !ok || state == -1 || at[state] == nil {
		return false
	}
	return true
}

Ac

h
T