The Algorithms logoThe Algorithms
About
package advancedahocorasick

import (
	"fmt"
	"time"
)

// User defined.
// Set to true to print various extra stuff out (slows down the execution)
// Set to false for quick and quiet execution.
// const debugMode bool = true // TODO: very convoluted algorithm need time to read through it properly

// Result structure
type Result struct {
	occurrences map[string][]int
}

// Implementation of Advanced Aho-Corasick algorithm (Prefix based).
// Searches for a set of strings (patterns.txt) in text.txt.
// func main() {
// 	patFile, err := ioutil.ReadFile("../patterns.txt")
// 	if err != nil {
// 		log.Fatal(err)
// 	}
// 	textFile, err := ioutil.ReadFile("../text.txt")
// 	if err != nil {
// 		log.Fatal(err)
// 	}
// 	patterns := strings.Split(string(patFile), " ")
// 	fmt.Printf("\nRunning: Advanced Aho-Corasick algorithm.\n\n")
// 	if debugMode == true {
// 		fmt.Printf("Searching for %d patterns/words:\n", len(patterns))
// 	}
// 	for i := 0; i < len(patterns); i++ {
// 		if len(patterns[i]) > len(textFile) {
// 			log.Fatal("There is a pattern that is longer than text! Pattern number:", i+1)
// 		}
// 		if debugMode == true {
// 			fmt.Printf("%q ", patterns[i])
// 		}
// 	}
// 	if debugMode == true {
// 		fmt.Printf("\n\nIn text (%d chars long): \n%q\n\n", len(textFile), textFile)
// 	}
// 	r := ahoCorasick(string(textFile), patterns)
// 	for key, value := range r.occurrences { //prints all occurrences of each pattern (if there was at least one)
// 		fmt.Printf("\nThere were %d occurrences for word: %q at positions: ", len(value), key)
// 		for i := range value {
// 			fmt.Printf("%d", value[i])
// 			if i != len(value)-1 {
// 				fmt.Printf(", ")
// 			}
// 		}
// 		fmt.Printf(".")
// 	}
// 	return
// }

// AhoCorasick Function performing the Advanced Aho-Corasick alghoritm.
// Finds and prints occurrences of each pattern.
func AhoCorasick(t string, p []string) Result {
	startTime := time.Now()
	occurrences := make(map[int][]int)
	ac, f := BuildExtendedAc(p)
	// if debugMode == true {
	// 	fmt.Printf("\n\nAC:\n\n")
	// }
	current := 0
	for pos := 0; pos < len(t); pos++ {
		if GetTransition(current, t[pos], ac) != -1 {
			current = GetTransition(current, t[pos], ac)
		} else {
			current = 0
		}
		_, ok := f[current]
		if ok {
			for i := range f[current] {
				if p[f[current][i]] == GetWord(pos-len(p[f[current][i]])+1, pos, t) { //check for word match
					// if debugMode == true {
					// 	fmt.Printf("Occurrence at position %d, %q = %q\n", pos-len(p[f[current][i]])+1, p[f[current][i]], p[f[current][i]])
					// }
					newOccurrences := IntArrayCapUp(occurrences[f[current][i]])
					occurrences[f[current][i]] = newOccurrences
					occurrences[f[current][i]][len(newOccurrences)-1] = pos - len(p[f[current][i]]) + 1
				}
			}
		}
	}
	elapsed := time.Since(startTime)
	fmt.Printf("\n\nElapsed %f secs\n", elapsed.Seconds())

	var resultOccurrences = make(map[string][]int)
	for key, value := range occurrences {
		resultOccurrences[p[key]] = value
	}

	return Result{
		resultOccurrences,
	}
}

// BuildExtendedAc Functions that builds extended Aho Corasick automaton.
func BuildExtendedAc(p []string) (acToReturn map[int]map[uint8]int, f map[int][]int) {
	acTrie, stateIsTerminal, f := ConstructTrie(p)
	s := make([]int, len(stateIsTerminal)) //supply function
	i := 0                                 //root of acTrie
	acToReturn = acTrie
	s[i] = -1
	// if debugMode == true {
	// 	fmt.Printf("\n\nAC construction: \n")
	// }
	for current := 1; current < len(stateIsTerminal); current++ {
		o, parent := GetParent(current, acTrie)
		down := s[parent]
		for StateExists(down, acToReturn) && GetTransition(down, o, acToReturn) == -1 {
			down = s[down]
		}
		if StateExists(down, acToReturn) {
			s[current] = GetTransition(down, o, acToReturn)
			if stateIsTerminal[s[current]] {
				stateIsTerminal[current] = true
				f[current] = ArrayUnion(f[current], f[s[current]]) //F(Current) <- F(Current) union F(S(Current))
				// if debugMode == true {
				// 	fmt.Printf(" f[%d] set to: ", current)
				// 	for i := range f[current] {
				// 		fmt.Printf("%d\n", f[current][i])
				// 	}
				// }
			}
		} else {
			s[current] = i //initial state?
		}
	}
	// if debugMode == true {
	// 	fmt.Printf("\nsupply function: \n")
	// 	for i := range s {
	// 		fmt.Printf("\ns[%d]=%d", i, s[i])
	// 	}
	// 	fmt.Printf("\n\n")
	// 	for i, j := range f {
	// 		fmt.Printf("f[%d]=", i)
	// 		for k := range j {
	// 			fmt.Printf("%d\n", j[k])
	// 		}
	// 	}
	// }
	// if debugMode == true {
	// 	fmt.Printf("\n\nAdAC completion: \n")
	// }
	// advanced Aho-Corasick part
	a := ComputeAlphabet(p) // concat of all patterns in p
	for j := range a {
		if GetTransition(i, a[j], acToReturn) == -1 {
			CreateTransition(i, a[j], i, acToReturn)
		}
	}
	for current := 1; current < len(stateIsTerminal); current++ {
		for j := range a {
			if GetTransition(current, a[j], acToReturn) == -1 {
				CreateTransition(current, a[j], GetTransition(s[current], a[j], acToReturn), acToReturn)
			}
		}
	}
	return acToReturn, f
}

// ConstructTrie Function that constructs Trie as an automaton for a set of reversed & trimmed strings.
func ConstructTrie(p []string) (trie map[int]map[uint8]int, stateIsTerminal []bool, f map[int][]int) {
	trie = make(map[int]map[uint8]int)
	stateIsTerminal = make([]bool, 1)
	f = make(map[int][]int)
	state := 1
	// if debugMode == true {
	// 	fmt.Printf("\n\nTrie construction: \n")
	// }
	CreateNewState(0, trie)
	for i := 0; i < len(p); i++ {
		current := 0
		j := 0
		for j < len(p[i]) && GetTransition(current, p[i][j], trie) != -1 {
			current = GetTransition(current, p[i][j], trie)
			j++
		}
		for j < len(p[i]) {
			stateIsTerminal = BoolArrayCapUp(stateIsTerminal)
			CreateNewState(state, trie)
			stateIsTerminal[state] = false
			CreateTransition(current, p[i][j], state, trie)
			current = state
			j++
			state++
		}
		if stateIsTerminal[current] {
			newArray := IntArrayCapUp(f[current])
			newArray[len(newArray)-1] = i
			f[current] = newArray // F(Current) <- F(Current) union {i}
			// if debugMode == true {
			// 	fmt.Printf(" and %d", i)
			// }
		} else {
			stateIsTerminal[current] = true
			f[current] = []int{i} // F(Current) <- {i}
			// if debugMode == true {
			// 	fmt.Printf("\n%d is terminal for word number %d", current, i)
			// }
		}
	}
	return trie, stateIsTerminal, f
}

// Contains Returns 'true' if arry of int's 's' contains int 'e', 'false' otherwise.
func Contains(s []int, e int) bool {
	for _, a := range s {
		if a == e {
			return true
		}
	}
	return false
}

// GetWord Function that returns word found in text 't' at position range 'begin' to 'end'.
func GetWord(begin, end int, t string) string {
	for end >= len(t) {
		return ""
	}
	d := make([]uint8, end-begin+1)
	for j, i := 0, begin; i <= end; i, j = i+1, j+1 {
		d[j] = t[i]
	}
	return string(d)
}

// ComputeAlphabet Function that returns string of all the possible characters in given patterns.
func ComputeAlphabet(p []string) (s string) {
	s = p[0]
	for i := 1; i < len(p); i++ {
		s = s + p[i]
	}
	return s
}

// IntArrayCapUp Dynamically increases an array size of int's by 1.
func IntArrayCapUp(old []int) (new []int) {
	new = make([]int, cap(old)+1)
	copy(new, old) //copy(dst,src)
	// old = new
	return new
}

// BoolArrayCapUp Dynamically increases an array size of bool's by 1.
func BoolArrayCapUp(old []bool) (new []bool) {
	new = make([]bool, cap(old)+1)
	copy(new, old)
	// old = new
	return new
}

// ArrayUnion Concats two arrays of int's into one.
func ArrayUnion(to, from []int) (concat []int) {
	concat = to
	for i := range from {
		if !Contains(concat, from[i]) {
			concat = IntArrayCapUp(concat)
			concat[len(concat)-1] = from[i]
		}
	}
	return concat
}

// GetParent Function that finds the first previous state of a state and returns it.
// Used for trie where there is only one parent.
func GetParent(state int, at map[int]map[uint8]int) (uint8, int) {
	for beginState, transitions := range at {
		for c, endState := range transitions {
			if endState == state {
				return c, beginState
			}
		}
	}
	return 0, 0 //unreachable
}

// CreateNewState Automaton function for creating a new state 'state'.
func CreateNewState(state int, at map[int]map[uint8]int) {
	at[state] = make(map[uint8]int)
	// if debugMode == true {
	// 	fmt.Printf("\ncreated state %d", state)
	// }
}

// CreateTransition Creates a transition for function σ(state,letter) = end.
func CreateTransition(fromState int, overChar uint8, toState int, at map[int]map[uint8]int) {
	at[fromState][overChar] = toState
	// if debugMode == true {
	// 	fmt.Printf("\n    σ(%d,%c)=%d;", fromState, overChar, toState)
	// }
}

// GetTransition Returns ending state for transition σ(fromState,overChar), '-1' if there is none.
func GetTransition(fromState int, overChar uint8, at map[int]map[uint8]int) (toState int) {
	if !StateExists(fromState, at) {
		return -1
	}
	toState, ok := at[fromState][overChar]
	if !ok {
		return -1
	}
	return toState
}

// StateExists Checks if state 'state' exists. Returns 'true' if it does, 'false' otherwise.
func StateExists(state int, at map[int]map[uint8]int) bool {
	_, ok := at[state]
	if !ok || state == -1 || at[state] == nil {
		return false
	}
	return true
}

Adac

h
T