Update util dep

This commit is contained in:
Kegan Dougal 2017-03-08 16:25:55 +00:00
parent 07a1084e5a
commit 9d91aa6c1f
2 changed files with 153 additions and 0 deletions

View file

@ -0,0 +1,57 @@
package util
import (
"fmt"
"sort"
)
// Unique compacts an already sorted collection in place so that its distinct
// elements occupy the leading positions, and returns how many there are.
// It accepts the same interface as sort.Sort.
// When several elements compare equal, the last one of the run is kept.
// Panics if data is not sorted.
// O(n).
func Unique(data sort.Interface) int {
	if !sort.IsSorted(data) {
		panic(fmt.Errorf("util: the input to Unique() must be sorted"))
	}
	n := data.Len()
	if n == 0 {
		return 0
	}
	// out is the slot the next surviving element will be written to.
	out := 0
	// Walk over every adjacent pair (prev, prev+1).
	for prev := 0; prev+1 < n; prev++ {
		// The input is sorted, so "not less than" means the neighbours are
		// equal: prev is a duplicate of what follows and is skipped.
		if !data.Less(prev, prev+1) {
			continue
		}
		// prev is the last element of its run, so it survives. Swapping
		// "writes" it to the output slot. With no duplicates so far
		// prev == out, which relies on data.Swap(a, a) being a no-op.
		data.Swap(prev, out)
		out++
	}
	// The final element always closes its run, so it is kept as well.
	data.Swap(n-1, out)
	return out + 1
}
// SortAndUnique orders data with sort.Sort and then strips duplicate entries
// in place via Unique.
// It accepts the same interface as sort.Sort.
// Returns the number of elements left once duplicates are removed.
// Of the elements that compare equal, the one that ends up last after sorting
// is kept; sort.Sort is not guaranteed to be stable, so that need not be the
// one that came last in the original input.
// O(nlog(n))
func SortAndUnique(data sort.Interface) int {
	sort.Sort(data)
	distinct := Unique(data)
	return distinct
}
// UniqueStrings returns the distinct values of strings in sorted order.
// The input slice is sorted and compacted in place, and the returned slice
// is a prefix of it that shares the same backing array.
// O(nlog(n))
func UniqueStrings(strings []string) []string {
	distinct := SortAndUnique(sort.StringSlice(strings))
	return strings[:distinct]
}

View file

@ -0,0 +1,96 @@
package util
import (
"sort"
"testing"
)
// sortBytes orders a byte slice by byte value. It provides the Len, Less and
// Swap methods of sort.Interface.
type sortBytes []byte

// Len reports the number of bytes.
func (b sortBytes) Len() int {
	return len(b)
}

// Less reports whether the byte at index i is smaller than the byte at j.
func (b sortBytes) Less(i, j int) bool {
	left, right := b[i], b[j]
	return left < right
}

// Swap exchanges the bytes at indexes i and j.
func (b sortBytes) Swap(i, j int) {
	b[j], b[i] = b[i], b[j]
}
// TestUnique checks that Unique collapses runs of equal bytes in sorted
// input, and leaves empty and duplicate-free input unchanged.
func TestUnique(t *testing.T) {
	for _, tc := range []struct {
		Input string
		Want  string
	}{
		{Input: "", Want: ""},
		{Input: "abc", Want: "abc"},
		{Input: "aaabbbccc", Want: "abc"},
	} {
		// Unique works in place, so give it a mutable copy of the input.
		buf := []byte(tc.Input)
		n := Unique(sortBytes(buf))
		if got, want := string(buf[:n]), tc.Want; got != want {
			t.Fatal("Wanted ", want, " got ", got)
		}
	}
}
// sortByFirstByte orders strings by their first byte only, so strings that
// share a first byte compare as equal. Less indexes byte 0 of each string
// and therefore panics on an empty string.
type sortByFirstByte []string

// Len reports the number of strings.
func (ss sortByFirstByte) Len() int {
	return len(ss)
}

// Less reports whether the first byte of string i is smaller than the first
// byte of string j.
func (ss sortByFirstByte) Less(i, j int) bool {
	left, right := ss[i][0], ss[j][0]
	return left < right
}

// Swap exchanges the strings at indexes i and j.
func (ss sortByFirstByte) Swap(i, j int) {
	ss[j], ss[i] = ss[i], ss[j]
}
// TestUniquePicksLastDuplicate checks that when several elements compare
// equal, Unique keeps the last one of the run. The strings are compared by
// first byte only, so "aardvark"/"avacado" and "cat"/"cucumber" each form a
// run of duplicates.
func TestUniquePicksLastDuplicate(t *testing.T) {
	input := []string{
		"aardvark",
		"avacado",
		"cat",
		"cucumber",
	}
	want := []string{
		"avacado",
		"cucumber",
	}
	got := input[:Unique(sortByFirstByte(input))]
	// Stop on a length mismatch: the loop below indexes got with indexes
	// taken from want and would panic if got were shorter.
	if len(want) != len(got) {
		t.Fatalf("Wanted %#v got %#v", want, got)
	}
	for i := range want {
		if want[i] != got[i] {
			t.Errorf("Wanted %#v got %#v", want, got)
		}
	}
}
// TestUniquePanicsIfNotSorted checks that Unique panics when handed input
// that is not sorted.
func TestUniquePanicsIfNotSorted(t *testing.T) {
	unsorted := sort.StringSlice{"out", "of", "order"}
	defer func() {
		// A non-nil recover value means Unique panicked, as required.
		if recover() != nil {
			return
		}
		t.Error("Expected Unique() to panic on unsorted input but it didn't")
	}()
	Unique(unsorted)
}
// TestUniqueStrings checks that UniqueStrings turns an unsorted list with
// many repeats into the sorted list of distinct values.
func TestUniqueStrings(t *testing.T) {
	input := []string{
		"badger", "badger", "badger", "badger",
		"badger", "badger", "badger", "badger",
		"badger", "badger", "badger", "badger",
		"mushroom", "mushroom",
		"badger", "badger", "badger", "badger",
		"badger", "badger", "badger", "badger",
		"badger", "badger", "badger", "badger",
		"snake", "snake",
	}
	want := []string{"badger", "mushroom", "snake"}
	got := UniqueStrings(input)
	// Stop on a length mismatch: the loop below indexes got with indexes
	// taken from want and would panic if got were shorter.
	if len(want) != len(got) {
		t.Fatalf("Wanted %#v got %#v", want, got)
	}
	for i := range want {
		if want[i] != got[i] {
			t.Errorf("Wanted %#v got %#v", want, got)
		}
	}
}