Go Data Structures: Set

In this technical blog, we will analyze and implement the Set data structure in Go programming language.

A Set is a collection of values that allows various operations such as iteration, addition, removal, size retrieval, and checking if an item is present. Sets do not allow duplicates, meaning each value can only be stored once.

First implementation

Let’s start with a simple implementation of the Set data structure. This implementation is not yet concurrency safe, as we have skipped locking resources for the sake of simplicity and understanding. We will add locking later in the article.

Here is the code for the basic implementation:

// Package set creates an ItemSet data structure for the Item type
package set

import "github.com/cheekybits/genny/generic"

// Item is the type of the Set
type Item generic.Type

// ItemSet is the set of Items
type ItemSet struct {
	items map[Item]bool
}

// Add adds a new element to the Set. Returns a pointer to the Set.
func (s *ItemSet) Add(t Item) *ItemSet {
	if s.items == nil {
		s.items = make(map[Item]bool)
	}
	_, ok := s.items[t]
	if !ok {
		s.items[t] = true
	}
	return s
}

// Clear removes all elements from the Set
func (s *ItemSet) Clear() {
	s.items = make(map[Item]bool)
}

// Delete removes the Item from the Set and returns whether it was present
func (s *ItemSet) Delete(item Item) bool {
	_, ok := s.items[item]
	if ok {
		delete(s.items, item)
	}
	return ok
}

// Has returns true if the Set contains the Item
func (s *ItemSet) Has(item Item) bool {
	_, ok := s.items[item]
	return ok
}

// Items returns the Item(s) stored
func (s *ItemSet) Items() []Item {
	items := []Item{}
	for i := range s.items {
		items = append(items, i)
	}
	return items
}

// Size returns the size of the Set
func (s *ItemSet) Size() int {
	return len(s.items)
}

Testing the implementation

To test the above code, we need to write a test suite. This will also help us understand how to use the Set data structure and verify the expected results for each operation. Here is the test suite:

package set

import (
	"fmt"
	"testing"
)

func populateSet(count, start int) *ItemSet {
	set := ItemSet{}
	for i := start; i < (start + count); i++ {
		set.Add(fmt.Sprintf("item%d", i))
	}
	return &set
}

func TestAdd(t *testing.T) {
	set := populateSet(3, 0)
	if size := set.Size(); size != 3 {
		t.Errorf("wrong count, expected 3 and got %d", size)
	}
	set.Add("item1") //should not add it, already there
	if size := set.Size(); size != 3 {
		t.Errorf("wrong count, expected 3 and got %d", size)
	}
	set.Add("item4") //should not add it, already there
	if size := set.Size(); size != 4 {
		t.Errorf("wrong count, expected 4 and got %d", size)
	}
}

func TestClear(t *testing.T) {
	set := populateSet(3, 0)
	set.Clear()
	if size := set.Size(); size != 0 {
		t.Errorf("wrong count, expected 0 and got %d", size)
	}
}

func TestDelete(t *testing.T) {
	set := populateSet(3, 0)
	set.Delete("item2")
	if size := set.Size(); size != 2 {
		t.Errorf("wrong count, expected 2 and got %d", size)
	}
}

func TestHas(t *testing.T) {
	set := populateSet(3, 0)
	has := set.Has("item2")
	if !has {
		t.Errorf("expected item2 to be there")
	}
	set.Delete("item2")
	has = set.Has("item2")
	if has {
		t.Errorf("expected item2 to be removed")
	}
	set.Delete("item1")
	has = set.Has("item1")
	if has {
		t.Errorf("expected item1 to be removed")
	}
}

func TestItems(t *testing.T) {
	set := populateSet(3, 0)
	items := set.Items()
	if len(items) != 3 {
		t.Errorf("wrong count, expected 3 and got %d", len(items))
	}
	set = populateSet(520, 0)
	items = set.Items()
	if len(items) != 520 {
		t.Errorf("wrong count, expected 520 and got %d", len(items))
	}
}

func TestSize(t *testing.T) {
	set := populateSet(3, 0)
	items := set.Items()
	if len(items) != set.Size() {
		t.Errorf("wrong count, expected %d and got %d", set.Size(), len(items))
	}
	set = populateSet(0, 0)
	items = set.Items()
	if len(items) != set.Size() {
		t.Errorf("wrong count, expected %d and got %d", set.Size(), len(items))
	}
	set = populateSet(10000, 0)
	items = set.Items()
	if len(items) != set.Size() {
		t.Errorf("wrong count, expected %d and got %d", set.Size(), len(items))
	}
}

Concurrency safe version

The first version of the Set implementation is not concurrency safe because a routine might add an item to the set while another routine is fetching the list of items or checking the size.

To make the implementation concurrency safe, we will introduce a sync.RWMutex to lock and unlock the resources. This will prevent simultaneous modifications by different routines.

Here is the modified code with concurrency safety:

// Package set creates an ItemSet data structure for the Item type
package set

import (
	"sync"

	"github.com/cheekybits/genny/generic"
)

// Item is the type of the Set
type Item generic.Type

// ItemSet is the set of Items
type ItemSet struct {
	items map[Item]bool
	lock  sync.RWMutex
}

// Add adds a new element to the Set. Returns a pointer to the Set.
func (s *ItemSet) Add(t Item) *ItemSet {
	s.lock.Lock()
	defer s.lock.Unlock()
	if s.items == nil {
		s.items = make(map[Item]bool)
	}
	_, ok := s.items[t]
	if !ok {
		s.items[t] = true
	}
	return s
}

// Clear removes all elements from the Set
func (s *ItemSet) Clear() {
	s.lock.Lock()
	defer s.lock.Unlock()
	s.items = make(map[Item]bool)
}

// Delete removes the Item from the Set and returns whether it was present
func (s *ItemSet) Delete(item Item) bool {
	s.lock.Lock()
	defer s.lock.Unlock()
	_, ok := s.items[item]
	if ok {
		delete(s.items, item)
	}
	return ok
}

// Has returns true if the Set contains the Item
func (s *ItemSet) Has(item Item) bool {
	s.lock.RLock()
	defer s.lock.RUnlock()
	_, ok := s.items[item]
	return ok
}

// Items returns the Item(s) stored
func (s *ItemSet) Items() []Item {
	s.lock.RLock()
	defer s.lock.RUnlock()
	items := []Item{}
	for i := range s.items {
		items = append(items, i)
	}
	return items
}

// Size returns the size of the Set
func (s *ItemSet) Size() int {
	s.lock.RLock()
	defer s.lock.RUnlock()
	return len(s.items)
}

Creating a concrete set data structure

To create specific implementations of the Set data structure for different types, we can use the genny tool. This allows us to generate concrete sets based on our generic Set implementation.

Here’s an example of how to generate a StringSet set of strings:

1 2	// Generate a `StringSet` set of `string`s genny -in set.go -out set-string.go gen "Item=string"

And here’s an example of how to generate an IntSet set of ints:

1 2	// Generate an `IntSet` set of `int`s genny -in set.go -out set-int.go gen "Item=int"

The generated files will contain concrete implementations of the Set data structure specific to the given type.

Adding more Set operations

Our Set data structure can be further enhanced by implementing some common mathematical set operations like union, intersection, difference, and subset.

Union

The union operation creates a new set with elements from both given sets.

Here is the code for the Union operation:

// Union returns a new set with elements from both the given sets
func (s *ItemSet) Union(s2 *ItemSet) *ItemSet {
	s3 := ItemSet{}
	s3.items = make(map[Item]bool)
	s.lock.RLock()
	for i := range s.items {
		s3.items[i] = true
	}
	s.lock.RUnlock()
	s2.lock.RLock()
	for i := range s2.items {
		_, ok := s3.items[i]
		if !ok {
			s3.items[i] = true
		}
	}
	s2.lock.RUnlock()
	return &s3
}

Test

Here is the test for the Union operation:


func TestUnion(t *testing.T) {
	set1 := populateSet(3, 0)
	set2 := populateSet(2, 3)

	set3 := set1.Union(set2)

	if len(set3.Items()) != 5 {
		t.Errorf("wrong count, expected 5 and got %d", set3.Size())
	}
	// Don't edit original sets
	if len(set1.Items()) != 3 {
		t.Errorf("wrong count, expected 3 and got %d", set1.Size())
	}
	if len(set2.Items()) != 2 {
		t.Errorf("wrong count, expected 2 and got %d", set2.Size())
	}
}

Intersection

The intersection operation creates a new set with elements that exist in both given sets.

Here is the code for the Intersection operation:

// Intersection returns a new set with elements that exist in both sets
func (s *ItemSet) Intersection(s2 *ItemSet) *ItemSet {
	s3 := ItemSet{}
	s3.items = make(map[Item]bool)
	s.lock.RLock()
	s2.lock.RLock()
	defer s.lock.RUnlock()
	defer s2.lock.RUnlock()
	for i := range s2.items {
		_, ok := s.items[i]
		if ok {
			s3.items[i] = true
		}
	}
	return &s3
}

Test

Here is the test for the Intersection operation:

func TestIntersection(t *testing.T) {
	set1 := populateSet(3, 0)
	set2 := populateSet(2, 0)

	set3 := set1.Intersection(set2)

	if len(set3.Items()) != 2 {
		t.Errorf("wrong count, expected 2 and got %d", set3.Size())
	}
	// Don't edit original sets
	if len(set1.Items()) != 3 {
		t.Errorf("wrong count, expected 3 and got %d", set1.Size())
	}
	if len(set2.Items()) != 2 {
		t.Errorf("wrong count, expected 2 and got %d", set2.Size())
	}
}

Difference

The difference operation creates a new set with elements that exist in the first set and don’t exist in the second set.

Here is the code for the Difference operation:

// Difference returns a new set with elements that exist in the first set and don't exist in the second set
func (s *ItemSet) Difference(s2 *ItemSet) *ItemSet {
	s3 := ItemSet{}
	s3.items = make(map[Item]bool)
	s.lock.RLock()
	s2.lock.RLock()
	defer s.lock.RUnlock()
	defer s2.lock.RUnlock()
	for i := range s.items {
		_, ok := s2.items[i]
		if !ok {
			s3.items[i] = true
		}
	}
	return &s3
}

Test

Here is the test for the Difference operation:

func TestDifference(t *testing.T) {
	set1 := populateSet(3, 0)
	set2 := populateSet(2, 0)

	set3 := set1.Difference(set2)

	if len(set3.Items()) != 1 {
		t.Errorf("wrong count, expected 2 and got %d", set3.Size())
	}
	// Don't edit original sets
	if len(set1.Items()) != 3 {
		t.Errorf("wrong count, expected 3 and got %d", set1.Size())
	}
	if len(set2.Items()) != 2 {
		t.Errorf("wrong count, expected 2 and got %d", set2.Size())
	}
}

Subset

The subset operation checks if a set is a subset of another set.

Here is the code for the Subset operation:

// Subset returns true if s is a subset of s2
func (s *ItemSet) Subset(s2 *ItemSet) bool {
	s.lock.RLock()
	s2.lock.RLock()
	defer s.lock.RUnlock()
	defer s2.lock.RUnlock()
	for i := range s.items {
		_, ok := s2.items[i]
		if !ok {
			return false
		}
	}
	return true
}

Test

Here is the test for the Subset operation:

func TestSubset(t *testing.T) {
	set1 := populateSet(3, 0)
	set2 := populateSet(2, 0)

	if set1.Subset(set2) {
		t.Errorf("expected false and got true")
	}

	// Don't edit original sets
	if len(set1.Items()) != 3 {
		t.Errorf("wrong count, expected 3 and got %d", set1.Size())
	}
	if len(set2.Items()) != 2 {
		t.Errorf("wrong count, expected 2 and got %d", set2.Size())
	}

	// Try real subsets
	set1 = populateSet(2, 0)
	if !set1.Subset(set2) {
		t.Errorf("expected true and got false")
	}

	set1 = populateSet(1, 0)
	if !set1.Subset(set2) {
		t.Errorf("expected true and got false")
	}
}

Tags: Go, Data Structures, Set