From 07c5de6e2741c59c61bcdeedb331a4c0e13b155f Mon Sep 17 00:00:00 2001
From: Igor Drozdov <idrozdov@gitlab.com>
Date: Thu, 21 May 2020 09:31:31 +0300
Subject: [PATCH] Cache ranges value in a file instead of a hash

Storing the ranges in an in-memory hash consumes a lot of RAM.
Caching them in a temp file instead reduces RAM consumption significantly.
---
 internal/lsif_transformer/parser/cache.go     | 56 ++++++++++++++++
 .../lsif_transformer/parser/cache_test.go     | 33 ++++++++++
 internal/lsif_transformer/parser/ranges.go    | 66 ++++++++++++++-----
 .../lsif_transformer/parser/ranges_test.go    |  8 ++-
 4 files changed, 146 insertions(+), 17 deletions(-)
 create mode 100644 internal/lsif_transformer/parser/cache.go
 create mode 100644 internal/lsif_transformer/parser/cache_test.go

diff --git a/internal/lsif_transformer/parser/cache.go b/internal/lsif_transformer/parser/cache.go
new file mode 100644
index 000000000000..f2a695d1734e
--- /dev/null
+++ b/internal/lsif_transformer/parser/cache.go
@@ -0,0 +1,56 @@
+package parser
+
+import (
+	"encoding/binary"
+	"io"
+	"io/ioutil"
+	"os"
+)
+
+// cache is a temp-file-backed key-value store that keeps intermediate calculations out of RAM.
+// Each entry occupies a fixed-size slot located at byte offset id*chunkSize.
+// The stored data must be a fixed-size value or a slice of fixed-size values, or a pointer to such data.
+type cache struct {
+	file      *os.File // temp file that holds the entries
+	chunkSize int64    // slot size in bytes, taken from binary.Size of a sample value
+}
+
+func newCache(tempDir, filename string, data interface{}) (*cache, error) {
+	// data is only a sample: its binary.Size becomes the per-entry slot width.
+	tmp, err := ioutil.TempFile(tempDir, filename)
+	if err != nil {
+		return nil, err
+	}
+	return &cache{chunkSize: int64(binary.Size(data)), file: tmp}, nil
+}
+
+func (c *cache) SetEntry(id Id, data interface{}) error {
+	err := c.setOffset(id) // seek to the slot that belongs to id
+	if err == nil {
+		err = binary.Write(c.file, binary.LittleEndian, data) // encode data into that slot
+	}
+	return err
+}
+
+func (c *cache) Entry(id Id, data interface{}) error {
+	err := c.setOffset(id) // seek to the slot that belongs to id
+	if err == nil {
+		err = binary.Read(c.file, binary.LittleEndian, data) // decode the slot into data
+	}
+	return err
+}
+
+func (c *cache) Close() error {
+	closeErr := c.file.Close()
+	removeErr := os.Remove(c.file.Name()) // runs even if closing failed, so the temp file never leaks
+	if closeErr != nil {
+		return closeErr
+	}
+	return removeErr
+}
+
+func (c *cache) setOffset(id Id) error {
+	// Slots are fixed-size, so an entry starts at id * chunkSize bytes.
+	pos := c.chunkSize * int64(id)
+	_, err := c.file.Seek(pos, io.SeekStart)
+	return err
+}
diff --git a/internal/lsif_transformer/parser/cache_test.go b/internal/lsif_transformer/parser/cache_test.go
new file mode 100644
index 000000000000..23a2ac6e9a91
--- /dev/null
+++ b/internal/lsif_transformer/parser/cache_test.go
@@ -0,0 +1,33 @@
+package parser
+
+import (
+	"io/ioutil"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+type chunk struct {
+	A int16 // two int16 fields give the test value a fixed 4-byte encoding
+	B int16
+}
+
+func TestCache(t *testing.T) {
+	cache, err := newCache("", "test-chunks", chunk{})
+	require.NoError(t, err)
+	defer cache.Close()
+
+	c := chunk{A: 1, B: 2}
+	require.NoError(t, cache.SetEntry(1, &c)) // write into slot 1, leaving slot 0 untouched
+	require.NoError(t, cache.setOffset(0))    // rewind so ReadAll sees the whole file
+
+	content, err := ioutil.ReadAll(cache.file)
+	require.NoError(t, err)
+
+	expected := []byte{0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x2, 0x0} // 4 zero bytes for slot 0, then A=1 and B=2 as little-endian int16
+	require.Equal(t, expected, content)
+
+	var nc chunk
+	require.NoError(t, cache.Entry(1, &nc)) // reading slot 1 back must round-trip the value
+	require.Equal(t, c, nc)
+}
diff --git a/internal/lsif_transformer/parser/ranges.go b/internal/lsif_transformer/parser/ranges.go
index 0d760f53d6c8..1740b7238ec9 100644
--- a/internal/lsif_transformer/parser/ranges.go
+++ b/internal/lsif_transformer/parser/ranges.go
@@ -6,13 +6,15 @@ import (
 	"strconv"
 )
 
-const Definitions = "definitions"
-const References = "references"
+const (
+	definitions = "definitions" // item property value that addItem processes
+	references  = "references"  // item property value that addItem processes
+)
 
 type Ranges struct {
-	Entries map[Id]*Range
 	DefRefs map[Id]*DefRef
 	Hovers  *Hovers
+	Cache   *cache // file-backed store of Range values, keyed by range Id
 }
 
 type RawRange struct {
@@ -51,10 +53,15 @@ func NewRanges(tempDir string) (*Ranges, error) {
 		return nil, err
 	}
 
+	cache, err := newCache(tempDir, "ranges", Range{})
+	if err != nil {
+		return nil, err
+	}
+
 	return &Ranges{
-		Entries: make(map[Id]*Range),
 		DefRefs: make(map[Id]*DefRef),
 		Hovers:  hovers,
+		Cache:   cache,
 	}, nil
 }
 
@@ -84,7 +91,11 @@ func (r *Ranges) Serialize(f io.Writer, rangeIds []Id, docs map[Id]string) error
 	}
 
 	for i, rangeId := range rangeIds {
-		entry := r.Entries[rangeId]
+		entry, err := r.getRange(rangeId)
+		if err != nil {
+			continue
+		}
+
 		serializedRange := SerializedRange{
 			StartLine:      entry.Line,
 			StartChar:      entry.Character,
@@ -109,6 +120,10 @@ func (r *Ranges) Serialize(f io.Writer, rangeIds []Id, docs map[Id]string) error
 }
 
 func (r *Ranges) Close() error {
-	return r.Hovers.Close()
+	cacheErr := r.Cache.Close()
+	if err := r.Hovers.Close(); cacheErr == nil { // hovers is closed even when the cache failed to close
+		return err
+	}
+	return cacheErr // the cache error takes priority, as before
 }
 
@@ -129,9 +144,7 @@ func (r *Ranges) addRange(line []byte) error {
 		return err
 	}
 
-	r.Entries[rg.Id] = &rg.Data
-
-	return nil
+	return r.Cache.SetEntry(rg.Id, &rg.Data)
 }
 
 func (r *Ranges) addItem(line []byte) error {
@@ -140,26 +153,49 @@ func (r *Ranges) addItem(line []byte) error {
 		return err
 	}
 
-	if defRef.Property != Definitions && defRef.Property != References {
+	if defRef.Property != definitions && defRef.Property != references {
 		return nil
 	}
 
 	for _, rangeId := range defRef.RangeIds {
-		if entry, ok := r.Entries[rangeId]; ok {
-			entry.RefId = defRef.RefId
+		var rg Range
+		if err := r.Cache.Entry(rangeId, &rg); err != nil {
+			return err
+		}
+
+		rg.RefId = defRef.RefId
+
+		if err := r.Cache.SetEntry(rangeId, &rg); err != nil {
+			return err
 		}
 	}
 
-	if defRef.Property != Definitions {
-		return nil
+	if defRef.Property == definitions {
+		return r.addDefRef(&defRef)
 	}
 
-	defRange := r.Entries[defRef.RangeIds[0]]
+	return nil
+}
+
+func (r *Ranges) addDefRef(defRef *RawDefRef) error {
+	var rg Range // receives the first range of the item, read from the cache
+	if err := r.Cache.Entry(defRef.RangeIds[0], &rg); err != nil { // NOTE(review): indexes RangeIds[0] without a length check
+		return err
+	}
 
 	r.DefRefs[defRef.RefId] = &DefRef{
-		Line:  strconv.Itoa(int(defRange.Line + 1)),
+		Line:  strconv.Itoa(int(rg.Line + 1)), // stored line plus one, as a decimal string
 		DocId: defRef.DocId,
 	}
 
 	return nil
 }
+
+func (r *Ranges) getRange(rangeId Id) (*Range, error) {
+	// Decode into a fresh Range; a failed cache read yields (nil, err).
+	rg := new(Range)
+	if err := r.Cache.Entry(rangeId, rg); err != nil {
+		return nil, err
+	}
+	return rg, nil
+}
diff --git a/internal/lsif_transformer/parser/ranges_test.go b/internal/lsif_transformer/parser/ranges_test.go
index 988b13bb7774..bc53c66bb376 100644
--- a/internal/lsif_transformer/parser/ranges_test.go
+++ b/internal/lsif_transformer/parser/ranges_test.go
@@ -12,10 +12,14 @@ func TestRangesRead(t *testing.T) {
 	defer cleanup()
 
 	firstRange := Range{Line: 1, Character: 2, RefId: 3}
-	require.Equal(t, &firstRange, r.Entries[1])
+	rg, err := r.getRange(1)
+	require.NoError(t, err)
+	require.Equal(t, &firstRange, rg)
 
 	secondRange := Range{Line: 5, Character: 4, RefId: 3}
-	require.Equal(t, &secondRange, r.Entries[2])
+	rg, err = r.getRange(2)
+	require.NoError(t, err)
+	require.Equal(t, &secondRange, rg)
 }
 
 func TestSerialize(t *testing.T) {
-- 
GitLab