From 07c5de6e2741c59c61bcdeedb331a4c0e13b155f Mon Sep 17 00:00:00 2001 From: Igor Drozdov <idrozdov@gitlab.com> Date: Thu, 21 May 2020 09:31:31 +0300 Subject: [PATCH] Cache ranges value in a file instead of a hash Storing them in a hash consumes a lot of RAM Introducing a file helps to reduce RAM consumption significantly --- internal/lsif_transformer/parser/cache.go | 56 ++++++++++++++++ .../lsif_transformer/parser/cache_test.go | 33 ++++++++++ internal/lsif_transformer/parser/ranges.go | 66 ++++++++++++++----- .../lsif_transformer/parser/ranges_test.go | 8 ++- 4 files changed, 146 insertions(+), 17 deletions(-) create mode 100644 internal/lsif_transformer/parser/cache.go create mode 100644 internal/lsif_transformer/parser/cache_test.go diff --git a/internal/lsif_transformer/parser/cache.go b/internal/lsif_transformer/parser/cache.go new file mode 100644 index 000000000000..f2a695d1734e --- /dev/null +++ b/internal/lsif_transformer/parser/cache.go @@ -0,0 +1,56 @@ +package parser + +import ( + "encoding/binary" + "io" + "io/ioutil" + "os" +) + +// This cache implementation is using a temp file to provide key-value data storage +// It allows to avoid storing intermediate calculations in RAM +// The stored data must be a fixed-size value or a slice of fixed-size values, or a pointer to such data +type cache struct { + file *os.File + chunkSize int64 +} + +func newCache(tempDir, filename string, data interface{}) (*cache, error) { + f, err := ioutil.TempFile(tempDir, filename) + if err != nil { + return nil, err + } + + return &cache{file: f, chunkSize: int64(binary.Size(data))}, nil +} + +func (c *cache) SetEntry(id Id, data interface{}) error { + if err := c.setOffset(id); err != nil { + return err + } + + return binary.Write(c.file, binary.LittleEndian, data) +} + +func (c *cache) Entry(id Id, data interface{}) error { + if err := c.setOffset(id); err != nil { + return err + } + + return binary.Read(c.file, binary.LittleEndian, data) +} + +func (c *cache) Close() error { + if err := c.file.Close(); err != nil { + return err + } + + return os.Remove(c.file.Name()) +} + +func (c *cache) setOffset(id Id) error { + offset := int64(id) * c.chunkSize + _, err := c.file.Seek(offset, io.SeekStart) + + return err +} diff --git a/internal/lsif_transformer/parser/cache_test.go b/internal/lsif_transformer/parser/cache_test.go new file mode 100644 index 000000000000..23a2ac6e9a91 --- /dev/null +++ b/internal/lsif_transformer/parser/cache_test.go @@ -0,0 +1,33 @@ +package parser + +import ( + "io/ioutil" + "testing" + + "github.com/stretchr/testify/require" +) + +type chunk struct { + A int16 + B int16 +} + +func TestCache(t *testing.T) { + cache, err := newCache("", "test-chunks", chunk{}) + require.NoError(t, err) + defer cache.Close() + + c := chunk{A: 1, B: 2} + require.NoError(t, cache.SetEntry(1, &c)) + require.NoError(t, cache.setOffset(0)) + + content, err := ioutil.ReadAll(cache.file) + require.NoError(t, err) + + expected := []byte{0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x2, 0x0} + require.Equal(t, expected, content) + + var nc chunk + require.NoError(t, cache.Entry(1, &nc)) + require.Equal(t, c, nc) +} diff --git a/internal/lsif_transformer/parser/ranges.go b/internal/lsif_transformer/parser/ranges.go index 0d760f53d6c8..1740b7238ec9 100644 --- a/internal/lsif_transformer/parser/ranges.go +++ b/internal/lsif_transformer/parser/ranges.go @@ -6,13 +6,15 @@ import ( "strconv" ) -const Definitions = "definitions" -const References = "references" +const ( + definitions = "definitions" + references = "references" +) type Ranges struct { - Entries map[Id]*Range DefRefs map[Id]*DefRef Hovers *Hovers + Cache *cache } type RawRange struct { @@ -51,10 +53,15 @@ func NewRanges(tempDir string) (*Ranges, error) { return nil, err } + cache, err := newCache(tempDir, "ranges", Range{}) + if err != nil { + return nil, err + } + return &Ranges{ - Entries: make(map[Id]*Range), DefRefs: make(map[Id]*DefRef), Hovers: hovers, + Cache: cache, }, nil } @@ -84,7 +91,11 @@ func (r *Ranges) Serialize(f io.Writer, rangeIds []Id, docs map[Id]string) error } for i, rangeId := range rangeIds { - entry := r.Entries[rangeId] + entry, err := r.getRange(rangeId) + if err != nil { + continue + } + serializedRange := SerializedRange{ StartLine: entry.Line, StartChar: entry.Character, @@ -109,6 +120,10 @@ func (r *Ranges) Serialize(f io.Writer, rangeIds []Id, docs map[Id]string) error } func (r *Ranges) Close() error { + if err := r.Cache.Close(); err != nil { + return err + } + return r.Hovers.Close() } @@ -129,9 +144,7 @@ func (r *Ranges) addRange(line []byte) error { return err } - r.Entries[rg.Id] = &rg.Data - - return nil + return r.Cache.SetEntry(rg.Id, &rg.Data) } func (r *Ranges) addItem(line []byte) error { @@ -140,26 +153,49 @@ func (r *Ranges) addItem(line []byte) error { return err } - if defRef.Property != Definitions && defRef.Property != References { + if defRef.Property != definitions && defRef.Property != references { return nil } for _, rangeId := range defRef.RangeIds { - if entry, ok := r.Entries[rangeId]; ok { - entry.RefId = defRef.RefId + var rg Range + if err := r.Cache.Entry(rangeId, &rg); err != nil { + return err + } + + rg.RefId = defRef.RefId + + if err := r.Cache.SetEntry(rangeId, &rg); err != nil { + return err } } - if defRef.Property != Definitions { - return nil + if defRef.Property == definitions { + return r.addDefRef(&defRef) } - defRange := r.Entries[defRef.RangeIds[0]] + return nil +} + +func (r *Ranges) addDefRef(defRef *RawDefRef) error { + var rg Range + if err := r.Cache.Entry(defRef.RangeIds[0], &rg); err != nil { + return err + } r.DefRefs[defRef.RefId] = &DefRef{ - Line: strconv.Itoa(int(defRange.Line + 1)), + Line: strconv.Itoa(int(rg.Line + 1)), DocId: defRef.DocId, } return nil } + +func (r *Ranges) getRange(rangeId Id) (*Range, error) { + var rg Range + if err := r.Cache.Entry(rangeId, &rg); err != nil { + return nil, err + } + + return &rg, nil +} diff --git a/internal/lsif_transformer/parser/ranges_test.go b/internal/lsif_transformer/parser/ranges_test.go index 988b13bb7774..bc53c66bb376 100644 --- a/internal/lsif_transformer/parser/ranges_test.go +++ b/internal/lsif_transformer/parser/ranges_test.go @@ -12,10 +12,14 @@ func TestRangesRead(t *testing.T) { defer cleanup() firstRange := Range{Line: 1, Character: 2, RefId: 3} - require.Equal(t, &firstRange, r.Entries[1]) + rg, err := r.getRange(1) + require.NoError(t, err) + require.Equal(t, &firstRange, rg) secondRange := Range{Line: 5, Character: 4, RefId: 3} - require.Equal(t, &secondRange, r.Entries[2]) + rg, err = r.getRange(2) + require.NoError(t, err) + require.Equal(t, &secondRange, rg) } func TestSerialize(t *testing.T) { -- GitLab