/**
 * Builds the table items (one object per body row) from the index cells (<th>)
 * and the value cells (<td>) of each <tr> in the dataframe's <tbody>.
 *
 * If the dataframe has a single index column, the table is simple:
 *   0: tr > th(index0 value) td(column0 value) td(column1 value)
 *   1: tr > th(index0 value) td(column0 value) td(column1 value)
 *
 * If the dataframe has multiple index columns, the outer index cells use rowspan,
 * and the rows below do not repeat the value for that index:
 *   0: tr > th(index0 value, rowspan=2) th(index1 value) td(column0 value)
 *   1: tr >                             th(index1 value) td(column0 value)
 *
 * So when a row has fewer <th> cells than the first row, the missing leading
 * values are inherited from the row above.
 *
 * @param {string[][]} itemIndexes - text of the <th> cells of each body row
 * @param {string[][]} itemColumns - text of the <td> cells of each body row
 * @returns {Object[]} one object per row, keyed `index0..indexN` and `column0..columnM`
 */
function parseItems(itemIndexes, itemColumns) {
  // A table without body rows has nothing to convert (and no first row to measure).
  if (itemIndexes.length === 0) return [];

  // The first row is never covered by a rowspan from above, so it carries every index level.
  const indexCount = itemIndexes[0].length;

  const filledIndexes = [];
  itemIndexes.forEach((row, rowIndex) => {
    const missingCount = indexCount - row.length;
    if (rowIndex === 0 || missingCount <= 0) {
      filledIndexes.push(row);
      return;
    }
    // Inherit from the previous row *after* it was filled: the raw previous row may itself
    // be short (rowspan covering 3+ rows, or 3+ index levels) and would lose outer values.
    const inherited = filledIndexes[rowIndex - 1].slice(0, missingCount);
    filledIndexes.push([...inherited, ...row]);
  });

  return filledIndexes.map((indexes, rowIndex) => ({
    ...Object.fromEntries(indexes.map((value, counter) => [`index${counter}`, value])),
    ...Object.fromEntries(
      itemColumns[rowIndex].map((value, counter) => [`column${counter}`, value]),
    ),
  }));
}

/**
 * Converts a list of header labels into table field definitions.
 *
 * @param {string[]} labels - header labels, in display order
 * @param {boolean} [isIndex=true] - whether the labels belong to index columns;
 *   index fields get `index<N>` keys and the `gl-font-weight-bold` class,
 *   other fields get `column<N>` keys and an empty class
 * @returns {{key: string, label: string, sortable: boolean, class: string}[]}
 */
function labelsToFields(labels, isIndex = true) {
  const keyPrefix = isIndex ? 'index' : 'column';
  const fieldClass = isIndex ? 'gl-font-weight-bold' : '';

  return labels.map((label, counter) => ({
    key: `${keyPrefix}${counter}`,
    label,
    sortable: true,
    class: fieldClass,
  }));
}

/**
 * Builds the field definitions (index fields first, then column fields) from the
 * header rows of the dataframe table.
 *
 * If the dataframe has a single index column, the header has one row:
 *   thead
 *     tr
 *       th(index0 label) th(column0 label) th(column1 label)
 *
 * If there are multiple index columns, the header will actually have two rows:
 *   thead
 *     tr
 *       th() th() th(column0 label) th(column1 label)
 *     tr
 *       th(index0 label) th(index1 label) th() th()
 *
 * Column labels are therefore read from the end of the first row and index labels
 * from the start of the last row.
 *
 * @param {string[][]} columnAndIndexLabels - text of the <th> cells of each header row
 * @param {number} indexCount - number of index columns
 * @param {number} columnCount - number of value columns
 * @returns {Object[]} field definitions as produced by `labelsToFields`
 */
function parseFields(columnAndIndexLabels, indexCount, columnCount) {
  if (columnAndIndexLabels.length === 0) return [];

  const firstRow = columnAndIndexLabels[0];
  const lastRow = columnAndIndexLabels[columnAndIndexLabels.length - 1];

  // Use an explicit start offset: `slice(-columnCount)` would return every label when
  // columnCount is 0, turning index labels into bogus column fields.
  const columnLabels = firstRow.slice(Math.max(0, firstRow.length - columnCount));
  const indexLabels = lastRow.slice(0, indexCount);

  return [...labelsToFields(indexLabels, true), ...labelsToFields(columnLabels, false)];
}
'index' : column, - label: column, - sortable: true, - })), - items: itemValues.map((values, itemIndex) => ({ - index: itemIndex, - ...Object.fromEntries(values.map((value, index) => [columnNames[index + 1], value])), - })), - }; + const itemIndexes = tableRows.map((row) => + [...row.querySelectorAll('th')].map((item) => item.innerText), + ); + + const fields = parseFields(columnAndIndexLabels, itemIndexes[0].length, itemColumns[0].length); + const items = parseItems(itemIndexes, itemColumns); + + return { fields, items }; } export function isDataframe(output) { diff --git a/spec/frontend/behaviors/components/json_table_spec.js b/spec/frontend/behaviors/components/json_table_spec.js index a82310873ed9b9073a8a7fc59b1e3b52355c246c..ae62d28d6c0d1bc203f627691e64de6d738c41fa 100644 --- a/spec/frontend/behaviors/components/json_table_spec.js +++ b/spec/frontend/behaviors/components/json_table_spec.js @@ -12,6 +12,7 @@ const TEST_FIELDS = [ label: 'Second', sortable: true, other: 'foo', + class: 'someClass', }, { key: 'C', @@ -127,11 +128,13 @@ describe('behaviors/components/json_table', () => { key: 'B', label: 'Second', sortable: true, + class: 'someClass', }, { key: 'C', label: 'Third', sortable: false, + class: [], }, 'D', ], diff --git a/spec/frontend/notebook/cells/output/dataframe_spec.js b/spec/frontend/notebook/cells/output/dataframe_spec.js index abf6631353c3614c14573398d693fa654a442af7..bf90497a36ba78c27414cf99bed379afafb44e94 100644 --- a/spec/frontend/notebook/cells/output/dataframe_spec.js +++ b/spec/frontend/notebook/cells/output/dataframe_spec.js @@ -35,16 +35,16 @@ describe('~/notebook/cells/output/DataframeOutput', () => { it('sets the correct fields', () => { expect(findTable().props().fields).toEqual([ - { key: 'index', label: '', sortable: true }, - { key: 'column_1', label: 'column_1', sortable: true }, - { key: 'column_2', label: 'column_2', sortable: true }, + { key: 'index0', label: '', sortable: true, class: 'gl-font-weight-bold' }, + { key: 
'column0', label: 'column_1', sortable: true, class: '' }, + { key: 'column1', label: 'column_2', sortable: true, class: '' }, ]); }); it('sets the correct items', () => { expect(findTable().props().items).toEqual([ - { index: 0, column_1: 'abc de f', column_2: 'a' }, - { index: 1, column_1: 'True', column_2: '0.1' }, + { index0: '0', column0: 'abc de f', column1: 'a' }, + { index0: '1', column0: 'True', column1: '0.1' }, ]); }); }); diff --git a/spec/frontend/notebook/cells/output/dataframe_util_spec.js b/spec/frontend/notebook/cells/output/dataframe_util_spec.js index ddc1b3cfe26451d701b190f76580c5e3a005a25c..37dee5429e45e43dba833c76753b8737b5c30976 100644 --- a/spec/frontend/notebook/cells/output/dataframe_util_spec.js +++ b/spec/frontend/notebook/cells/output/dataframe_util_spec.js @@ -1,5 +1,5 @@ import { isDataframe, convertHtmlTableToJson } from '~/notebook/cells/output/dataframe_util'; -import { outputWithDataframeContent } from '../../mock_data'; +import { outputWithDataframeContent, outputWithMultiIndexDataFrame } from '../../mock_data'; import sanitizeTests from './html_sanitize_fixtures'; describe('notebook/cells/output/dataframe_utils', () => { @@ -43,13 +43,33 @@ describe('notebook/cells/output/dataframe_utils', () => { const output = { fields: [ - { key: 'index', label: '', sortable: true }, - { key: 'column_1', label: 'column_1', sortable: true }, - { key: 'column_2', label: 'column_2', sortable: true }, + { key: 'index0', label: '', sortable: true, class: 'gl-font-weight-bold' }, + { key: 'column0', label: 'column_1', sortable: true, class: '' }, + { key: 'column1', label: 'column_2', sortable: true, class: '' }, ], items: [ - { index: 0, column_1: 'abc de f', column_2: 'a' }, - { index: 1, column_1: 'True', column_2: '0.1' }, + { index0: '0', column0: 'abc de f', column1: 'a' }, + { index0: '1', column0: 'True', column1: '0.1' }, + ], + }; + + expect(convertHtmlTableToJson(input)).toEqual(output); + }); + + it('converts multi-index table 
correctly', () => { + const input = outputWithMultiIndexDataFrame; + + const output = { + fields: [ + { key: 'index0', label: 'first', sortable: true, class: 'gl-font-weight-bold' }, + { key: 'index1', label: 'second', sortable: true, class: 'gl-font-weight-bold' }, + { key: 'column0', label: '0', sortable: true, class: '' }, + ], + items: [ + { index0: 'bar', index1: 'one', column0: '1' }, + { index0: 'bar', index1: 'two', column0: '2' }, + { index0: 'baz', index1: 'one', column0: '3' }, + { index0: 'baz', index1: 'two', column0: '4' }, ], }; @@ -96,7 +116,7 @@ describe('notebook/cells/output/dataframe_utils', () => { ['svg', 3], ])('sanitizes output for: %p', (tag, index) => { const inputHtml = makeDataframeWithHtml(sanitizeTests[index][1].input); - const convertedHtml = convertHtmlTableToJson(inputHtml).items[0].column_1; + const convertedHtml = convertHtmlTableToJson(inputHtml).items[0].column0; expect(convertedHtml).not.toContain(tag); }); diff --git a/spec/frontend/notebook/mock_data.js b/spec/frontend/notebook/mock_data.js index 15db2931b3c2f62b9e55696748e15ff86367e340..9c63ad773b5421e0a890570ca88f41bc3e883189 100644 --- a/spec/frontend/notebook/mock_data.js +++ b/spec/frontend/notebook/mock_data.js @@ -45,6 +45,58 @@ export const outputWithDataframeContent = [ '</div>', ]; +export const outputWithMultiIndexDataFrame = [ + '<div>\n', + '<style scoped>\n', + ' .dataframe tbody tr th:only-of-type {\n', + ' vertical-align: middle;\n', + ' }\n', + '\n', + ' .dataframe tbody tr th {\n', + ' vertical-align: top;\n', + ' }\n', + '\n', + ' .dataframe thead th {\n', + ' text-align: right;\n', + ' }\n', + '</style>\n', + '<table border="1" class="dataframe">\n', + ' <thead>\n', + ' <tr style="text-align: right;">\n', + ' <th></th>\n', + ' <th></th>\n', + ' <th>0</th>\n', + ' </tr>\n', + ' <tr>\n', + ' <th>first</th>\n', + ' <th>second</th>\n', + ' <th></th>\n', + ' </tr>\n', + ' </thead>\n', + ' <tbody>\n', + ' <tr>\n', + ' <th rowspan="2" valign="top">bar</th>\n', + 
' <th>one</th>\n', + ' <td>1</td>\n', + ' </tr>\n', + ' <tr>\n', + ' <th>two</th>\n', + ' <td>2</td>\n', + ' </tr>\n', + ' <tr>\n', + ' <th rowspan="2" valign="top">baz</th>\n', + ' <th>one</th>\n', + ' <td>3</td>\n', + ' </tr>\n', + ' <tr>\n', + ' <th>two</th>\n', + ' <td>4</td>\n', + ' </tr>\n', + ' </tbody>\n', + '</table>\n', + '</div>', +]; + export const outputWithDataframe = { data: { 'text/html': outputWithDataframeContent,