ARRAY<T>
Translation specification for the ARRAY<T> datatype from BigQuery to Snowflake
Description
In BigQuery, an array is an ordered list of zero or more elements of non-array values. Elements in an array must share the same type. (Array Type. BigQuery)
Sample Source Patterns
Setup data
BigQuery
CREATE TABLE test.arrayTable
(
col1 ARRAY<INT64>
);
CREATE TABLE test.anotherArrayTable
(
col2 ARRAY<INT64>
);
INSERT INTO test.arrayTable VALUES ([4, 10, 55]);
INSERT INTO test.arrayTable VALUES ([6, 7, 33]);
INSERT INTO test.arrayTable VALUES ([50, 12, 22]);
INSERT INTO test.anotherArrayTable VALUES ([9, 11, 52]);
INSERT INTO test.anotherArrayTable VALUES ([3, 18, 11]);
INSERT INTO test.anotherArrayTable VALUES ([33, 27, 43]);
Snowflake
CREATE TABLE test.arrayTable
(
col1 ARRAY DEFAULT []
);
CREATE TABLE test.anotherArrayTable
(
col2 ARRAY DEFAULT []
);
INSERT INTO test.arrayTable SELECT [4, 10, 55];
INSERT INTO test.arrayTable SELECT [6, 7, 33];
INSERT INTO test.arrayTable SELECT [50, 12, 22];
INSERT INTO test.anotherArrayTable SELECT [9, 11, 52];
INSERT INTO test.anotherArrayTable SELECT [50, 18, 11];
INSERT INTO test.anotherArrayTable SELECT [33, 27, 43];
INSERT with ARRAY in the VALUES clause
BigQuery
INSERT INTO test.arrayTable VALUES ([4, 10]);
INSERT INTO test.arrayTable (COL1)
VALUES ([1, 2, 3]), ([4, 5, 6]);
SELECT col1 FROM test.arrayTable ORDER BY col1[0], col1[1];
+--------------+
| col1 |
+--------------+
| [1, 2, 3] |
+--------------+
| [4, 5, 6] |
+--------------+
| [4, 10, 55] |
+--------------+
| [4, 10] |
+--------------+
| [6, 7, 33] |
+--------------+
| [50, 12, 22] |
+--------------+
Snowflake
INSERT INTO test.arrayTable SELECT [4, 10];
INSERT INTO test.arrayTable (COL1)
SELECT [1, 2, 3]
UNION ALL
SELECT [4, 5, 6];
SELECT col1 FROM test.arrayTable ORDER BY col1[0], col1[1];
+--------------+
| col1 |
+--------------+
| [1, 2, 3] |
+--------------+
| [4, 5, 6] |
+--------------+
| [4, 10, 55] |
+--------------+
| [4, 10] |
+--------------+
| [6, 7, 33] |
+--------------+
| [50, 12, 22] |
+--------------+
MERGE statement
BigQuery
MERGE INTO test.anotherArrayTable
USING test.arrayTable
ON col1[0] = col2[0]
WHEN MATCHED THEN UPDATE SET col2 = col1
WHEN NOT MATCHED THEN INSERT VALUES ([100, 100, 100]);
SELECT col2 FROM test.anotherArrayTable ORDER BY col2[0];
+-----------------+
| col2 |
+-----------------+
| [9, 11, 52] |
+-----------------+
| [33, 27, 43] |
+-----------------+
| [50, 12, 22] |
+-----------------+
| [100, 100, 100] |
+-----------------+
| [100, 100, 100] |
+-----------------+
Snowflake
MERGE INTO test.anotherArrayTable
USING test.arrayTable
ON col1[0] = col2[0]
WHEN MATCHED THEN UPDATE SET col2 = col1
WHEN NOT MATCHED THEN INSERT VALUES ([100, 100, 100]);
SELECT col2 FROM test.anotherArrayTable ORDER BY col2[0];
+-----------------+
| col2 |
+-----------------+
| [9, 11, 52] |
+-----------------+
| [33, 27, 43] |
+-----------------+
| [50, 12, 22] |
+-----------------+
| [100, 100, 100] |
+-----------------+
| [100, 100, 100] |
+-----------------+
ARRAY DEFAULT column value insertion/update
BigQuery
INSERT INTO test.arrayTable VALUES (DEFAULT);
UPDATE test.arrayTable
SET col1 = DEFAULT
WHERE TRUE;
SELECT col1 FROM test.arrayTable;
+------+
| col1 |
+------+
| [] |
+------+
| [] |
+------+
| [] |
+------+
| [] |
+------+
Snowflake
INSERT INTO test.arrayTable SELECT [];
UPDATE test.arrayTable
SET col1 = DEFAULT
WHERE TRUE;
SELECT col1 FROM test.arrayTable;
+------+
| col1 |
+------+
| [] |
+------+
| [] |
+------+
| [] |
+------+
| [] |
+------+
INSERT/UPDATE with NULL value
BigQuery
INSERT INTO test.arrayTable
SELECT
numbers
FROM
(SELECT [6] AS numbers
UNION ALL
SELECT CAST(NULL AS ARRAY<INT64>));
UPDATE test.arrayTable
SET col1 = NULL
WHERE ARRAY_LENGTH(col1) > 1;
SELECT col1 FROM test.arrayTable ORDER BY ARRAY_LENGTH(col1);
+------+
| col1 |
+------+
| [] |
+------+
| [] |
+------+
| [] |
+------+
| [] |
+------+
| [6] |
+------+
Snowflake
INSERT INTO test.arrayTable
SELECT
numbers
FROM
(SELECT [6] AS numbers
UNION ALL
SELECT IFNULL(CAST(NULL AS ARRAY), []));
UPDATE test.arrayTable
SET col1 = IFNULL(NULL, [])
WHERE ARRAY_SIZE(col1) > 1;
SELECT col1 FROM test.arrayTable ORDER BY ARRAY_SIZE(col1);
+------+
| col1 |
+------+
| [] |
+------+
| [] |
+------+
| [] |
+------+
| [] |
+------+
| [6] |
+------+
ARRAY access by index
BigQuery
SELECT
col1[0] + 4 AS byIndex,
col1[OFFSET(0)] + 4 AS byOffset,
col1[ORDINAL(1)] + 4 AS byOrdinal
FROM test.arrayTable ORDER BY col1[0];
+---------+----------+-----------+
| byIndex | byOffset | byOrdinal |
+---------+----------+-----------+
| 8 | 8 | 8 |
+---------+----------+-----------+
| 10 | 10 | 10 |
+---------+----------+-----------+
| 54 | 54 | 54 |
+---------+----------+-----------+
Snowflake
SELECT
col1[0] + 4 AS byIndex,
col1[0] + 4 AS byOffset,
col1[1 - 1] + 4 AS byOrdinal
FROM test.arrayTable ORDER BY col1[0];
+---------+----------+-----------+
| byIndex | byOffset | byOrdinal |
+---------+----------+-----------+
| 8 | 8 | 8 |
+---------+----------+-----------+
| 10 | 10 | 10 |
+---------+----------+-----------+
| 54 | 54 | 54 |
+---------+----------+-----------+
Safe ARRAY access by index
BigQuery
SELECT
col1[SAFE_OFFSET(0)] AS byOffsset,
col1[SAFE_OFFSET(-4)] AS byOffsetUnderflow,
col1[SAFE_OFFSET(500)] AS byOffsetOverflow,
col1[SAFE_ORDINAL(1)] AS byOrdinal,
col1[SAFE_ORDINAL(-4)] AS byOrdinalUnderflow,
col1[SAFE_ORDINAL(500)] AS byOrdinalOverflow
FROM test.arrayTable ORDER BY col1[0];
+----------+-------------------+------------------+-----------+--------------------+-------------------+
| byOffset | byOffsetUnderflow | byOffsetOverflow | byOrdinal | byOrdinalUnderflow | byOrdinalOverflow |
+----------+-------------------+------------------+-----------+--------------------+-------------------+
| 4 | NULL | NULL | 4 | NULL | NULL |
+----------+-------------------+------------------+-----------+--------------------+-------------------+
| 6 | NULL | NULL | 6 | NULL | NULL |
+----------+-------------------+------------------+-----------+--------------------+-------------------+
| 50 | NULL | NULL | 50 | NULL | NULL |
+----------+-------------------+------------------+-----------+--------------------+-------------------+
Snowflake
SELECT
PUBLIC.SAFE_OFFSET_UDF(col1, 0) AS byOffsset,
PUBLIC.SAFE_OFFSET_UDF(col1, -4) AS byOffsetUnderflow,
PUBLIC.SAFE_OFFSET_UDF(col1, 500) AS byOffsetOverflow,
PUBLIC.SAFE_OFFSET_UDF(col1, 1 - 1) AS byOrdinal,
PUBLIC.SAFE_OFFSET_UDF(col1, -4 - 1) AS byOrdinalUnderflow,
PUBLIC.SAFE_OFFSET_UDF(col1, 500 - 1) AS byOrdinalOverflow
FROM test.arrayTable ORDER BY col1[0];
+----------+-------------------+------------------+-----------+--------------------+-------------------+
| byOffset | byOffsetUnderflow | byOffsetOverflow | byOrdinal | byOrdinalUnderflow | byOrdinalOverflow |
+----------+-------------------+------------------+-----------+--------------------+-------------------+
| 4 | NULL | NULL | 4 | NULL | NULL |
+----------+-------------------+------------------+-----------+--------------------+-------------------+
| 6 | NULL | NULL | 6 | NULL | NULL |
+----------+-------------------+------------------+-----------+--------------------+-------------------+
| 50 | NULL | NULL | 50 | NULL | NULL |
+----------+-------------------+------------------+-----------+--------------------+-------------------+
ARRAY concatenation
BigQuery
SELECT [50, 30, 12] || [22, 33, 44] AS result;
+--------------------------+
| result |
+--------------------------+
| [50, 30, 12, 22, 33, 44] |
+--------------------------+
Snowflake
SELECT ARRAY_CAT([50, 30, 12], [22, 33, 44]) AS result;
+--------------------------+
| result |
+--------------------------+
| [50, 30, 12, 22, 33, 44] |
+--------------------------+
ARRAY used as parameter/return type
BigQuery
CREATE FUNCTION test.myArrayFunction (valuesArray ARRAY<INT64>, otherValue INTEGER)
RETURNS ARRAY<INT64>
AS
(
valuesArray || [otherValue]
);
SELECT test.myArrayFunction([5, 20, 10], 55) AS result;
+-----------------+
| result |
+-----------------+
| [5, 20, 10, 55] |
+-----------------+
Snowflake
CREATE FUNCTION test.myArrayFunction (valuesArray ARRAY, otherValue INTEGER)
RETURNS ARRAY
AS
$$
ARRAY_CAT(valuesArray, [otherValue])
$$;
SELECT test.myArrayFunction([5, 20, 10], 55) AS result;
+-----------------+
| result |
+-----------------+
| [5, 20, 10, 55] |
+-----------------+
Known Issues
1. Non-safe ARRAY access will not fail for positive out of bounds indexes
In BigQuery, accessing an array element by index will fail for any index value that is too low (underflow) or too high (overflow) when not using SAFE_OFFSET or SAFE_ORDINAL. However, in Snowflake errors are thrown only for underflow cases, any index that would case an overflow error will generate a NULL value instead.
When non-safe access to elements in an array is detected SnowConvert will generate MSCINF0055 to warn the user about this.
Related EWIs
MSCINF0057: Accessing arrays produces NULL instead of an error for positive out of bounds indexes in Snowflake.
Last updated