Попробуйте с символом unicode для точки с запятой, то есть \ u003B
hive> CREATE TABLE r_test (foo INT, bar STRING, address STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe'
WITH SERDEPROPERTIES (
"field.delim"="<=>",
"collection.delim"="\u003B",
"mapkey.delim"="@"
);
Я создал таблицу с символом Unicode и проверил collection.delim is; ниже:
hive> desc formatted r_test;
| Storage Desc Params:| NULL | NULL |
| | collection.delim | ; |
| | field.delim | <=> |
| | mapkey.delim | @ |
| | serialization.format | 1 |
Ниже для BigQuery Standard SQL
#standardSQL
SELECT Worksite, COUNTIF(completed = 1) completed
FROM (
SELECT
Checklists.worksite_id AS `Worksite`,
ARRAY_AGG(completed ORDER BY completed DESC LIMIT 1)[OFFSET(0)] completed
FROM `project.dataset.Checklists` Checklists
LEFT JOIN `project.dataset.ChecklistCompletions` ChecklistCompletions
ON Checklists.id = ChecklistCompletions.id
GROUP BY Checklists.id, Worksite
) GROUP BY worksite
, если применить к образцу данных по вашему вопросу, вы получите результат (как и ожидалось)
Row Worksite completed
1 worksite_1 4
2 worksite_2 2
Вы можете проверить, поиграть с выше, используя ниже
#standardSQL
WITH `project.dataset.ChecklistCompletions` AS (
SELECT "1" id, "u12345" uid, 1 completed, TIMESTAMP '2019-01-09 00:00:00' ts UNION ALL
SELECT "1", "u12345", 0, '2019-01-10 00:00:00' UNION ALL
SELECT "1", "u12345", 1, '2019-01-11 00:00:00' UNION ALL
SELECT "2", "u12345", 0, '2019-01-13 00:00:00' UNION ALL
SELECT "3", "u12345", 1, '2019-01-12 00:00:00' UNION ALL
SELECT "4", "u12345", 1, '2019-01-13 00:00:00' UNION ALL
SELECT "5", "u12345", 1, '2019-01-12 00:00:00' UNION ALL
SELECT "6", "u12345", 0, '2019-01-17 00:00:00' UNION ALL
SELECT "7", "u1", 1, '2019-01-10 00:00:00' UNION ALL
SELECT "8", "u1", 0, '2019-01-12 00:00:00' UNION ALL
SELECT "9", "u1", 1, '2019-01-15 00:05:00' UNION ALL
SELECT "10", "u1", 0, '2019-01-15 00:00:00'
), `project.dataset.Checklists` AS (
SELECT "1" id, "u12345" uid, "worksite_1" worksite_id, TIMESTAMP '2019-01-09 00:00:00' ts UNION ALL
SELECT "2", "u12345", "worksite_2", '2019-01-13 00:00:00' UNION ALL
SELECT "3", "u12345", "worksite_2", '2019-01-12 00:00:00' UNION ALL
SELECT "4", "u12345", "worksite_1", '2019-01-13 00:00:00' UNION ALL
SELECT "5", "u12345", "worksite_2", '2019-01-12 00:00:00' UNION ALL
SELECT "6", "u12345", "worksite_1", '2019-01-17 00:00:00' UNION ALL
SELECT "7", "u1", "worksite_1", '2019-01-10 00:00:00' UNION ALL
SELECT "8", "u1", "worksite_1", '2019-01-12 00:00:00' UNION ALL
SELECT "9", "u1", "worksite_1", '2019-01-15 00:05:00' UNION ALL
SELECT "10", "u1", "worksite_2", '2019-01-15 00:00:00'
)
SELECT Worksite, COUNTIF(completed = 1) completed
FROM (
SELECT
Checklists.worksite_id AS `Worksite`,
ARRAY_AGG(completed ORDER BY completed DESC LIMIT 1)[OFFSET(0)] completed
FROM `project.dataset.Checklists` Checklists
LEFT JOIN `project.dataset.ChecklistCompletions` ChecklistCompletions
ON Checklists.id = ChecklistCompletions.id
GROUP BY Checklists.id, Worksite
) GROUP BY worksite
ORDER BY worksite