BigQuery Подсчитывать только последний элемент по метке времени

Попробуйте с символом unicode для точки с запятой, то есть \ u003B

hive> CREATE TABLE r_test (foo INT, bar STRING, address  STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe'
WITH SERDEPROPERTIES (
"field.delim"="<=>",
"collection.delim"="\u003B",
"mapkey.delim"="@"
    );

Я создал таблицу с символом Unicode и проверил collection.delim is; ниже:

hive> desc formatted r_test;
    | Storage Desc Params:| NULL                 | NULL                        |
    |                     | collection.delim     | ;                           |
    |                     | field.delim          | <=>                         |
    |                     | mapkey.delim         | @                           |
    |                     | serialization.format | 1                           |
0
задан Lisa Yin 19 February 2019 в 19:37
поделиться

1 ответ

Ниже для BigQuery Standard SQL

#standardSQL
SELECT Worksite, COUNTIF(completed = 1) completed
FROM (
  SELECT 
      Checklists.worksite_id AS `Worksite`,
      ARRAY_AGG(completed ORDER BY completed DESC LIMIT 1)[OFFSET(0)] completed
  FROM `project.dataset.Checklists` Checklists
  LEFT JOIN `project.dataset.ChecklistCompletions` ChecklistCompletions
  ON Checklists.id = ChecklistCompletions.id
  GROUP BY Checklists.id, Worksite
) GROUP BY worksite

, если применить к образцу данных по вашему вопросу, вы получите результат (как и ожидалось)

Row Worksite    completed    
1   worksite_1  4    
2   worksite_2  2     

Вы можете проверить, поиграть с выше, используя ниже

#standardSQL
WITH `project.dataset.ChecklistCompletions` AS (
  SELECT "1" id,     "u12345" uid,   1 completed, TIMESTAMP '2019-01-09 00:00:00' ts UNION ALL
  SELECT "1",     "u12345",   0, '2019-01-10 00:00:00' UNION ALL
  SELECT "1",     "u12345",   1, '2019-01-11 00:00:00' UNION ALL
  SELECT "2",     "u12345",   0, '2019-01-13 00:00:00' UNION ALL
  SELECT "3",     "u12345",   1, '2019-01-12 00:00:00' UNION ALL
  SELECT "4",     "u12345",   1, '2019-01-13 00:00:00' UNION ALL
  SELECT "5",     "u12345",   1, '2019-01-12 00:00:00' UNION ALL
  SELECT "6",     "u12345",   0, '2019-01-17 00:00:00' UNION ALL
  SELECT "7",     "u1",       1, '2019-01-10 00:00:00' UNION ALL
  SELECT "8",     "u1",       0, '2019-01-12 00:00:00' UNION ALL
  SELECT "9",     "u1",       1, '2019-01-15 00:05:00' UNION ALL
  SELECT "10",    "u1",       0, '2019-01-15 00:00:00' 
), `project.dataset.Checklists` AS (
  SELECT "1" id,     "u12345" uid,   "worksite_1" worksite_id, TIMESTAMP '2019-01-09 00:00:00' ts UNION ALL
  SELECT "2",     "u12345",   "worksite_2", '2019-01-13 00:00:00' UNION ALL
  SELECT "3",     "u12345",   "worksite_2", '2019-01-12 00:00:00' UNION ALL
  SELECT "4",     "u12345",   "worksite_1", '2019-01-13 00:00:00' UNION ALL
  SELECT "5",     "u12345",   "worksite_2", '2019-01-12 00:00:00' UNION ALL
  SELECT "6",     "u12345",   "worksite_1", '2019-01-17 00:00:00' UNION ALL
  SELECT "7",     "u1",       "worksite_1", '2019-01-10 00:00:00' UNION ALL
  SELECT "8",     "u1",       "worksite_1", '2019-01-12 00:00:00' UNION ALL
  SELECT "9",     "u1",       "worksite_1", '2019-01-15 00:05:00' UNION ALL
  SELECT "10",    "u1",       "worksite_2", '2019-01-15 00:00:00' 
)
SELECT Worksite, COUNTIF(completed = 1) completed
FROM (
  SELECT 
      Checklists.worksite_id AS `Worksite`,
      ARRAY_AGG(completed ORDER BY completed DESC LIMIT 1)[OFFSET(0)] completed
  FROM `project.dataset.Checklists` Checklists
  LEFT JOIN `project.dataset.ChecklistCompletions` ChecklistCompletions
  ON Checklists.id = ChecklistCompletions.id
  GROUP BY Checklists.id, Worksite
) GROUP BY worksite
ORDER BY worksite
0
ответ дан Mikhail Berlyant 19 February 2019 в 19:37
поделиться
Другие вопросы по тегам:

Похожие вопросы: