#author("2024-02-07T12:19:08+09:00","default:dex","dex")
#author("2024-02-07T12:24:47+09:00","default:dex","dex")
#contents

* Athena query Example [#ma69aeb6]

----
** syslog [#c0d9b538]

- create table:
#geshi(sql){{
CREATE EXTERNAL TABLE IF NOT EXISTS host_log_syslog (
  time string,
  host string,
  ident string,
  message string,
  ec2_instance_id string,
  site string
)
COMMENT 'host_log_syslog table'
PARTITIONED BY (
  log_id string,
  year string,
  month string,
  day string,
  hour string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION 's3://example-host-log/host'
TBLPROPERTIES(
  'projection.enabled' = 'true',
  "projection.log_id.type" = "injected",
  "projection.year.type" = 'date',
  "projection.year.range" = 'NOW-1YEARS,NOW',
  "projection.year.format" = 'yyyy',
  "projection.year.interval" = '1' ,
  "projection.year.interval.unit" = 'YEARS',
  'projection.month.type'='integer',
  'projection.month.range'='1,12',
  'projection.month.interval'='1',
  'projection.month.digits'='2',
  'projection.day.type'='integer',
  'projection.day.range'='1,31',
  'projection.day.interval'='1',
  'projection.day.digits'='2',
  'projection.hour.type'='integer',
  'projection.hour.range'='0,23',
  'projection.hour.interval'='1',
  'projection.hour.digits'='2',
  'storage.location.template' = 's3://example-host-log/host/log_id=${log_id}/year=${year}/month=${month}/day=${day}/hour=${hour}'
);

}}


- "error" を含む行をday, host毎に集計
#geshi(sql){{
select 
substr(time, 1, 7) as dt,
host,
count(host) as error_count
from host_log_syslog
where
  log_id = 'syslog.messages'
  and year || month || day || hour
  between '2024010100' and '2024010700'
  and message like '%error%'
group by substr(time, 1, 7), host
order by dt,host
}}

- "error"を含む行の詳細と件数、top 10
#geshi(sql){{
select 
 regexp_extract(message, '(error.*)', 1) as error_msg,
 count(message) as count
from host_log_syslog
where
  log_id = 'syslog.messages'
  and year || month || day || hour
  between '2024010100' and '2024010700'
  and message like '%error%'
group by regexp_extract(message, '(\[error\].*)', 1)
order by count desc
limit 10
}}

トップ   編集 差分 バックアップ 添付 複製 名前変更 リロード   新規 一覧 検索 最終更新   ヘルプ   最終更新のRSS