#author("2024-02-07T12:19:08+09:00","default:dex","dex") #author("2024-02-07T12:24:47+09:00","default:dex","dex") #contents * Athena query Example [#ma69aeb6] ---- ** syslog [#c0d9b538] - create table: #geshi(sql){{ CREATE EXTERNAL TABLE IF NOT EXISTS host_log_syslog ( time string, host string, ident string, message string, ec2_instance_id string, site string ) COMMENT 'host_log_syslog table' PARTITIONED BY ( log_id string, year string, month string, day string, hour string ) ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' LOCATION 's3://example-host-log/host' TBLPROPERTIES( 'projection.enabled' = 'true', "projection.log_id.type" = "injected", "projection.year.type" = 'date', "projection.year.range" = 'NOW-1YEARS,NOW', "projection.year.format" = 'yyyy', "projection.year.interval" = '1' , "projection.year.interval.unit" = 'YEARS', 'projection.month.type'='integer', 'projection.month.range'='1,12', 'projection.month.interval'='1', 'projection.month.digits'='2', 'projection.day.type'='integer', 'projection.day.range'='1,31', 'projection.day.interval'='1', 'projection.day.digits'='2', 'projection.hour.type'='integer', 'projection.hour.range'='0,23', 'projection.hour.interval'='1', 'projection.hour.digits'='2', 'storage.location.template' = 's3://example-host-log/host/log_id=${log_id}/year=${year}/month=${month}/day=${day}/hour=${hour}' ); }} - "error" を含む行をday, host毎に集計 #geshi(sql){{ select substr(time, 1, 7) as dt, host, count(host) as error_count from host_log_syslog where log_id = 'syslog.messages' and year || month || day || hour between '2024010100' and '2024010700' and message like '%error%' group by substr(time, 1, 7), host order by dt,host }} - "error"を含む行の詳細と件数、top 10 #geshi(sql){{ select regexp_extract(message, '(error.*)', 1) as error_msg, count(message) as count from host_log_syslog where log_id = 'syslog.messages' and year || month || day || hour between '2024010100' and '2024010700' and message like '%error%' group by regexp_extract(message, '(\[error\].*)', 1) order by count desc limit 10 }}