본문 바로가기
Data Analysis/SQL

[SQL] Presto SQL 관련 유용한 꿀팁들

by Hagrid 2024. 8. 6.
반응형
  • Array형태로 이루어진 칼럼에 like절
cardinality(filter(Col, x -> x like 'a')) > 0
  • Json 형태로 이루어진 칼럼을 scalar 형태로 추출
json_extract_scalar(Col,'$.A')
  • string 값 json parsing 하기
with a as (
 select '{\\"popup_title\\":\\"MAX_LEVEL\\",\\"max_invitation\\":2,\\"current_invitation\\":1}' as col
-- select '{popup_title: "MAX_LEVEL", max_invitation: 2, current_invitation: 1}'  as col
)
-- "{\\"popup_title\\":\\"MAX_LEVEL\\",\\"max_invitation\\":2,\\"current_invitation\\":1}"
-- "{popup_title: MAX_LEVEL, max_invitation: 2, current_invitation: 1}"
select *,
json_extract_scalar(replace(col,'\\',''),'$.current_invitation') as a
-- json_extract_scalar(col,'$.popup_title') as a
from a
  • 복수의 like절
regexp_like(Col, 'A|B|C|D|E')
  • Varchar -> timestamp
date_parse(Varchar,'%Y-%m-%d %H:%i:%s')
  • timestamp -> varchar
date_format(timestamp,'%Y%m%d')
  • timestamp에서 특정 unit 추출
extract(day from timestamp)
  • 특정 week 추출
week_of_year(to_timestamp(yyyymmdd, 'yyyymmdd')) AS woy
  • timestampe 계산
date_diff(unit,timestamp1,timestamp2)
  • 최근 7주 조건
where yyyymmdd >= date_format(date_add('day', -42 - day_of_week(now()), now()), '%Y%m%d')
  • 이중 조인(두가지 조건이 맞아 떨어져야 조인 가능)
table a join table b on a.1 = b.1 and a.2 = b.2
  • 첫 주차 월요일 추출
to_char(date_trunc('week', date(current_timestamp AT TIME ZONE 'Asia/Seoul')), 'yyyymmdd')
  • NULL값 변경
coalesce(col,'변환값')
  • 문자열 대치
replace(cols,'바꿀 문자','바뀔 문자')
  • Skewed 정도 확인
skewness(cols)
  • 상관계수(수치형 col)
corr(col1,col2)
  • 표준편차
stddev_pop(col)
  • 중앙값
approx_percentile(col,0.5)
  • Log화
ln(col)
log2(col)
log10(col)
  • 특정 문자 추출
substr(col,시작(1),끝(6))
-> 1부터 6번째까지의 문자열이 추출됨 substr(heyjude,1,6) = 'heyjud'
  • # 제외하는법
  • , COALESCE(LENGTH(regexp_replace(description, '#[^\\s]+', '')), 0) AS text_len_wo_hash
    
  • HTML 태그를 제거하고 텍스트의 본문 길이와 줄 수를 측정
WITH cleaned_data AS (
    SELECT
        description,
        regexp_replace(description, '<[^>]*>', '') AS cleaned_description
    FROM
        table_name
),
final_data AS (
    SELECT
        cleaned_description,
        LENGTH(cleaned_description) AS full_text_length,
        COALESCE(LENGTH(regexp_replace(cleaned_description, '#[^\\s]+', '')), 0) AS text_length_without_hashtags,
        LENGTH(regexp_replace(cleaned_description, '[^\\n]', '')) + 1 AS line_count,
        COALESCE(LENGTH(regexp_replace(regexp_replace(cleaned_description, '(^|\\n)#[^\\n]*', ''), '[^\\n]', '')) + 1, 0) AS line_count_without_hashtags
    FROM
        cleaned_data
)
SELECT
    description,
    full_text_length,
    text_length_without_hashtags,
    line_count,
    line_count_without_hashtags
FROM
    final_data;

 

  • 줄 수 (line count) 세는법

 

, COALESCE(LENGTH(description) - LENGTH(regexp_replace(description, '\\n', '')) + 1, 0) AS line_count
from dump.card_collections CC

 


Presto 0.230 DocumentationAggregate functions operate on a set of values to compute a single result. Except for , , , and , all of these… prestodb.io

6.13. Date and Time Functions and Operators" 6.12. JSON Functions and Operators 6.14. Aggregate Functions " prestodb.io

12.5. Mathematical Functions and Operators" 12.4. Conversion Functions 12.6. Decimal Functions and Operators " teradata.github.io

Presto

 

 

 

반응형

댓글