#!/usr/bin/env python
# coding: utf-8

# In[1]:

import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import association_rules, fpgrowth
from prefixspan import PrefixSpan

# NOTE(review): nothing visible in this export assigns `df` before this line;
# presumably it was loaded in a notebook cell that was not exported - confirm.
# Keep only the columns used below and drop every row with a missing value.
df = df[[
    'RISK_V2', 'INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',
    'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',
    'ACCD_FIND_MTD_CODE', 'DRULE_NM',
]].dropna()
len(df)
df.head()


# In[349]:

##################### NTM section #####################


# In[350]:

# .copy() gives NTM_df its own data, so the column assignments and the
# in-place drop below never act on a slice of `df`.
NTM_df = df[df['ACCD_FIND_MTD_CODE'] == '1'].copy()
len(NTM_df)


# In[351]:

# Pull RISK_V2 out; the asset, intent and source (black IP) flags are read
# from it below.
RISK_V2 = NTM_df['RISK_V2']
RISK_V2_FILTERED = RISK_V2.dropna()
print(RISK_V2.size)
print(RISK_V2_FILTERED.size)


# In[352]:

def _iter_flag_descriptions(record, marker, describe):
    """Yield 'RISK_V2.<key>=<description>' for every set flag in *record*.

    A key is reported when it contains *marker* and ``record[key]`` is truthy.
    *describe* maps the key to its description text.
    (presumably *record* is a dict-like mapping of flag name -> value - TODO confirm)
    """
    for key in record:
        if marker in key and record[key]:
            yield 'RISK_V2.' + key + '=' + describe(key)


def filter_assets_value(risk):
    """Return the descriptions of all set ASSETS_VAL_* flags in *risk*.

    Best effort: if *risk* cannot be processed, the record and its type are
    printed and whatever was collected so far is returned.
    """
    risks = []
    try:
        for description in _iter_flag_descriptions(risk, 'ASSETS_VAL_', get_asset_desc):
            risks.append(description)
    except Exception:
        # Was a bare ``except`` with ``return`` inside ``finally``, which also
        # swallowed KeyboardInterrupt/SystemExit. Ordinary errors are still
        # reported and tolerated.
        print(risk)
        print(type(risk))
    return risks


# In[353]:

# modified
# Description text for each asset flag (runtime strings, kept verbatim).
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the description for an ASSETS_VAL_* key, or '' if unknown."""
    return _ASSET_DESCRIPTIONS.get(asset_field, '')


# In[354]:

# New assets column: one list of descriptions per row, stored as its string form.
NTM_df['ASSETS_VAL'] = list(map(filter_assets_value, RISK_V2_FILTERED))
NTM_df['ASSETS_VAL'] = NTM_df['ASSETS_VAL'].astype(str)
NTM_df[:1]


# In[355]:

# modified
def filter_intent(intent):
    """Return the descriptions of all set INTENT_VAL_* flags in *intent*.

    Unlike filter_assets_value, errors are not caught here.
    """
    return list(_iter_flag_descriptions(intent, 'INTENT_VAL_', get_intent_desc))


# In[356]:

# Description text for each intent flag (runtime strings, kept verbatim).
_INTENT_DESCRIPTIONS = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc(intent_field):
    """Return the description for an INTENT_VAL_* key, or '' if unknown."""
    return _INTENT_DESCRIPTIONS.get(intent_field, '')


# In[357]:

# New column of intent value
NTM_df['INTENT_VAL'] = list(map(filter_intent, RISK_V2_FILTERED))
NTM_df['INTENT_VAL'] = NTM_df['INTENT_VAL'].astype(str)
NTM_df[:1]


# In[358]:

# modified
def filter_source(source):
    """Return the descriptions of all set SOURCE_VAL_* flags in *source*.

    Unlike filter_assets_value, errors are not caught here.
    """
    return list(_iter_flag_descriptions(source, 'SOURCE_VAL_', get_source_desc))


# In[359]:

# Description text for each source flag (runtime strings, kept verbatim).
_SOURCE_DESCRIPTIONS = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc(source_field):
    """Return the description for a SOURCE_VAL_* key, or '' if unknown."""
    return _SOURCE_DESCRIPTIONS.get(source_field, '')


# In[360]:

# New column of SOURCE_VAL value
NTM_df['SOURCE_VAL'] = list(map(filter_source, RISK_V2_FILTERED))
NTM_df['SOURCE_VAL'] = NTM_df['SOURCE_VAL'].astype(str)
NTM_df[:5]


# In[361]:

# RISK_V2 has been expanded into the three columns above and is no longer needed.
NTM_df.drop(columns=['RISK_V2'], inplace=True)
NTM_df.columns


# In[362]:

##################### You can proceed from here. #####################
##################### Apply the algorithm to every item combination of the 12 items below (item 12, equipment ACCD_FIND_MTD_CODE, excluded) #####################

# It should be 13 columns in total
# 1. Institution                 INST_NM
# 2. Attack                      DRULE_ATT_TYPE_CODE1
# 3. Asset                       ASSETS_VAL
# 4. Threat attack IP            TW_ATT_IP
# 5. Threat attack port          TW_ATT_PORT
# 6. Threat victim IP            TW_DMG_IP
# 7. Threat victim port          TW_DMG_PORT
# 8. Threat victim protocol      ACCD_DMG_PROTO_NM
# 9. Attack country              TW_ATT_CT_NM
# 10. Intent (7 values)          INTENT_VAL
# 11. IP/URL weight              SOURCE_VAL
# 12. Equipment                  ACCD_FIND_MTD_CODE
# 13.
# detection rule name: DRULE_NM
#

# In[363]:

NTM_df.isna().sum()


# In[364]:

# Replace any remaining NaN with the fill value listed for each column
# ('' or 0, exactly as listed). Insertion order is the processing order.
_NTM_NAN_FILL = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for _column, _fill in _NTM_NAN_FILL.items():
    NTM_df[_column] = NTM_df[_column].replace(np.nan, _fill)


# In[365]:

# Check NaN out again
NTM_df.isna().sum()


# In[366]:

# # Merge all
# # Make one space-separated string from all of the elements of a row.
# Every column is cast to str first; previously only the first six were, so a
# non-string value in any later column would make the concatenation fail.
_NTM_COMBINED_COLUMNS = [
    'INST_NM', 'TW_ATT_IP', 'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT',
    'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM', 'ASSETS_VAL', 'INTENT_VAL',
    'SOURCE_VAL', 'DRULE_ATT_TYPE_CODE1', 'DRULE_NM',
]
_combined = NTM_df[_NTM_COMBINED_COLUMNS[0]].astype(str)
for _column in _NTM_COMBINED_COLUMNS[1:]:
    _combined = _combined + ' ' + NTM_df[_column].astype(str)
NTM_df['Combined'] = _combined

NTM_com = NTM_df['Combined']
NTM_com[:10]


# In[367]:

# Change the type to DataFrame
NTM_to_df = pd.DataFrame(NTM_com)
NTM_to_df[:5]


# In[368]:

# Change the type to list in order to apply the algorithm (nested list).
# A one-column frame yields one single-item list per row, so every sequence
# handed to PrefixSpan below contains exactly one item (the combined string).
# NOTE(review): with one-item sequences the mined patterns can only be whole
# combined strings - confirm that this granularity is intended.
NTM_tolist = NTM_to_df.values.tolist()
NTM_tolist[:5]


# In[369]:

from prefixspan import PrefixSpan


# In[370]:

# Apply prefixspan
PrefixSpan_NTM = PrefixSpan(NTM_tolist)

###### Interchangeable ######
# The threshold passed to frequent() can be changed; 1 is the value used here.
prefix_NTM = PrefixSpan_NTM.frequent(1)
prefix_NTM[:3]


# In[371]:

# Put the result to DataFrame
prefix_NTM_df = pd.DataFrame(prefix_NTM)
prefix_NTM_df[:5]


# In[372]:

# Change the column names
# Positional columns 0 and 1 of the PrefixSpan result become Frequency and Cause.
prefix_NTM_df.rename(columns={0: 'Frequency', 1: 'Cause'}, inplace=True)

# Make the new column for filling the Effect
prefix_NTM_df['Effect'] = np.nan

# Change the order of columns. .copy() makes the reordered frame independent,
# so assigning 'Effect' below does not act on a slice of the old frame.
prefix_NTM_df = prefix_NTM_df[['Cause', 'Effect', 'Frequency']].copy()
prefix_NTM_df[:2]


# In[373]:

# Rule-name tags, in the order they are tested.
_DRULE_TAGS = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')


def _cause_has_tag(ps, tag):
    """Return True when ' <tag>' occurs in the first item of the pattern *ps*."""
    return ' ' + tag in ps[0]


# Define the function that finds the rule name
def generate_cause(cell):
    """Return the first rule tag found in ``cell[0]``, or '' if none matches.

    A tag matches when it appears preceded by a space. Tags are tested in the
    order Attack, DDOS, HACK, MAIL, Malwr, WEB.
    """
    for drule in _DRULE_TAGS:
        if _cause_has_tag(cell, drule):
            return drule
    return ''


# Map every Cause to its rule name; that rule name is the effect.
effect = list(map(generate_cause, prefix_NTM_df.Cause))

# Assign the rule name as an effect
prefix_NTM_df['Effect'] = effect
prefix_NTM_df.sort_values(by=['Frequency'], ascending=False)


# In[374]:

def _select_tagged_rows(frame, predicate, label):
    """Return the rows of *frame* whose Cause satisfies *predicate*.

    Any NaN left in the selected rows is replaced by *label*.
    """
    return frame[list(map(predicate, frame.Cause))].fillna(label)


# Attack Filter
def Attack_filter(ps):
    """Return True when the pattern's first item contains ' Attack'."""
    return _cause_has_tag(ps, 'Attack')


att_filter = _select_tagged_rows(prefix_NTM_df, Attack_filter, 'Attack')


# Malwr Filter
def Malwr_filter(ps):
    """Return True when the pattern's first item contains ' Malwr'."""
    return _cause_has_tag(ps, 'Malwr')


mal_filter = _select_tagged_rows(prefix_NTM_df, Malwr_filter, 'Malwr')


# DDOS Filter
def DDOS_filter(ps):
    """Return True when the pattern's first item contains ' DDOS'."""
    return _cause_has_tag(ps, 'DDOS')


dd_filter = _select_tagged_rows(prefix_NTM_df, DDOS_filter, 'DDOS')


# HACK Filter
def HACK_filter(ps):
    """Return True when the pattern's first item contains ' HACK'."""
    return _cause_has_tag(ps, 'HACK')


hack_filter = _select_tagged_rows(prefix_NTM_df, HACK_filter, 'HACK')


# MAIL Filter
def MAIL_filter(ps):
    """Return True when the pattern's first item contains ' MAIL'."""
    return _cause_has_tag(ps, 'MAIL')


mail_filter = _select_tagged_rows(prefix_NTM_df, MAIL_filter, 'MAIL')


# WEB Filter
def WEB_filter(ps):
    """Return True when the pattern's first item contains ' WEB'."""
    return _cause_has_tag(ps, 'WEB')


prefix_NTM_df
web_filter = _select_tagged_rows(prefix_NTM_df, WEB_filter, 'WEB')

# Stack the per-tag selections. A Cause that contains several tags appears
# once per matching filter.
frames = [att_filter, mal_filter, dd_filter, hack_filter, mail_filter, web_filter]
result = pd.concat(frames)
result.sort_values(by=['Frequency'], ascending=False)


# In[ ]:

##################### NTM section End #####################


# In[ ]:

##################### MTM section #####################


# In[375]:

# .copy() gives MTM_df its own data, so the column assignments and in-place
# drop performed on it later never act on a slice of `df`.
MTM_df = df[df['ACCD_FIND_MTD_CODE'] == '2'].copy()
len(MTM_df)


# In[376]:

# Pick out RISK_V2 in order to get the
# asset, intent and source (black IP) flags out of it.
RISK_V2_MTM = MTM_df['RISK_V2']
RISK_V2_FILTERED_MTM = RISK_V2_MTM.dropna()
print(RISK_V2_MTM.size)
print(RISK_V2_FILTERED_MTM.size)


# In[377]:

# The *_MTM helpers below were line-for-line copies of the NTM helpers, and the
# MTM filters already called the NTM describers. The shared dependency is now
# explicit: each *_MTM name simply delegates to its NTM counterpart.

def filter_assets_value_MTM(risk):
    """Return the descriptions of all set ASSETS_VAL_* flags in *risk*.

    Delegates to filter_assets_value.
    """
    return filter_assets_value(risk)


# In[378]:

# modified
def get_asset_desc_MTM(asset_field):
    """Return the description for an ASSETS_VAL_* key, or '' if unknown.

    Delegates to get_asset_desc.
    """
    return get_asset_desc(asset_field)


# In[379]:

# New assets column: one list of descriptions per row, stored as its string form.
MTM_df['ASSETS_VAL'] = list(map(filter_assets_value_MTM, RISK_V2_FILTERED_MTM))
MTM_df['ASSETS_VAL'] = MTM_df['ASSETS_VAL'].astype(str)
MTM_df[:1]


# In[381]:

# modified
def filter_intent_MTM(intent):
    """Return the descriptions of all set INTENT_VAL_* flags in *intent*.

    Delegates to filter_intent.
    """
    return filter_intent(intent)


# In[382]:

def get_intent_desc_MTM(intent_field):
    """Return the description for an INTENT_VAL_* key, or '' if unknown.

    Delegates to get_intent_desc.
    """
    return get_intent_desc(intent_field)


# In[383]:

# New column of intent value
MTM_df['INTENT_VAL'] = list(map(filter_intent_MTM, RISK_V2_FILTERED_MTM))
MTM_df['INTENT_VAL'] = MTM_df['INTENT_VAL'].astype(str)
MTM_df[:1]


# In[384]:

# modified
def filter_source_MTM(source):
    """Return the descriptions of all set SOURCE_VAL_* flags in *source*.

    Delegates to filter_source.
    """
    return filter_source(source)


# In[385]:

def get_source_desc_MTM(source_field):
    """Return the description for a SOURCE_VAL_* key, or '' if unknown.

    Delegates to get_source_desc.
    """
    return get_source_desc(source_field)


# In[386]:

# New column of SOURCE_VAL value
MTM_df['SOURCE_VAL'] = list(map(filter_source_MTM, RISK_V2_FILTERED_MTM))
MTM_df['SOURCE_VAL'] = MTM_df['SOURCE_VAL'].astype(str)
MTM_df[:5]


# In[387]:

# RISK_V2 has been expanded into the three columns above and is no longer needed.
MTM_df.drop(columns=['RISK_V2'], inplace=True)
MTM_df.columns


# In[388]:

MTM_df.isna().sum()


# In[389]:

# Replace any remaining NaN with the fill value listed for each column
# ('' or 0, exactly as listed). The remaining columns are handled by the
# statements that follow.
for _column, _fill in (
    ('ACCD_DMG_PROTO_NM', ''),
    ('INST_NM', ''),
    ('DRULE_ATT_TYPE_CODE1', ''),
    ('TW_ATT_IP', 0),
    ('TW_ATT_PORT', 0),
    ('TW_DMG_IP', 0),
    ('TW_DMG_PORT', 0),
):
    MTM_df[_column] = MTM_df[_column].replace(np.nan, _fill)
# NaN replacement for the remaining columns ('' or 0, exactly as listed).
for _column, _fill in (
    ('TW_ATT_CT_NM', ''),
    ('ASSETS_VAL', 0),
    ('INTENT_VAL', 0),
    ('SOURCE_VAL', 0),
    ('DRULE_NM', ''),
):
    MTM_df[_column] = MTM_df[_column].replace(np.nan, _fill)


# In[390]:

# Check NaN out again
MTM_df.isna().sum()


# In[391]:

# # Merge all
# # Make one space-separated string from all of the elements of a row.
# Every column is cast to str first; previously only the first six were, so a
# non-string value in any later column would make the concatenation fail.
_MTM_COMBINED_COLUMNS = [
    'INST_NM', 'TW_ATT_IP', 'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT',
    'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM', 'ASSETS_VAL', 'INTENT_VAL',
    'SOURCE_VAL', 'DRULE_ATT_TYPE_CODE1', 'DRULE_NM',
]
_combined_MTM = MTM_df[_MTM_COMBINED_COLUMNS[0]].astype(str)
for _column in _MTM_COMBINED_COLUMNS[1:]:
    _combined_MTM = _combined_MTM + ' ' + MTM_df[_column].astype(str)
MTM_df['Combined'] = _combined_MTM

MTM_com = MTM_df['Combined']
MTM_com[:10]


# In[392]:

# Change the type to DataFrame
MTM_to_df = pd.DataFrame(MTM_com)
MTM_to_df[:5]


# In[393]:

# Change the type to list in order to apply the algorithm (nested list).
# A one-column frame yields one single-item list per row, so every sequence
# handed to PrefixSpan below contains exactly one item (the combined string).
MTM_tolist = MTM_to_df.values.tolist()
MTM_tolist[:5]


# In[394]:

# Apply prefixspan
PrefixSpan_MTM = PrefixSpan(MTM_tolist)

###### Interchangeable ######
# The threshold passed to frequent() can be changed; 1 is the value used here.
prefix_MTM = PrefixSpan_MTM.frequent(1)
prefix_MTM[:3]


# In[395]:

# Put the result to DataFrame
prefix_MTM_df = pd.DataFrame(prefix_MTM)
prefix_MTM_df[:5]


# In[396]:

# Change the column names: positional columns 0 and 1 become Frequency and Cause.
prefix_MTM_df.rename(columns={0: 'Frequency', 1: 'Cause'}, inplace=True)

# Make the new column for filling the Effect
prefix_MTM_df['Effect'] = np.nan

# Change the order of columns. .copy() makes the reordered frame independent,
# so assigning 'Effect' below does not act on a slice of the old frame.
prefix_MTM_df = prefix_MTM_df[['Cause', 'Effect', 'Frequency']].copy()
prefix_MTM_df[:2]


# In[397]:

# Rule-name tags, in the order they are tested.
_DRULE_TAGS_MTM = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')


def _cause_has_tag_MTM(ps, tag):
    """Return True when ' <tag>' occurs in the first item of the pattern *ps*."""
    return ' ' + tag in ps[0]


# Define the function that finds the rule name
def generate_cause_MTM(cell):
    """Return the first rule tag found in ``cell[0]``, or '' if none matches.

    A tag matches when it appears preceded by a space. Tags are tested in the
    order Attack, DDOS, HACK, MAIL, Malwr, WEB.
    """
    for drule in _DRULE_TAGS_MTM:
        if _cause_has_tag_MTM(cell, drule):
            return drule
    return ''


# Map every Cause to its rule name; that rule name is the effect.
# Uses generate_cause_MTM, the function defined for this section. The call
# previously went to the NTM section's generate_cause, leaving this one unused.
effect_MTM = list(map(generate_cause_MTM, prefix_MTM_df.Cause))

# Assign the rule name as an effect
prefix_MTM_df['Effect'] = effect_MTM
prefix_MTM_df.sort_values(by=['Frequency'], ascending=False)


# In[399]:

def _select_tagged_rows_MTM(frame, predicate, label):
    """Return the rows of *frame* whose Cause satisfies *predicate*.

    Any NaN left in the selected rows is replaced by *label*.
    """
    return frame[list(map(predicate, frame.Cause))].fillna(label)


# Attack Filter
def Attack_filter_MTM(ps):
    """Return True when the pattern's first item contains ' Attack'."""
    return _cause_has_tag_MTM(ps, 'Attack')


att_filter_MTM = _select_tagged_rows_MTM(prefix_MTM_df, Attack_filter_MTM, 'Attack')


# Malwr Filter
def Malwr_filter_MTM(ps):
    """Return True when the pattern's first item contains ' Malwr'."""
    return _cause_has_tag_MTM(ps, 'Malwr')


mal_filter_MTM = _select_tagged_rows_MTM(prefix_MTM_df, Malwr_filter_MTM, 'Malwr')


# DDOS Filter
def DDOS_filter_MTM(ps):
    """Return True when the pattern's first item contains ' DDOS'."""
    return _cause_has_tag_MTM(ps, 'DDOS')


dd_filter_MTM = _select_tagged_rows_MTM(prefix_MTM_df, DDOS_filter_MTM, 'DDOS')


# HACK Filter
def HACK_filter_MTM(ps):
    """Return True when the pattern's first item contains ' HACK'."""
    return _cause_has_tag_MTM(ps, 'HACK')


hack_filter_MTM = _select_tagged_rows_MTM(prefix_MTM_df, HACK_filter_MTM, 'HACK')


# MAIL Filter
def MAIL_filter_MTM(ps):
    """Return True when the pattern's first item contains ' MAIL'."""
    return _cause_has_tag_MTM(ps, 'MAIL')


mail_filter_MTM = _select_tagged_rows_MTM(prefix_MTM_df, MAIL_filter_MTM, 'MAIL')


# WEB Filter
def WEB_filter_MTM(ps):
    """Return True when the pattern's first item contains ' WEB'."""
    return _cause_has_tag_MTM(ps, 'WEB')


prefix_MTM_df[:5]
web_filter_MTM = _select_tagged_rows_MTM(prefix_MTM_df, WEB_filter_MTM, 'WEB')

# Stack the per-tag selections. A Cause that contains several tags appears
# once per matching filter.
frames_MTM = [att_filter_MTM, mal_filter_MTM, dd_filter_MTM, hack_filter_MTM,
              mail_filter_MTM, web_filter_MTM]
result_MTM = pd.concat(frames_MTM)
result_MTM.sort_values(by=['Frequency'], ascending=False)


# In[ ]: