#!/usr/bin/env python
# coding: utf-8

# NTM (유해트래픽 탐지장비): harmful-traffic detection equipment

#

# MTM (악성파일 탐지장비): malicious-file detection equipment

# In[1]:
#!/usr/bin/env python
# coding: utf-8
import json

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import association_rules, fpgrowth
from mlxtend.preprocessing import TransactionEncoder
from pandas import json_normalize
from prefixspan import PrefixSpan

# Load ts_data_accident-2020_sample.csv.
# low_memory=False is set to prevent the DtypeWarning.
df = pd.read_csv('ts_data_accident-2020_sample.csv', low_memory=False)
df = df[['RISK_V2', 'INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP',
         'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM',
         'TW_ATT_CT_NM', 'ACCD_FIND_MTD_CODE', 'DRULE_NM']].dropna()
len(df)
# len(df) : 10000, load successful (author's note for the sample file)

##################### NTM section #####################
# .copy() gives an independent frame: the columns added below must not be
# written into a slice of df.
NTM_df = df[df['ACCD_FIND_MTD_CODE'] == 1].copy()  #* changed '1' to 1
len(NTM_df)
#* NTM_df.head()

# Pick RISK_V2 out in order to get the asset, risk, intent and black-IP flags.
RISK_V2 = NTM_df['RISK_V2']
RISK_V2_FILTERED = RISK_V2.dropna()
print(RISK_V2.size)
print(RISK_V2_FILTERED.size)

#* Added: the previous filter_assets_value could not read the values, so
#* RISK_V2 is expanded into a DataFrame of its own.
# Each record is normalised on its own and everything is concatenated once;
# growing the frame with pd.concat inside the loop copies it every iteration.
_risk_frames = [
    json_normalize(json.loads(raw_risk.replace("'", "\"")))
    for raw_risk in RISK_V2_FILTERED
]
risk_df = (pd.concat(_risk_frames, ignore_index=True)
           if _risk_frames else pd.DataFrame())
risk_df_col = risk_df.columns.values.tolist()

# In[352]:
# Per-row label lists filled by filter_assets_value (one entry per row).
asset_val = []
intent_val = []
source_val = []


def _risk_flag_is_set(value):
    """Return True when a RISK_V2 flag is truthy; NaN counts as not set.

    NaN appears when a record lacks a key that other records have, and NaN
    is truthy in Python, so it has to be rejected explicitly.
    """
    if isinstance(value, float) and np.isnan(value):
        return False
    return bool(value)


def filter_assets_value(risk):
    """Append the asset/intent/source labels of every row of *risk*.

    For each row, every column whose name contains 'ASSETS_VAL_',
    'INTENT_VAL_' or 'SOURCE_VAL_' and whose value is set yields a label
    'RISK_V2.<column>=<description>'. One list per row is appended to the
    module-level lists asset_val, intent_val and source_val, also when the
    row could not be read completely, so the lists stay aligned with *risk*.

    The columns are taken from *risk* itself rather than from the global
    risk_df_col, so the function works for any frame passed in.

    Returns None; the result is the side effect on the three lists.
    """
    columns = list(risk.columns)
    for position, (_, row) in enumerate(risk.iterrows()):
        risks = []
        intents = []
        sources = []
        try:
            for key in columns:
                if not _risk_flag_is_set(row[key]):
                    continue
                if 'ASSETS_VAL_' in key:
                    risks.append('RISK_V2.' + key + '=' + get_asset_desc(key))
                if 'INTENT_VAL_' in key:
                    intents.append(
                        'RISK_V2.' + key + '=' + get_intent_desc(key))
                if 'SOURCE_VAL_' in key:
                    sources.append(
                        'RISK_V2.' + key + '=' + get_source_desc(key))
        except (KeyError, TypeError, ValueError) as exc:
            # Best effort, as before: report the row and keep what was read.
            print('filter_assets_value: could not read row', position, repr(exc))
        finally:
            asset_val.append(risks)
            intent_val.append(intents)
            source_val.append(sources)


# modified
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the description of an ASSETS_VAL_* field name, '' if unknown."""
    return _ASSET_DESCRIPTIONS.get(asset_field, '')


# modified
def filter_intent(intent):
    """Return 'RISK_V2.<key>=<description>' for each truthy INTENT_VAL_* key.

    *intent* is iterated for its keys and indexed by key (e.g. a dict).
    """
    return [
        'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)
        for intent_key in intent
        if 'INTENT_VAL_' in intent_key and intent[intent_key]
    ]


# In[356]:
_INTENT_DESCRIPTIONS = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc(intent_field):
    """Return the description of an INTENT_VAL_* field name, '' if unknown."""
    return _INTENT_DESCRIPTIONS.get(intent_field, '')


# In[358]:
# modified
def filter_source(source):
    """Return 'RISK_V2.<key>=<description>' for each truthy SOURCE_VAL_* key.

    *source* is iterated for its keys and indexed by key (e.g. a dict).
    """
    return [
        'RISK_V2.' + source_key + '=' + get_source_desc(source_key)
        for source_key in source
        if 'SOURCE_VAL_' in source_key and source[source_key]
    ]


# In[359]:
_SOURCE_DESCRIPTIONS = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc(source_field):
    """Return the description of a SOURCE_VAL_* field name, '' if unknown."""
    return _SOURCE_DESCRIPTIONS.get(source_field, '')


# In[2]:
filter_assets_value(risk_df)

# NaN values are replaced further down, so a row without any label is stored
# as an empty string rather than as '[]'.
for _label_column, _label_values in (('ASSETS_VAL', asset_val),
                                     ('INTENT_VAL', intent_val),
                                     ('SOURCE_VAL', source_val)):
    # New column holding the per-row label list, rendered as text with the
    # surrounding list brackets stripped.
    NTM_df[_label_column] = _label_values
    NTM_df[_label_column] = NTM_df[_label_column].astype(str)
    NTM_df[_label_column] = NTM_df[_label_column].str.replace(
        '[', '', regex=False)
    NTM_df[_label_column] = NTM_df[_label_column].str.replace(
        ']', '', regex=False)
NTM_df[:5]

# In[361]:
NTM_df.drop(columns=['RISK_V2'], inplace=True)
NTM_df.columns

# In[3]:
# Plan: store the i-th row of the data frame as a list and build every
# combination with itertools.combinations.
# (The row lists for itertools.combinations are built from this frame.)
# TW_ATT_IP and TW_DMG_IP can hold the same value, in which case it is
# impossible to tell which field a value relates to, so the data is reworked:
# each value is prefixed with the name of its column.
for _tagged_column in ('TW_ATT_IP', 'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT'):
    NTM_df[_tagged_column] = (
        _tagged_column + "=" + NTM_df[_tagged_column].astype(str)
    )

# In[4]:
##################### Continue from here. #####################
##################### Apply the algorithm to every item combination of the
##################### 12 items below (item 12, the equipment column
##################### ACCD_FIND_MTD_CODE, is excluded) #####################
# It should be 13 columns in total
#  1. Institution              INST_NM
#  2. Attack                   DRULE_ATT_TYPE_CODE1
#  3. Asset                    ASSETS_VAL
#  4. Threat attack IP         TW_ATT_IP
#  5. Threat attack port       TW_ATT_PORT
#  6. Threat damage IP         TW_DMG_IP
#  7. Threat damage port       TW_DMG_PORT
#  8. Threat damage protocol   ACCD_DMG_PROTO_NM
#  9. Attacking country        TW_ATT_CT_NM
# 10. Intent (7 kinds)         INTENT_VAL
# 11. IP/URL weight            SOURCE_VAL
# 12. Equipment                ACCD_FIND_MTD_CODE
# 13. Detection rule name      DRULE_NM

# In[363]:
NTM_df.isna().sum()

# Change the NaN to zero (or to an empty string for the name columns).
# The mapping keeps the original statement order: column -> replacement.
_nan_replacements = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for _nan_column, _nan_fill in _nan_replacements.items():
    NTM_df[_nan_column] = NTM_df[_nan_column].replace(np.nan, _nan_fill)

# Check NaN out again
NTM_df.isna().sum()

# In[5]:
# Store the columns of NTM_df as a list.
# Use itertools.combinations to extract every possible scenario.
# Remove the ACCD_FIND_MTD_CODE column.
NTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)

# In[6]:
# Work out how many scenarios can be built by combining 2 to 6 of the items.
import itertools

_ntm_columns = NTM_df.columns.tolist()
item_n = [
    list(itertools.combinations(_ntm_columns, size)) for size in range(2, 7)
]
# From 12C4 (495 combinations) on, storing the data takes so long that a
# plain loop is not suitable.
for size, combos in enumerate(item_n, start=2):
    print("12C" + str(size) + " = " + str(len(combos)))

# In[7]:
from prefixspan import PrefixSpan


def get_combination(arr, n):
    """Return one value list per row for every n-column combination of *arr*.

    The result is grouped by combination: all rows for the first combination
    of columns, then all rows for the second, and so on. Each entry is the
    list of that row's values in the combination's columns.

    Fixes relative to the previous version:
    * the outer loop ran over ``range(len(combination_n[n-2]))``, which is
      the length of a single n-tuple, so only the first n combinations were
      used; every combination is processed now;
    * rows are extracted per combination in one step instead of one
      ``iloc`` lookup per cell.

    Parameters:
        arr: DataFrame whose columns are the items to combine.
        n:   number of columns per combination.
    """
    com_list = []
    for combo in itertools.combinations(arr.columns.tolist(), n):
        com_list.extend(arr[list(combo)].to_numpy().tolist())
    return com_list


def get_prefixspan(n, load_list, save_list, save_df):
    """Mine *load_list* with PrefixSpan and return the patterns as a frame.

    Patterns are mined with a minimum support of 1. Sequences built from a
    k-item combination also yield shorter patterns, so only patterns with
    ``len(pattern) > n`` are kept (callers pass k - 1). The result has the
    columns Cause, Effect and Frequency, sorted by descending Frequency.

    Parameters:
        n:         patterns must be longer than this.
        load_list: list of sequences to mine.
        save_list: unused; kept so existing calls keep working.
        save_df:   unused; kept so existing calls keep working.
    """
    miner = PrefixSpan(load_list)
    patterns = miner.frequent(1, filter=lambda patt, matches: len(patt) > n)
    # Naming the columns up front keeps an empty result sortable.
    pattern_df = pd.DataFrame(patterns, columns=['Frequency', 'Cause'])
    pattern_df = pattern_df.sort_values(
        by=['Frequency'], ascending=False, ignore_index=True)
    return get_effect(pattern_df)


def get_effect(edit_df):
    """Return Cause/Effect/Frequency, with Effect derived from Cause.

    Effect is the last item of the row's Cause that equals one of the rule
    names 'Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB', or NaN when no
    item matches.

    A new frame is built: the previous version wrote into a column slice of
    its argument (and put strings into a float NaN column). *edit_df* is no
    longer modified and may carry any index.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')
    effects = []
    for cause in edit_df['Cause']:
        matches = [item for item in cause if item in drules]
        effects.append(matches[-1] if matches else np.nan)
    result = edit_df[['Cause', 'Frequency']].copy()
    # Insert at position 1 to keep the column order Cause, Effect, Frequency.
    result.insert(
        1, 'Effect', pd.Series(effects, index=result.index, dtype=object))
    return result


# In[8]:
# 1. Combinations of two items
item_of_two = get_combination(NTM_df, 2)
prefix_two = []
prefix_two_df = pd.DataFrame()
prefix_of_two = get_prefixspan(1, item_of_two, prefix_two, prefix_two_df)
prefix_of_two.to_csv('prefix_of_two.csv', sep=',')

# In[9]:
# 2. Combinations of three items
item_of_three = get_combination(NTM_df, 3)
prefix_three_tmp = []
prefix_three_df = pd.DataFrame()
prefix_of_three = get_prefixspan(
    2, item_of_three, prefix_three_tmp, prefix_three_df)
prefix_of_three

# In[ ]:

# In[10]:
# 3. Combinations of four items
item_of_four = get_combination(NTM_df, 4)
prefix_four_tmp = []
prefix_four_df = pd.DataFrame()
prefix_of_four = get_prefixspan(
    3, item_of_four, prefix_four_tmp, prefix_four_df)

# In[11]:
# 4. Combinations of five items
item_of_five = get_combination(NTM_df, 5)
prefix_five_tmp = []
prefix_five_df = pd.DataFrame()
prefix_of_five = get_prefixspan(
    4, item_of_five, prefix_five_tmp, prefix_five_df)
prefix_of_five

# In[12]:
# 5. Combinations of six items
item_of_six = get_combination(NTM_df, 6)
prefix_six_tmp = []
prefix_six_df = pd.DataFrame()
prefix_of_six = get_prefixspan(5, item_of_six, prefix_six_tmp, prefix_six_df)
prefix_of_six
##################### NTM section End #####################

# In[13]:
##################### MTM section #####################
# Same goes for the MTM section

# In[375]:
# .copy() gives an independent frame: the columns added below must not be
# written into a slice of df.
MTM_df = df[df['ACCD_FIND_MTD_CODE'] == 2].copy()
len(MTM_df)

# In[376]:
# Pick RISK_V2 out in order to get the asset, risk, intent and black-IP flags.
RISK_V2_MTM = MTM_df['RISK_V2']
RISK_V2_FILTERED_MTM = RISK_V2_MTM.dropna()
print(RISK_V2_MTM.size)
print(RISK_V2_FILTERED_MTM.size)

# Each record is normalised on its own and everything is concatenated once;
# growing the frame with pd.concat inside the loop copies it every iteration.
_risk_frames_MTM = [
    json_normalize(json.loads(raw_risk_MTM.replace("'", "\"")))
    for raw_risk_MTM in RISK_V2_FILTERED_MTM
]
risk_df_MTM = (pd.concat(_risk_frames_MTM, ignore_index=True)
               if _risk_frames_MTM else pd.DataFrame())
risk_df_col_MTM = risk_df_MTM.columns.values.tolist()

# In[377]:
# Per-row label lists filled by filter_assets_value_MTM (one entry per row).
asset_val_MTM = []
intent_val_MTM = []
source_val_MTM = []


def filter_assets_value_MTM(risk):
    """Append the asset/intent/source labels of every row of *risk*.

    For each row, every column whose name contains 'ASSETS_VAL_',
    'INTENT_VAL_' or 'SOURCE_VAL_' and whose value is set yields a label
    'RISK_V2.<column>=<description>'. One list per row is appended to the
    module-level lists asset_val_MTM, intent_val_MTM and source_val_MTM,
    also when the row could not be read completely, so the lists stay
    aligned with *risk*.

    The columns come from *risk* itself; the previous version iterated the
    NTM column list (risk_df_col). A NaN value (key missing from a record)
    counts as not set, because NaN is truthy in Python.

    Returns None; the result is the side effect on the three lists.
    """
    columns = list(risk.columns)
    for position, (_, row) in enumerate(risk.iterrows()):
        risks = []
        intents = []
        sources = []
        try:
            for key in columns:
                value = row[key]
                if isinstance(value, float) and np.isnan(value):
                    continue
                if not value:
                    continue
                if 'ASSETS_VAL_' in key:
                    risks.append(
                        'RISK_V2.' + key + '=' + get_asset_desc_MTM(key))
                if 'INTENT_VAL_' in key:
                    intents.append(
                        'RISK_V2.' + key + '=' + get_intent_desc_MTM(key))
                if 'SOURCE_VAL_' in key:
                    sources.append(
                        'RISK_V2.' + key + '=' + get_source_desc_MTM(key))
        except (KeyError, TypeError, ValueError) as exc:
            # Best effort, as before: report the row and keep what was read.
            print('filter_assets_value_MTM: could not read row',
                  position, repr(exc))
        finally:
            asset_val_MTM.append(risks)
            intent_val_MTM.append(intents)
            source_val_MTM.append(sources)


# In[378]:
# modified
def get_asset_desc_MTM(asset_field):
    """Return the description of an ASSETS_VAL_* field name, '' if unknown.

    The table is the same one the NTM section uses, so this delegates to
    get_asset_desc instead of repeating it.
    """
    return get_asset_desc(asset_field)


# In[381]:
# modified
def filter_intent_MTM(intent):
    """Return the labels of the truthy INTENT_VAL_* keys of *intent*.

    Same behaviour as filter_intent, to which this delegates.
    """
    return filter_intent(intent)


# In[382]:
def get_intent_desc_MTM(intent_field):
    """Return the description of an INTENT_VAL_* field name, '' if unknown.

    Same table as get_intent_desc, to which this delegates.
    """
    return get_intent_desc(intent_field)


# In[384]:
# modified
def filter_source_MTM(source):
    """Return the labels of the truthy SOURCE_VAL_* keys of *source*.

    Same behaviour as filter_source, to which this delegates.
    """
    return filter_source(source)


# In[385]:
def get_source_desc_MTM(source_field):
    """Return the description of a SOURCE_VAL_* field name, '' if unknown.

    Same table as get_source_desc, to which this delegates.
    """
    return get_source_desc(source_field)


# In[386]:
# The MTM variant must be used here: filter_assets_value fills the NTM lists
# and would leave asset_val_MTM / intent_val_MTM / source_val_MTM empty.
filter_assets_value_MTM(risk_df_MTM)

# NaN values are replaced further down, so a row without any label is stored
# as an empty string rather than as '[]'.
for _label_column_MTM, _label_values_MTM in (('ASSETS_VAL', asset_val_MTM),
                                             ('INTENT_VAL', intent_val_MTM),
                                             ('SOURCE_VAL', source_val_MTM)):
    # New column holding the per-row label list, rendered as text with the
    # surrounding list brackets stripped. Every step reads MTM_df; the
    # SOURCE_VAL steps used to read NTM_df by mistake.
    MTM_df[_label_column_MTM] = _label_values_MTM
    MTM_df[_label_column_MTM] = MTM_df[_label_column_MTM].astype(str)
    MTM_df[_label_column_MTM] = MTM_df[_label_column_MTM].str.replace(
        '[', '', regex=False)
    MTM_df[_label_column_MTM] = MTM_df[_label_column_MTM].str.replace(
        ']', '', regex=False)
MTM_df[:5]

# In[361]:
MTM_df.drop(columns=['RISK_V2'], inplace=True)
MTM_df.columns

# In[388]:
MTM_df.isna().sum()

# In[389]:
# NOTE(review): the NTM section prefixes TW_ATT_IP, TW_ATT_PORT, TW_DMG_IP
# and TW_DMG_PORT with their column names before mining; this section does
# not. Confirm whether that difference is intended.

# Change the NaN to zero (or to an empty string for the name columns).
# The mapping keeps the original statement order: column -> replacement.
_nan_replacements_MTM = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for _nan_column_MTM, _nan_fill_MTM in _nan_replacements_MTM.items():
    MTM_df[_nan_column_MTM] = MTM_df[_nan_column_MTM].replace(
        np.nan, _nan_fill_MTM)

# In[390]:
# Check NaN out again
MTM_df.isna().sum()

# In[391]:
# Remove the ACCD_FIND_MTD_CODE column.
MTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)


def get_combination_MTM(arr, n):
    """Return one value list per row for every n-column combination of *arr*.

    The result is grouped by combination: all rows for the first combination
    of columns, then all rows for the second, and so on. Each entry is the
    list of that row's values in the combination's columns.

    Fixes relative to the previous version:
    * the outer loop ran over ``range(len(combination_n[n-2]))``, which is
      the length of a single n-tuple, so only the first n combinations were
      used; every combination is processed now;
    * rows are extracted per combination in one step instead of one
      ``iloc`` lookup per cell.

    Parameters:
        arr: DataFrame whose columns are the items to combine.
        n:   number of columns per combination.
    """
    com_list = []
    for combo in itertools.combinations(arr.columns.tolist(), n):
        com_list.extend(arr[list(combo)].to_numpy().tolist())
    return com_list


def get_prefixspan_MTM(n, load_list, save_list, save_df):
    """Mine *load_list* with PrefixSpan and return the patterns as a frame.

    Patterns are mined with a minimum support of 1. Sequences built from a
    k-item combination also yield shorter patterns, so only patterns with
    ``len(pattern) > n`` are kept (callers pass k - 1). The result has the
    columns Cause, Effect and Frequency, sorted by descending Frequency.

    Parameters:
        n:         patterns must be longer than this.
        load_list: list of sequences to mine.
        save_list: unused; kept so existing calls keep working.
        save_df:   unused; kept so existing calls keep working.
    """
    miner = PrefixSpan(load_list)
    patterns = miner.frequent(1, filter=lambda patt, matches: len(patt) > n)
    # Naming the columns up front keeps an empty result sortable.
    pattern_df = pd.DataFrame(patterns, columns=['Frequency', 'Cause'])
    pattern_df = pattern_df.sort_values(
        by=['Frequency'], ascending=False, ignore_index=True)
    return get_effect_MTM(pattern_df)


def get_effect_MTM(edit_df):
    """Return Cause/Effect/Frequency, with Effect derived from Cause.

    Effect is the last item of the row's Cause that equals one of the rule
    names 'Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB', or NaN when no
    item matches.

    A new frame is built: the previous version wrote into a column slice of
    its argument (and put strings into a float NaN column). *edit_df* is no
    longer modified and may carry any index.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')
    effects = []
    for cause in edit_df['Cause']:
        matches = [item for item in cause if item in drules]
        effects.append(matches[-1] if matches else np.nan)
    result = edit_df[['Cause', 'Frequency']].copy()
    # Insert at position 1 to keep the column order Cause, Effect, Frequency.
    result.insert(
        1, 'Effect', pd.Series(effects, index=result.index, dtype=object))
    return result


# 1. Combinations of two items
item_of_two_MTM = get_combination_MTM(MTM_df, 2)
prefix_two_MTM = []
prefix_two_df_MTM = pd.DataFrame()
# The last argument used to be the misspelt, undefined prefix_two_d_MTMf.
prefix_of_two_MTM = get_prefixspan_MTM(
    1, item_of_two_MTM, prefix_two_MTM, prefix_two_df_MTM)
prefix_of_two_MTM

# 2. Combinations of three items
item_of_three_MTM = get_combination_MTM(MTM_df, 3)
prefix_three_tmp_MTM = []
prefix_three_df_MTM = pd.DataFrame()
prefix_of_three_MTM = get_prefixspan_MTM(
    2, item_of_three_MTM, prefix_three_tmp_MTM, prefix_three_df_MTM)
prefix_of_three_MTM

# 3. Combinations of four items
item_of_four_MTM = get_combination_MTM(MTM_df, 4)
prefix_four_tmp_MTM = []
prefix_four_df_MTM = pd.DataFrame()
prefix_of_four_MTM = get_prefixspan_MTM(
    3, item_of_four_MTM, prefix_four_tmp_MTM, prefix_four_df_MTM)
# 4. Combinations of five items
# The MTM runs go through the *_MTM helpers this section defines, instead of
# the NTM helpers, so the section is self-consistent.
item_of_five_MTM = get_combination_MTM(MTM_df, 5)
prefix_five_tmp_MTM = []
prefix_five_df_MTM = pd.DataFrame()
prefix_of_five_MTM = get_prefixspan_MTM(
    4, item_of_five_MTM, prefix_five_tmp_MTM, prefix_five_df_MTM)
prefix_of_five_MTM

# 5. Combinations of six items
item_of_six_MTM = get_combination_MTM(MTM_df, 6)
prefix_six_tmp_MTM = []
prefix_six_df_MTM = pd.DataFrame()
prefix_of_six_MTM = get_prefixspan_MTM(
    5, item_of_six_MTM, prefix_six_tmp_MTM, prefix_six_df_MTM)
prefix_of_six_MTM
##################### MTM section End #####################

# In[ ]:

# In[ ]: