Преглед на файлове

MTM Section 추가

master
yevKwon преди 4 години
родител
ревизия
400a8d89ab
променени са 1 файла, в които са добавени 731 реда и са изтрити 0 реда
  1. 731
    0
      keris.ipynb/PrefixSpan_20210925_edit.py

+ 731
- 0
keris.ipynb/PrefixSpan_20210925_edit.py Целия файл

@@ -0,0 +1,731 @@
1
+#!/usr/bin/env python
2
+# coding: utf-8
3
+
4
+# <p>NTM(유해트래픽 탐지장비)</p>
5
+# <p>MTM(악성파일 탐지장비)</p>
6
+
7
+# In[1]:
8
+
9
+
10
+#!/usr/bin/env python
11
+# coding: utf-8
12
+
13
+import pandas as pd
14
+import numpy as np
15
+from mlxtend.preprocessing import TransactionEncoder
16
+from mlxtend.frequent_patterns import association_rules, fpgrowth
17
+from prefixspan import PrefixSpan
18
+
19
# Load the sampled 2020 incident export.
# low_memory=False lets pandas infer each column's dtype from the whole file,
# which prevents DtypeWarning on mixed-type columns.
df = pd.read_csv('ts_data_accident-2020_sample.csv', low_memory=False)
df = df[['RISK_V2', 'INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',
         'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',
         'ACCD_FIND_MTD_CODE', 'DRULE_NM']].dropna()
len(df)  # the original author saw 10000 rows here after a successful load

##################### NTM section #####################
# Keep the rows whose detection-method code is the integer 1 (not the string '1').
# .copy() gives an independent frame: columns are added to and dropped from
# NTM_df further down, and doing that on a filtered slice of df triggers
# pandas' SettingWithCopyWarning.
NTM_df = df[df['ACCD_FIND_MTD_CODE'] == 1].copy()
len(NTM_df)

# RISK_V2 carries the asset / intent / source flags as a dict-like string.
RISK_V2 = NTM_df['RISK_V2']

RISK_V2_FILTERED = RISK_V2.dropna()
print(RISK_V2.size)
print(RISK_V2_FILTERED.size)

# filter_assets_value could not read the raw strings, so RISK_V2 is expanded
# into its own DataFrame (one row per incident, one column per flag).
import json
from pandas import json_normalize

# Single quotes are swapped for double quotes so json.loads accepts the text.
# Each record is normalized on its own, but the frames are concatenated once:
# growing a DataFrame with pd.concat inside the loop copies every previous row
# on every iteration.
_ntm_risk_frames = [
    json_normalize(json.loads(raw_risk.replace("'", "\"")))
    for raw_risk in RISK_V2_FILTERED
]
if _ntm_risk_frames:
    risk_df = pd.concat(_ntm_risk_frames, ignore_index=True)
else:
    risk_df = pd.DataFrame()

risk_df_col = risk_df.columns.values.tolist()
48
+
49
+# In[352]:
50
# Module-level accumulators filled by filter_assets_value(): each call appends
# one list of description strings per processed row to each of them.
asset_val = []
intent_val = []
source_val = []


def _risk_flag_is_set(value):
    """Return True when a risk-flag cell holds a real, truthy value.

    NaN is truthy in Python, so a plain truth test would count a missing cell
    as "set"; None and NaN are therefore rejected explicitly.
    """
    if value is None:
        return False
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return False
    return bool(value)


def filter_assets_value(risk):
    """Append per-row asset, intent and source descriptions to the accumulators.

    For every row of ``risk`` three lists are built from the columns whose
    name contains 'ASSETS_VAL_', 'INTENT_VAL_' or 'SOURCE_VAL_' and whose cell
    is set. They are appended to the module-level ``asset_val``,
    ``intent_val`` and ``source_val`` lists. Nothing is returned.

    The columns are taken from ``risk`` itself rather than from the global
    ``risk_df_col``, so the function works for any frame it is handed.

    Args:
        risk: DataFrame with one row per incident and one column per flag.
    """
    columns = list(risk.columns)
    for position, (_, row) in enumerate(risk.iterrows()):
        risks = []
        intents = []
        sources = []
        try:
            for key in columns:
                if not _risk_flag_is_set(row[key]):
                    continue
                if 'ASSETS_VAL_' in key:
                    risks.append('RISK_V2.' + key + '=' + get_asset_desc(key))
                if 'INTENT_VAL_' in key:
                    intents.append('RISK_V2.' + key + '=' + get_intent_desc(key))
                if 'SOURCE_VAL_' in key:
                    sources.append('RISK_V2.' + key + '=' + get_source_desc(key))
        except Exception as exc:
            # Best effort, as before: a bad row is reported and still yields
            # whatever was collected so far. Unlike a bare ``except:`` this
            # lets KeyboardInterrupt and SystemExit through.
            print('filter_assets_value: row ' + str(position) + ' skipped: ' + repr(exc))
        # Always append, so the accumulators stay aligned with the rows of ``risk``.
        asset_val.append(risks)
        intent_val.append(intents)
        source_val.append(sources)
76
+    
77
+    
78
+# modified
79
# Asset flag name -> description; 1-11 are the public ("공인") asset classes,
# 12-22 the private ("사설") ones.
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the description for an 'ASSETS_VAL_n' flag name, or '' if unknown."""
    # Anything that is not a string can never match a flag name.
    if not isinstance(asset_field, str):
        return ''
    return _ASSET_DESCRIPTIONS.get(asset_field, '')
126
+
127
+
128
+
129
+# modified
130
def filter_intent(intent):
    """Return 'RISK_V2.<key>=<description>' for every set intent flag.

    ``intent`` is iterated for its keys and indexed by key; a key counts when
    its name contains 'INTENT_VAL_' and its value is truthy.
    """
    return [
        'RISK_V2.' + name + '=' + get_intent_desc(name)
        for name in intent
        if 'INTENT_VAL_' in name and intent[name]
    ]
137
+
138
+
139
+# In[356]:
140
+
141
+
142
# Intent flag name -> description.
_INTENT_DESCRIPTIONS = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc(intent_field):
    """Return the description for an 'INTENT_VAL_n' flag name, or '' if unknown."""
    if not isinstance(intent_field, str):
        return ''
    return _INTENT_DESCRIPTIONS.get(intent_field, '')
159
+
160
+
161
+# In[358]:
162
+
163
+
164
+# modified
165
def filter_source(source):
    """Return 'RISK_V2.<key>=<description>' for every set source flag.

    ``source`` is iterated for its keys and indexed by key; a key counts when
    its name contains 'SOURCE_VAL_' and its value is truthy.
    """
    return [
        'RISK_V2.' + name + '=' + get_source_desc(name)
        for name in source
        if 'SOURCE_VAL_' in name and source[name]
    ]
172
+
173
+
174
+# In[359]:
175
+
176
+
177
# Source flag name -> description; only flags 1 and 3 have one.
_SOURCE_DESCRIPTIONS = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc(source_field):
    """Return the description for a 'SOURCE_VAL_n' flag name, or '' if unknown."""
    if not isinstance(source_field, str):
        return ''
    return _SOURCE_DESCRIPTIONS.get(source_field, '')
184
+
185
+
186
+
187
+# In[2]:
188
+
189
+
190
# Fill asset_val / intent_val / source_val with one list per row of risk_df.
filter_assets_value(risk_df)

# Attach each accumulator as a text column. The list is stringified and its
# square brackets stripped, so a row without any flag ends up as an empty
# string instead of '[]' (the NaN clean-up further down relies on that).
for _column, _values in (
    ('ASSETS_VAL', asset_val),
    ('INTENT_VAL', intent_val),
    ('SOURCE_VAL', source_val),
):
    NTM_df[_column] = _values
    _as_text = NTM_df[_column].astype(str)
    _as_text = _as_text.str.replace('[', '', regex=False)
    NTM_df[_column] = _as_text.str.replace(']', '', regex=False)
NTM_df[:5]

# The raw RISK_V2 string is no longer needed once it has been expanded.
NTM_df.drop(columns=['RISK_V2'], inplace=True)
NTM_df.columns
214
+
215
+
216
+# In[3]:
217
+
218
+
219
+#data frame의 i번째 row를 list로 저장하여 itertools.combinations로 모든 조합 만들 예정
220
+#TW_ATT_IP와 TW_DMG_IP의 값이 같은 경우 어떤 값과의 관계인지 알 수 없으므로 데이터 가공
221
# Prefix each endpoint value with its column name, so that an identical IP or
# port appearing as both attacker and victim stays distinguishable as an item.
for _endpoint in ('TW_ATT_IP', 'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT'):
    NTM_df[_endpoint] = _endpoint + "=" + NTM_df[_endpoint].astype(str)
225
+
226
+
227
+# In[4]:
228
+
229
+
230
+##################### 여기서부터 진행하시면 됩니다. #####################
231
+##################### 아래 12개 아이템(12. 장비 ACCD_FIND_MTD_CODE 제외)에 대해서 모든 아이템 조합에 알고리즘 적용하기#####################
232
+
233
+# It should be 13 columns in total
234
+
235
+# 1. 기관 INST_NM
236
+# 2. 공격 DRULE_ATT_TYPE_CODE1
237
+# 3. 자산 ASSETS_VAL
238
+# 4. 위협공격ip TW_ATT_IP
239
+# 5. 위협공격port TW_ATT_PORT
240
+# 6. 위협피해ip TW_DMG_IP
241
+# 7. 위협피해port TW_DMG_PORT
242
+# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM
243
+# 9. 공격국가 TW_ATT_CT_NM
244
+# 10. 의도(7개) INTENT_VAL
245
+# 11. IP/URL 가중치 SOURCE_VAL
246
+# 12. 장비 ACCD_FIND_MTD_CODE
247
+# 13. 탐지규칙명 DRULE_NM
248
+
249
+
250
+# In[363]:
251
NTM_df.isna().sum()

# Replace NaN column by column: '' for the name/code columns, 0 for the
# endpoint and derived flag columns.
_NTM_FILL_VALUES = (
    ('ACCD_DMG_PROTO_NM', ''),
    ('INST_NM', ''),
    ('DRULE_ATT_TYPE_CODE1', ''),
    ('TW_ATT_IP', 0),
    ('TW_ATT_PORT', 0),
    ('TW_DMG_IP', 0),
    ('TW_DMG_PORT', 0),
    ('TW_ATT_CT_NM', ''),
    ('ASSETS_VAL', 0),
    ('INTENT_VAL', 0),
    ('SOURCE_VAL', 0),
    ('DRULE_NM', ''),
)
for _column, _filler in _NTM_FILL_VALUES:
    NTM_df[_column] = NTM_df[_column].replace(np.nan, _filler)

# Check for NaN again.
NTM_df.isna().sum()

# The remaining columns are the items fed to itertools.combinations; the
# detection-method column was only needed for filtering, so it is removed.
NTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)
280
+
281
+
282
+# In[6]:
283
+
284
+
285
+# 12의 아이템 중 2개의 조합으로 만들어질 수 있는 모든 시나리오의 갯수 파악
286
# Count how many scenarios can be formed from the item columns for
# combination sizes 2 through 6.
import itertools

_ntm_columns = NTM_df.columns.tolist()
item_n = [list(itertools.combinations(_ntm_columns, size)) for size in range(2, 7)]

# From 12C4 (495 combinations) onward, storing the data with a plain loop
# takes too long, so simple iteration is not a suitable approach.
for _offset, _combos in enumerate(item_n):
    print("12C" + str(_offset + 2) + " = " + str(len(_combos)))
295
+
296
+
297
+# In[7]:
298
+
299
+
300
+from prefixspan import PrefixSpan
301
+
302
+# arr를 매개변수로 받아 n개의 아이템의 조합 반환
303
def get_combination(arr, n):
    """Return the row values of every n-column combination of ``arr``.

    The result is grouped by combination: all rows for the first combination,
    then all rows for the second, and so on. Each entry is a list holding the
    values of one row in the combination's column order.

    The original loop bound was ``len(combination_n[n-2])``, which is the
    length of a single combination tuple (always ``n``). Only the first ``n``
    combinations were processed, and the lookup raised IndexError when fewer
    than ``n - 1`` combinations existed. All combinations are now visited.

    Args:
        arr: DataFrame whose columns are the items to combine.
        n: Number of columns per combination.

    Returns:
        A list of lists, ``len(arr)`` entries per combination.
    """
    com_list = []
    for cols in itertools.combinations(arr.columns.tolist(), n):
        # Slice the combination's columns once and walk the rows as plain
        # tuples; this avoids building a Series per row with arr.iloc[i].
        subset = arr[list(cols)]
        com_list.extend(list(row) for row in subset.itertuples(index=False, name=None))
    return com_list
316
+
317
def get_prefixspan(n, load_list, save_list, save_df):
    """Mine frequent patterns from ``load_list`` and return them as a table.

    Args:
        n: Patterns are kept only when they hold more than ``n`` items. The
            callers pass the combination size minus one, so shorter
            sub-patterns are filtered out.
        load_list: Sequences (lists of item values) handed to PrefixSpan.
        save_list: Unused; kept so existing calls keep working.
        save_df: Unused; kept so existing calls keep working.

    Returns:
        DataFrame with columns 'Cause', 'Effect', 'Frequency', sorted by
        descending frequency ('Effect' is filled in by get_effect).
    """
    miner = PrefixSpan(load_list)
    # Minimum support of 1; the filter drops patterns with n items or fewer.
    patterns = miner.frequent(1, filter=lambda patt, matches: len(patt) > n)
    # Naming the columns up front keeps 'Frequency' and 'Cause' present even
    # when no pattern survives; renaming the default 0/1 columns of an empty
    # frame left nothing to sort by and raised KeyError.
    result = pd.DataFrame(patterns, columns=['Frequency', 'Cause'])
    result = result.sort_values(by=['Frequency'], ascending=False, ignore_index=True)
    return get_effect(result)
327
+
328
def get_effect(edit_df):
    """Return a copy of ``edit_df`` with an 'Effect' column derived from 'Cause'.

    'Effect' is the last item of the row's 'Cause' sequence that equals one of
    the rule labels ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB'), or NaN
    when the sequence holds none of them.

    Changes from the original:
      * the caller's frame is no longer modified in place;
      * 'Effect' is an object column, so writing a label into it does not
        upcast a float column;
      * rows are handled by position, so the index need not be 0..n-1.

    Args:
        edit_df: DataFrame with 'Cause' (a sequence of items per row) and
            'Frequency' columns.

    Returns:
        New DataFrame with columns ordered 'Cause', 'Effect', 'Frequency'.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')

    def _last_rule(cause):
        # The last matching item wins, as in the original nested loop.
        found = np.nan
        for item in cause:
            if item in drules:
                found = item
        return found

    result = edit_df.copy()
    result['Effect'] = pd.Series(
        [_last_rule(cause) for cause in result['Cause']],
        index=result.index,
        dtype=object,
    )
    return result[['Cause', 'Effect', 'Frequency']]
341
+
342
+
343
+# In[8]:
344
+
345
+
346
+# 1. 두 아이템의 조합
347
# 1. Combinations of two items (the result is also written to CSV).
item_of_two = get_combination(NTM_df, 2)
prefix_two, prefix_two_df = [], pd.DataFrame()
prefix_of_two = get_prefixspan(1, item_of_two, prefix_two, prefix_two_df)
prefix_of_two.to_csv('prefix_of_two.csv', sep=',')

# 2. Combinations of three items.
item_of_three = get_combination(NTM_df, 3)
prefix_three_tmp, prefix_three_df = [], pd.DataFrame()
prefix_of_three = get_prefixspan(2, item_of_three, prefix_three_tmp, prefix_three_df)
prefix_of_three

# 3. Combinations of four items.
item_of_four = get_combination(NTM_df, 4)
prefix_four_tmp, prefix_four_df = [], pd.DataFrame()
prefix_of_four = get_prefixspan(3, item_of_four, prefix_four_tmp, prefix_four_df)

# 4. Combinations of five items.
item_of_five = get_combination(NTM_df, 5)
prefix_five_tmp, prefix_five_df = [], pd.DataFrame()
prefix_of_five = get_prefixspan(4, item_of_five, prefix_five_tmp, prefix_five_df)
prefix_of_five

# 5. Combinations of six items.
item_of_six = get_combination(NTM_df, 6)
prefix_six_tmp, prefix_six_df = [], pd.DataFrame()
prefix_of_six = get_prefixspan(5, item_of_six, prefix_six_tmp, prefix_six_df)
prefix_of_six
401
+##################### NTM section End #####################
402
+
403
+
404
+# In[13]:
405
+
406
+
407
+##################### MTM section #####################
408
+# Same goes for the MTM section
409
+
410
+# In[375]:
411
+
412
+
413
# Keep the rows whose detection-method code is 2.
# .copy() gives an independent frame: columns are added to and dropped from
# MTM_df further down, and doing that on a filtered slice of df triggers
# pandas' SettingWithCopyWarning.
MTM_df = df[df['ACCD_FIND_MTD_CODE'] == 2].copy()
len(MTM_df)

# RISK_V2 carries the asset / intent / source flags as a dict-like string.
RISK_V2_MTM = MTM_df['RISK_V2']

RISK_V2_FILTERED_MTM = RISK_V2_MTM.dropna()
print(RISK_V2_MTM.size)
print(RISK_V2_FILTERED_MTM.size)

# Single quotes are swapped for double quotes so json.loads accepts the text.
# Each record is normalized on its own, but the frames are concatenated once:
# growing a DataFrame with pd.concat inside the loop copies every previous row
# on every iteration.
_mtm_risk_frames = [
    json_normalize(json.loads(raw_risk.replace("'", "\"")))
    for raw_risk in RISK_V2_FILTERED_MTM
]
if _mtm_risk_frames:
    risk_df_MTM = pd.concat(_mtm_risk_frames, ignore_index=True)
else:
    risk_df_MTM = pd.DataFrame()

risk_df_col_MTM = risk_df_MTM.columns.values.tolist()
435
+
436
+# In[377]:
437
+
438
+
439
# Module-level accumulators filled by filter_assets_value_MTM(): each call
# appends one list of description strings per processed row to each of them.
asset_val_MTM = []
intent_val_MTM = []
source_val_MTM = []


def _risk_flag_is_set_MTM(value):
    """Return True when a risk-flag cell holds a real, truthy value.

    NaN is truthy in Python, so a plain truth test would count a missing cell
    as "set"; None and NaN are therefore rejected explicitly.
    """
    if value is None:
        return False
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return False
    return bool(value)


def filter_assets_value_MTM(risk):
    """Append per-row asset, intent and source descriptions to the MTM accumulators.

    For every row of ``risk`` three lists are built from the columns whose
    name contains 'ASSETS_VAL_', 'INTENT_VAL_' or 'SOURCE_VAL_' and whose cell
    is set. They are appended to ``asset_val_MTM``, ``intent_val_MTM`` and
    ``source_val_MTM``. Nothing is returned.

    The original iterated ``risk_df_col``, the column list of the NTM frame.
    The columns now come from ``risk`` itself, so MTM-only flags are seen and
    NTM-only ones cannot raise.

    Args:
        risk: DataFrame with one row per incident and one column per flag.
    """
    columns = list(risk.columns)
    for position, (_, row) in enumerate(risk.iterrows()):
        risks = []
        intents = []
        sources = []
        try:
            for key in columns:
                if not _risk_flag_is_set_MTM(row[key]):
                    continue
                if 'ASSETS_VAL_' in key:
                    risks.append('RISK_V2.' + key + '=' + get_asset_desc(key))
                if 'INTENT_VAL_' in key:
                    intents.append('RISK_V2.' + key + '=' + get_intent_desc(key))
                if 'SOURCE_VAL_' in key:
                    sources.append('RISK_V2.' + key + '=' + get_source_desc(key))
        except Exception as exc:
            # Best effort, as before: a bad row is reported and still yields
            # whatever was collected so far. Unlike a bare ``except:`` this
            # lets KeyboardInterrupt and SystemExit through.
            print('filter_assets_value_MTM: row ' + str(position) + ' skipped: ' + repr(exc))
        # Always append, so the accumulators stay aligned with the rows of ``risk``.
        asset_val_MTM.append(risks)
        intent_val_MTM.append(intents)
        source_val_MTM.append(sources)
466
+
467
+# In[378]:
468
+
469
+# modified
470
# Descriptions for ASSETS_VAL_1 .. ASSETS_VAL_22, in flag-number order.
_ASSET_LABELS_MTM = (
    '공인-전체IP대역(유선)',
    '공인-전체IP대역(무선)',
    '공인-WEB서버',
    '공인-내부응용서버',
    '공인-DB서버',
    '공인-패치서버',
    '공인-네트워크',
    '공인-보안',
    '공인-업무용PC',
    '공인-비업무용PC',
    '공인-기타',
    '사설-전체IP대역(유선)',
    '사설-전체IP대역(무선)',
    '사설-WEB서버',
    '사설-내부응용서버',
    '사설-DB서버',
    '사설-패치서버',
    '사설-네트워크',
    '사설-보안',
    '사설-업무용PC',
    '사설-비업무용PC',
    '사설-기타',
)
_ASSET_DESCRIPTIONS_MTM = {
    'ASSETS_VAL_' + str(number): label
    for number, label in enumerate(_ASSET_LABELS_MTM, start=1)
}


def get_asset_desc_MTM(asset_field):
    """Return the description for an 'ASSETS_VAL_n' flag name, or '' if unknown."""
    # Anything that is not a string can never match a flag name.
    if not isinstance(asset_field, str):
        return ''
    return _ASSET_DESCRIPTIONS_MTM.get(asset_field, '')
517
+
518
+
519
+# In[381]:
520
+
521
+
522
+# modified
523
def filter_intent_MTM(intent):
    """Return 'RISK_V2.<key>=<description>' for every set intent flag.

    ``intent`` is iterated for its keys and indexed by key; a key counts when
    its name contains 'INTENT_VAL_' and its value is truthy.
    """
    selected = (key for key in intent if 'INTENT_VAL_' in key and intent[key])
    return ['RISK_V2.' + key + '=' + get_intent_desc(key) for key in selected]
530
+
531
+
532
+# In[382]:
533
+
534
+
535
# Intent flag name -> description.
_INTENT_DESCRIPTIONS_MTM = dict((
    ('INTENT_VAL_1', '파괴'),
    ('INTENT_VAL_2', '유출'),
    ('INTENT_VAL_3', '지연'),
    ('INTENT_VAL_4', '잠복'),
    ('INTENT_VAL_5', '단순침입'),
    ('INTENT_VAL_6', 'MD5'),
    ('INTENT_VAL_0', 'Default'),
))


def get_intent_desc_MTM(intent_field):
    """Return the description for an 'INTENT_VAL_n' flag name, or '' if unknown."""
    if not isinstance(intent_field, str):
        return ''
    return _INTENT_DESCRIPTIONS_MTM.get(intent_field, '')
552
+
553
+
554
+
555
+# In[384]:
556
+
557
+
558
+# modified
559
def filter_source_MTM(source):
    """Return 'RISK_V2.<key>=<description>' for every set source flag.

    ``source`` is iterated for its keys and indexed by key; a key counts when
    its name contains 'SOURCE_VAL_' and its value is truthy.
    """
    selected = (key for key in source if 'SOURCE_VAL_' in key and source[key])
    return ['RISK_V2.' + key + '=' + get_source_desc(key) for key in selected]
566
+
567
+
568
+# In[385]:
569
+
570
+
571
def get_source_desc_MTM(source_field):
    """Return the description for a 'SOURCE_VAL_n' flag name, or '' if unknown."""
    # Only flags 1 and 3 have a description.
    known = {
        'SOURCE_VAL_1': '북한IP',
        'SOURCE_VAL_3': 'ECSC Black IP',
    }
    if not isinstance(source_field, str):
        return ''
    return known.get(source_field, '')
578
+
579
+
580
+# In[386]:
581
+
582
# Fill asset_val_MTM / intent_val_MTM / source_val_MTM with one list per row
# of risk_df_MTM. The original called filter_assets_value(), which appends to
# the NTM accumulators and leaves the MTM ones empty.
filter_assets_value_MTM(risk_df_MTM)

# Attach each accumulator as a text column. The list is stringified and its
# square brackets stripped, so a row without any flag ends up as an empty
# string instead of '[]' (the NaN clean-up further down relies on that).
# Two further fixes: SOURCE_VAL was assigned from the undefined name
# source_val_NTN, and then cleaned from NTM_df instead of MTM_df.
for _column, _values in (
    ('ASSETS_VAL', asset_val_MTM),
    ('INTENT_VAL', intent_val_MTM),
    ('SOURCE_VAL', source_val_MTM),
):
    MTM_df[_column] = _values
    _as_text = MTM_df[_column].astype(str)
    _as_text = _as_text.str.replace('[', '', regex=False)
    MTM_df[_column] = _as_text.str.replace(']', '', regex=False)
MTM_df[:5]

# The raw RISK_V2 string is no longer needed once it has been expanded.
MTM_df.drop(columns=['RISK_V2'], inplace=True)
MTM_df.columns
606
+
607
+
608
+# In[388]:
609
+
610
+
611
MTM_df.isna().sum()

# Replace NaN column by column: '' for the name/code columns, 0 for the
# endpoint and derived flag columns.
# NOTE(review): unlike the NTM section, the TW_* columns are not prefixed with
# their column name before mining here - confirm whether that is intended.
_MTM_FILL_VALUES = (
    ('ACCD_DMG_PROTO_NM', ''),
    ('INST_NM', ''),
    ('DRULE_ATT_TYPE_CODE1', ''),
    ('TW_ATT_IP', 0),
    ('TW_ATT_PORT', 0),
    ('TW_DMG_IP', 0),
    ('TW_DMG_PORT', 0),
    ('TW_ATT_CT_NM', ''),
    ('ASSETS_VAL', 0),
    ('INTENT_VAL', 0),
    ('SOURCE_VAL', 0),
    ('DRULE_NM', ''),
)
for _column, _filler in _MTM_FILL_VALUES:
    MTM_df[_column] = MTM_df[_column].replace(np.nan, _filler)

# Check for NaN again.
MTM_df.isna().sum()

# The detection-method column was only needed for filtering; remove it.
MTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)
643
+
644
+# arr를 매개변수로 받아 n개의 아이템의 조합 반환
645
def get_combination_MTM(arr, n):
    """Return the row values of every n-column combination of ``arr``.

    The result is grouped by combination: all rows for the first combination,
    then all rows for the second, and so on. Each entry is a list holding the
    values of one row in the combination's column order.

    The original loop bound was ``len(combination_n[n-2])``, which is the
    length of a single combination tuple (always ``n``). Only the first ``n``
    combinations were processed, and the lookup raised IndexError when fewer
    than ``n - 1`` combinations existed. All combinations are now visited.

    Args:
        arr: DataFrame whose columns are the items to combine.
        n: Number of columns per combination.

    Returns:
        A list of lists, ``len(arr)`` entries per combination.
    """
    com_list = []
    for cols in itertools.combinations(arr.columns.tolist(), n):
        # Slice the combination's columns once and walk the rows as plain
        # tuples; this avoids building a Series per row with arr.iloc[i].
        subset = arr[list(cols)]
        com_list.extend(list(row) for row in subset.itertuples(index=False, name=None))
    return com_list
658
+
659
def get_prefixspan_MTM(n, load_list, save_list, save_df):
    """Mine frequent patterns from ``load_list`` and return them as a table.

    Args:
        n: Patterns are kept only when they hold more than ``n`` items. The
            callers pass the combination size minus one, so shorter
            sub-patterns are filtered out.
        load_list: Sequences (lists of item values) handed to PrefixSpan.
        save_list: Unused; kept so existing calls keep working.
        save_df: Unused; kept so existing calls keep working.

    Returns:
        DataFrame with columns 'Cause', 'Effect', 'Frequency', sorted by
        descending frequency ('Effect' is filled in by get_effect).
    """
    miner = PrefixSpan(load_list)
    # Minimum support of 1; the filter drops patterns with n items or fewer.
    patterns = miner.frequent(1, filter=lambda patt, matches: len(patt) > n)
    # Naming the columns up front keeps 'Frequency' and 'Cause' present even
    # when no pattern survives; renaming the default 0/1 columns of an empty
    # frame left nothing to sort by and raised KeyError.
    result = pd.DataFrame(patterns, columns=['Frequency', 'Cause'])
    result = result.sort_values(by=['Frequency'], ascending=False, ignore_index=True)
    return get_effect(result)
669
+
670
def get_effect_MTM(edit_df):
    """Return a copy of ``edit_df`` with an 'Effect' column derived from 'Cause'.

    'Effect' is the last item of the row's 'Cause' sequence that equals one of
    the rule labels ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB'), or NaN
    when the sequence holds none of them.

    Changes from the original:
      * the caller's frame is no longer modified in place;
      * 'Effect' is an object column, so writing a label into it does not
        upcast a float column;
      * rows are handled by position, so the index need not be 0..n-1.

    Args:
        edit_df: DataFrame with 'Cause' (a sequence of items per row) and
            'Frequency' columns.

    Returns:
        New DataFrame with columns ordered 'Cause', 'Effect', 'Frequency'.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')

    def _last_rule(cause):
        # The last matching item wins, as in the original nested loop.
        found = np.nan
        for item in cause:
            if item in drules:
                found = item
        return found

    result = edit_df.copy()
    result['Effect'] = pd.Series(
        [_last_rule(cause) for cause in result['Cause']],
        index=result.index,
        dtype=object,
    )
    return result[['Cause', 'Effect', 'Frequency']]
683
+
684
+# 1. 두 아이템의 조합
685
# 1. Combinations of two items.
item_of_two_MTM = get_combination(MTM_df, 2)
prefix_two_MTM = []
prefix_two_df_MTM = pd.DataFrame()
# The original passed the misspelled, undefined name prefix_two_d_MTMf here,
# which raised NameError before any MTM pattern could be mined.
prefix_of_two_MTM = get_prefixspan(1, item_of_two_MTM, prefix_two_MTM, prefix_two_df_MTM)
prefix_of_two_MTM

# 2. Combinations of three items.
item_of_three_MTM = get_combination(MTM_df, 3)
prefix_three_tmp_MTM = []
prefix_three_df_MTM = pd.DataFrame()
prefix_of_three_MTM = get_prefixspan(2, item_of_three_MTM, prefix_three_tmp_MTM, prefix_three_df_MTM)
prefix_of_three_MTM

# 3. Combinations of four items.
item_of_four_MTM = get_combination(MTM_df, 4)
prefix_four_tmp_MTM = []
prefix_four_df_MTM = pd.DataFrame()
prefix_of_four_MTM = get_prefixspan(3, item_of_four_MTM, prefix_four_tmp_MTM, prefix_four_df_MTM)

# 4. Combinations of five items.
item_of_five_MTM = get_combination(MTM_df, 5)
prefix_five_tmp_MTM = []
prefix_five_df_MTM = pd.DataFrame()
prefix_of_five_MTM = get_prefixspan(4, item_of_five_MTM, prefix_five_tmp_MTM, prefix_five_df_MTM)
prefix_of_five_MTM

# 5. Combinations of six items.
item_of_six_MTM = get_combination(MTM_df, 6)
prefix_six_tmp_MTM = []
prefix_six_df_MTM = pd.DataFrame()
prefix_of_six_MTM = get_prefixspan(5, item_of_six_MTM, prefix_six_tmp_MTM, prefix_six_df_MTM)
prefix_of_six_MTM
717
+
718
+##################### MTM section End #####################
719
+
720
+
721
+# In[ ]:
722
+
723
+
724
+
725
+
726
+
727
+# In[ ]:
728
+
729
+
730
+
731
+

Loading…
Отказ
Запис