Просмотр исходного кода

upload 'PrefixSpan_20211021.py'

- change .ipynb to .py
- MTM Section 추가
- 'DRULE_ATT_TYPE_CODE1' 데이터 포함
- 한번의 함수 호출로 결과를 추출할 수 있도록 수정
- 공백문자 처리
master
yevKwon 4 лет назад
Родитель
Сommit
741175e094
1 измененных файлов: 677 добавлений и 0 удалений
  1. 677
    0
      PrefixSpan_20211021.py

+ 677
- 0
PrefixSpan_20211021.py Просмотреть файл

1
+#!/usr/bin/env python
2
+# coding: utf-8
3
+
4
+# <p>NTM(유해트래픽 탐지장비)</p>
5
+# <p>MTM(악성파일 탐지장비)</p>
6
+
7
+# In[1]:
8
+
9
+
10
+#!/usr/bin/env python
11
+# coding: utf-8
12
+
13
+import pandas as pd
14
+import numpy as np
15
+from mlxtend.preprocessing import TransactionEncoder
16
+from mlxtend.frequent_patterns import association_rules, fpgrowth
17
+from prefixspan import PrefixSpan
18
+
19
+# load ts_data_accident-2020_sample.csv
20
+# to prevent dtypewarning, set low_memory=False
21
# Read the sample export; low_memory=False makes pandas read the file in one
# pass, which avoids DtypeWarning on mixed-type columns.
df = pd.read_csv('ts_data_accident-2020_sample.csv', low_memory=False)

# Keep only the columns used by the mining steps below and drop every row
# that is missing any of them.
df = df[['RISK_V2','INST_NM','DRULE_ATT_TYPE_CODE1','TW_ATT_IP','TW_ATT_PORT','TW_DMG_IP','TW_DMG_PORT','ACCD_DMG_PROTO_NM','TW_ATT_CT_NM','ACCD_FIND_MTD_CODE','DRULE_NM']].dropna()
len(df)  # row count after dropna (notebook preview; no effect when run as a script)

##################### NTM section #####################
# Rows whose ACCD_FIND_MTD_CODE equals the integer 1 (compared as a number,
# not as the string '1').
# .copy() gives NTM_df its own data: columns are added to and dropped from it
# further down, and doing that on a bare filtered slice triggers pandas'
# SettingWithCopyWarning.
NTM_df = df[df['ACCD_FIND_MTD_CODE'] == 1].copy()
len(NTM_df)
#* NTM_df.head()

# RISK_V2 holds the raw text from which the asset / intent / source flags are
# extracted later.
RISK_V2 = NTM_df['RISK_V2']

# df was already passed through dropna(), so this second dropna is only a
# safeguard; the two printed sizes show whether it removed anything.
RISK_V2_FILTERED = RISK_V2.dropna()
print(RISK_V2.size)
print(RISK_V2_FILTERED.size)
36
+
37
+#* 추가 : 기존 filter_assets_value 사용시 값을 인식하지 못하는 문제 발생 -> RISK_V2를 별도의 df로 수정
38
+import json
39
+from pandas import json_normalize
40
# Each RISK_V2 cell is dict-like text written with single quotes; swapping the
# quotes makes it parseable by json.loads.
# NOTE(review): this breaks on any value that itself contains an apostrophe.
risk_records = [
    json.loads(raw_risk.replace("'", "\""))
    for raw_risk in RISK_V2_FILTERED
]

# Normalise all records in a single call. Growing the frame with pd.concat
# inside the loop re-copies every previously added row on each iteration.
risk_df = json_normalize(risk_records)

# Column names of the expanded RISK_V2 frame, kept as a plain list.
risk_df_col = risk_df.columns.values.tolist()
48
+
49
+# In[352]:
50
# Module-level accumulators filled by filter_assets_value(): one list per
# processed row, in row order.
asset_val = []
intent_val = []
source_val = []


def _risk_flag_is_set(value):
    """Return the truthiness of a RISK_V2 cell, treating NaN as "not set"."""
    # NaN is the only float that is not equal to itself. It appears when a
    # record lacks a key that other records have, and it is truthy in Python,
    # so a plain truth test would report the missing flag as set.
    if isinstance(value, float) and value != value:
        return False
    return bool(value)


def filter_assets_value(risk):
    """Collect the set ASSETS_VAL_/INTENT_VAL_/SOURCE_VAL_ flags of every row.

    For each row of ``risk`` three lists of ``'RISK_V2.<column>=<label>'``
    strings are built (labels come from get_asset_desc, get_intent_desc and
    get_source_desc) and appended to the module-level ``asset_val``,
    ``intent_val`` and ``source_val`` lists.

    Args:
        risk: DataFrame whose columns are the expanded RISK_V2 keys.

    Returns:
        None. Exactly one entry per row is appended to each accumulator, even
        when a row fails part-way, so the accumulators stay aligned with the
        rows of ``risk``.
    """
    for position in range(len(risk)):
        risks = []
        intents = []
        sources = []
        try:
            # Fetch the row once instead of once per inspected cell.
            row = risk.iloc[position]
            # Use the columns of the frame that was passed in, not a global
            # column list that may belong to a different frame.
            for key in risk.columns:
                if 'ASSETS_VAL_' in key and _risk_flag_is_set(row[key]):
                    risks.append('RISK_V2.' + key + '=' + get_asset_desc(key))
                if 'INTENT_VAL_' in key and _risk_flag_is_set(row[key]):
                    intents.append('RISK_V2.' + key + '=' + get_intent_desc(key))
                if 'SOURCE_VAL_' in key and _risk_flag_is_set(row[key]):
                    sources.append('RISK_V2.' + key + '=' + get_source_desc(key))
        except (KeyError, TypeError, ValueError) as err:
            # Report the offending row only; whatever was collected before the
            # error is still recorded below.
            print('filter_assets_value: row %d stopped early: %r' % (position, err))
        finally:
            asset_val.append(risks)
            intent_val.append(intents)
            source_val.append(sources)
76
+    
77
+    
78
+# modified
79
# Label for every known ASSETS_VAL_<n> field name. Entries 1-11 carry the
# '공인-' (public) prefix, entries 12-22 the '사설-' (private) prefix.
_ASSET_LABELS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the label for an ``ASSETS_VAL_<n>`` field name.

    Args:
        asset_field: field name such as ``'ASSETS_VAL_3'``.

    Returns:
        The matching label, or ``''`` when the name is not one of the 22
        known fields.
    """
    # Only strings can match a table key; anything else has no label.
    if not isinstance(asset_field, str):
        return ''
    return _ASSET_LABELS.get(asset_field, '')
126
+
127
+
128
+
129
+# modified
130
def filter_intent(intent):
    """List the set ``INTENT_VAL_`` flags of one record.

    Args:
        intent: mapping-like object; iterating it yields keys and
            ``intent[key]`` yields the flag value.

    Returns:
        A list of ``'RISK_V2.<key>=<label>'`` strings, one per key that
        contains ``'INTENT_VAL_'`` and has a truthy value, in iteration order.
    """
    return [
        'RISK_V2.' + field_name + '=' + get_intent_desc(field_name)
        for field_name in intent
        if 'INTENT_VAL_' in field_name and intent[field_name]
    ]
137
+
138
+
139
+# In[356]:
140
+
141
+
142
# Label for every known INTENT_VAL_<n> field name.
_INTENT_LABELS = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc(intent_field):
    """Return the label for an ``INTENT_VAL_<n>`` field name.

    Args:
        intent_field: field name such as ``'INTENT_VAL_2'``.

    Returns:
        The matching label, or ``''`` for any name outside
        ``INTENT_VAL_0`` .. ``INTENT_VAL_6``.
    """
    # Only strings can match a table key; anything else has no label.
    if not isinstance(intent_field, str):
        return ''
    return _INTENT_LABELS.get(intent_field, '')
159
+
160
+
161
+# In[358]:
162
+
163
+
164
+# modified
165
def filter_source(source):
    """List the set ``SOURCE_VAL_`` flags of one record.

    Args:
        source: mapping-like object; iterating it yields keys and
            ``source[key]`` yields the flag value.

    Returns:
        A list of ``'RISK_V2.<key>=<label>'`` strings, one per key that
        contains ``'SOURCE_VAL_'`` and has a truthy value, in iteration order.
    """
    return [
        'RISK_V2.' + field_name + '=' + get_source_desc(field_name)
        for field_name in source
        if 'SOURCE_VAL_' in field_name and source[field_name]
    ]
172
+
173
+
174
+# In[359]:
175
+
176
+
177
def get_source_desc(source_field):
    """Return the label for a ``SOURCE_VAL_<n>`` field name.

    Only ``SOURCE_VAL_1`` and ``SOURCE_VAL_3`` have labels; every other
    value gives ``''``.
    """
    if source_field == 'SOURCE_VAL_1':
        return '북한IP'
    return 'ECSC Black IP' if source_field == 'SOURCE_VAL_3' else ''
184
+
185
+
186
+
187
+# In[2]:
188
+
189
+
190
# Fill asset_val / intent_val / source_val with one list per row of risk_df.
filter_assets_value(risk_df)

# Add one text column per flag family. Each per-row list is converted to its
# string form and the surrounding brackets are stripped, so a row without any
# set flag ends up as an empty string rather than '[]'.
for column_name, per_row_values in (
    ('ASSETS_VAL', asset_val),
    ('INTENT_VAL', intent_val),
    ('SOURCE_VAL', source_val),
):
    NTM_df[column_name] = per_row_values
    as_text = NTM_df[column_name].astype(str)
    as_text = as_text.str.replace('[', '', regex=False)
    NTM_df[column_name] = as_text.str.replace(']', '', regex=False)
NTM_df[:5]  # notebook preview; no effect when run as a script

# In[361]:
# The raw RISK_V2 text is no longer needed once the three columns exist.
NTM_df.drop(columns=['RISK_V2'], inplace=True)
NTM_df.columns


# In[3]:


# Rows are later turned into item lists and combined with
# itertools.combinations. An attacker value and a victim value can be equal,
# which would make the items indistinguishable, so each IP/port value is
# prefixed with the name of the column it came from.
for column_name in ('TW_ATT_IP', 'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT'):
    NTM_df[column_name] = column_name + "=" + NTM_df[column_name].astype(str)
225
+
226
+
227
+# In[4]:
228
+
229
+
230
+##################### 여기서부터 진행하시면 됩니다. #####################
231
+##################### 아래 12개 아이템(12. 장비 ACCD_FIND_MTD_CODE 제외)에 대해서 모든 아이템 조합에 알고리즘 적용하기#####################
232
+
233
+# It should be 13 columns in total
234
+
235
+# 1. 기관 INST_NM
236
+# 2. 공격 DRULE_ATT_TYPE_CODE1
237
+# 3. 자산 ASSETS_VAL
238
+# 4. 위협공격ip TW_ATT_IP
239
+# 5. 위협공격port TW_ATT_PORT
240
+# 6. 위협피해ip TW_DMG_IP
241
+# 7. 위협피해port TW_DMG_PORT
242
+# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM
243
+# 9. 공격국가 TW_ATT_CT_NM
244
+# 10. 의도(7개) INTENT_VAL
245
+# 11. IP/URL 가중치 SOURCE_VAL
246
+# 12. 장비 ACCD_FIND_MTD_CODE
247
+# 13. 탐지규칙명 DRULE_NM
248
+
249
+
250
+# In[363]:
251
NTM_df.isna().sum()


# Replacement used for NaN in each column: '' for the name/code columns,
# 0 for the IP/port and flag columns.
ntm_nan_replacements = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for column_name, replacement in ntm_nan_replacements.items():
    NTM_df[column_name] = NTM_df[column_name].replace(np.nan, replacement)


# Check for remaining NaN values.
NTM_df.isna().sum()


# In[5]:


# The columns of NTM_df are the items combined by itertools.combinations, so
# the device column ACCD_FIND_MTD_CODE is removed before mining.
NTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)
280
+
281
+
282
+# In[6]:
283
+
284
+
285
+from prefixspan import PrefixSpan
286
+import itertools
287
+# arr를 매개변수로 받아 n개의 아이템의 조합 반환
288
def get_combination(arr, n):
    """Mine every ``n``-column combination of ``arr`` that includes the attack type.

    Every combination of ``n`` column names containing
    ``'DRULE_ATT_TYPE_CODE1'`` is projected onto every row of ``arr``; the
    resulting value lists are handed to ``get_prefixspan``.

    Args:
        arr: DataFrame whose columns are the candidate items.
        n: number of columns per combination.

    Returns:
        The DataFrame produced by ``get_prefixspan``. When no complete value
        list exists (no matching combination, no rows, or every row has a
        blank in every combination) an empty frame with the columns
        ``Cause``, ``Effect``, ``Frequency`` is returned.
    """
    column_sets = [
        combo
        for combo in itertools.combinations(arr.columns.tolist(), n)
        if 'DRULE_ATT_TYPE_CODE1' in combo
    ]

    # Materialise each row once; looking rows up again for every combination
    # repeats the same work len(column_sets) times.
    rows = [arr.iloc[position] for position in range(len(arr))]

    com_list = []
    # Combination-major order: all rows for the first combination, then all
    # rows for the second, and so on.
    for combo in column_sets:
        for row in rows:
            values = [row[col] for col in combo]
            # Blank handling: a row with an empty value in this combination is
            # skipped entirely. Appending the truncated list would feed
            # sequences shorter than n to the miner, and such a list in first
            # position would distort the length filter derived from it.
            if any(value == '' for value in values):
                continue
            com_list.append(values)

    if not com_list:
        return pd.DataFrame(columns=['Cause', 'Effect', 'Frequency'])
    return get_prefixspan(com_list)
307
+
308
def get_prefixspan(load_list):
    """Run PrefixSpan over ``load_list`` and tabulate the full-width patterns.

    Args:
        load_list: list of item lists.

    Returns:
        DataFrame with the columns ``Cause`` (the pattern), ``Effect`` (added
        by ``get_effect``) and ``Frequency``, sorted by ``Frequency`` in
        descending order. An empty frame with those columns is returned when
        ``load_list`` holds no items at all.
    """
    # Width of a complete sequence. Taken from the longest sequence rather
    # than from load_list[0], which may be truncated or absent.
    n = max((len(sequence) for sequence in load_list), default=0)
    if n == 0:
        return pd.DataFrame(columns=['Cause', 'Effect', 'Frequency'])

    # PrefixSpan also reports patterns shorter than the sequences it was
    # given; the filter keeps only patterns that cover all n items.
    patterns = PrefixSpan(load_list).frequent(
        1, filter=lambda patt, matches: len(patt) >= n
    )

    # Naming the columns up front keeps the sort below valid even when no
    # pattern survived the filter.
    save_df = pd.DataFrame(patterns, columns=['Frequency', 'Cause'])
    save_df = save_df.sort_values(by=['Frequency'], ascending=False, ignore_index=True)
    return get_effect(save_df)
319
+
320
def get_effect(edit_df):
    """Return the patterns with an added ``Effect`` column.

    ``Effect`` is the last item of a row's ``Cause`` that equals one of the
    rule names 'Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB'; rows without
    such an item get NaN.

    Args:
        edit_df: DataFrame with a ``Cause`` column of item sequences and a
            ``Frequency`` column. It is not modified.

    Returns:
        A new DataFrame with the columns ``Cause``, ``Effect``, ``Frequency``
        and the same index as ``edit_df``.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')

    effects = []
    for cause in edit_df['Cause']:
        matching = [item for item in cause if item in drules]
        # When several items match, the last one is reported.
        effects.append(matching[-1] if matching else np.nan)

    # Build the result on a copy so the caller's frame is left untouched, and
    # compute the values positionally so any index works.
    result = edit_df[['Cause', 'Frequency']].copy()
    result.insert(1, 'Effect', effects)
    return result
333
+
334
+
335
+# In[7]:
336
+
337
+
338
+# 1. 두 아이템의 조합
339
# Smallest combination width that is mined.
item = 2

# Mine combinations of two, three, four, five and six items, in that order.
(
    prefix_of_two,
    prefix_of_three,
    prefix_of_four,
    prefix_of_five,
    prefix_of_six,
) = [get_combination(NTM_df, width) for width in range(item, 7)]
prefix_of_two  # notebook preview; no effect when run as a script
370
+##################### NTM section End #####################
371
+
372
+
373
+# In[ ]:
374
+
375
+
376
+##################### MTM section #####################
377
+# Same goes for the MTM section
378
+
379
+# In[375]:
380
+
381
+
382
# Rows whose ACCD_FIND_MTD_CODE equals 2.
# .copy() gives MTM_df its own data: columns are added to and dropped from it
# further down, and doing that on a bare filtered slice triggers pandas'
# SettingWithCopyWarning.
MTM_df = df[df['ACCD_FIND_MTD_CODE'] == 2].copy()
len(MTM_df)


# In[376]:


# RISK_V2 holds the raw text from which the asset / intent / source flags are
# extracted later.
RISK_V2_MTM = MTM_df['RISK_V2']

# The two printed sizes show whether dropna removed anything.
RISK_V2_FILTERED_MTM = RISK_V2_MTM.dropna()
print(RISK_V2_MTM.size)
print(RISK_V2_FILTERED_MTM.size)

# Each RISK_V2 cell is dict-like text written with single quotes; swapping the
# quotes makes it parseable by json.loads.
# NOTE(review): this breaks on any value that itself contains an apostrophe.
risk_records_MTM = [
    json.loads(raw_risk.replace("'", "\""))
    for raw_risk in RISK_V2_FILTERED_MTM
]

# Normalise all records in a single call. Growing the frame with pd.concat
# inside the loop re-copies every previously added row on each iteration.
risk_df_MTM = json_normalize(risk_records_MTM)

# Column names of the expanded RISK_V2 frame, kept as a plain list.
risk_df_col_MTM = risk_df_MTM.columns.values.tolist()
404
+
405
+# In[377]:
406
+
407
+
408
# Module-level accumulators filled by filter_assets_value_MTM(): one list per
# processed row, in row order.
asset_val_MTM = []
intent_val_MTM = []
source_val_MTM = []


def _mtm_risk_flag_is_set(value):
    """Return the truthiness of a RISK_V2 cell, treating NaN as "not set"."""
    # NaN is the only float that is not equal to itself. It appears when a
    # record lacks a key that other records have, and it is truthy in Python,
    # so a plain truth test would report the missing flag as set.
    if isinstance(value, float) and value != value:
        return False
    return bool(value)


def filter_assets_value_MTM(risk):
    """Collect the set ASSETS_VAL_/INTENT_VAL_/SOURCE_VAL_ flags of every row.

    For each row of ``risk`` three lists of ``'RISK_V2.<column>=<label>'``
    strings are built (labels come from get_asset_desc, get_intent_desc and
    get_source_desc) and appended to the module-level ``asset_val_MTM``,
    ``intent_val_MTM`` and ``source_val_MTM`` lists.

    Args:
        risk: DataFrame whose columns are the expanded RISK_V2 keys.

    Returns:
        None. Exactly one entry per row is appended to each accumulator, even
        when a row fails part-way, so the accumulators stay aligned with the
        rows of ``risk``.
    """
    for position in range(len(risk)):
        risks = []
        intents = []
        sources = []
        try:
            # Fetch the row once instead of once per inspected cell.
            row = risk.iloc[position]
            # Iterate the columns of the frame that was passed in. The NTM
            # column list (risk_df_col) may name columns this frame lacks and
            # omit columns it has.
            for key in risk.columns:
                if 'ASSETS_VAL_' in key and _mtm_risk_flag_is_set(row[key]):
                    risks.append('RISK_V2.' + key + '=' + get_asset_desc(key))
                if 'INTENT_VAL_' in key and _mtm_risk_flag_is_set(row[key]):
                    intents.append('RISK_V2.' + key + '=' + get_intent_desc(key))
                if 'SOURCE_VAL_' in key and _mtm_risk_flag_is_set(row[key]):
                    sources.append('RISK_V2.' + key + '=' + get_source_desc(key))
        except (KeyError, TypeError, ValueError) as err:
            # Report the offending row only; whatever was collected before the
            # error is still recorded below.
            print('filter_assets_value_MTM: row %d stopped early: %r' % (position, err))
        finally:
            asset_val_MTM.append(risks)
            intent_val_MTM.append(intents)
            source_val_MTM.append(sources)
435
+
436
+# In[378]:
437
+
438
+# modified
439
def get_asset_desc_MTM(asset_field):
    """Return the label for an ``ASSETS_VAL_<n>`` field name.

    Args:
        asset_field: field name such as ``'ASSETS_VAL_3'``.

    Returns:
        The matching label, or ``''`` when the name is not one of the 22
        known fields.
    """
    # (field name, label) pairs, compared in order. Entries 1-11 carry the
    # '공인-' (public) prefix, entries 12-22 the '사설-' (private) prefix.
    known_assets = (
        ('ASSETS_VAL_1', '공인-전체IP대역(유선)'),
        ('ASSETS_VAL_2', '공인-전체IP대역(무선)'),
        ('ASSETS_VAL_3', '공인-WEB서버'),
        ('ASSETS_VAL_4', '공인-내부응용서버'),
        ('ASSETS_VAL_5', '공인-DB서버'),
        ('ASSETS_VAL_6', '공인-패치서버'),
        ('ASSETS_VAL_7', '공인-네트워크'),
        ('ASSETS_VAL_8', '공인-보안'),
        ('ASSETS_VAL_9', '공인-업무용PC'),
        ('ASSETS_VAL_10', '공인-비업무용PC'),
        ('ASSETS_VAL_11', '공인-기타'),
        ('ASSETS_VAL_12', '사설-전체IP대역(유선)'),
        ('ASSETS_VAL_13', '사설-전체IP대역(무선)'),
        ('ASSETS_VAL_14', '사설-WEB서버'),
        ('ASSETS_VAL_15', '사설-내부응용서버'),
        ('ASSETS_VAL_16', '사설-DB서버'),
        ('ASSETS_VAL_17', '사설-패치서버'),
        ('ASSETS_VAL_18', '사설-네트워크'),
        ('ASSETS_VAL_19', '사설-보안'),
        ('ASSETS_VAL_20', '사설-업무용PC'),
        ('ASSETS_VAL_21', '사설-비업무용PC'),
        ('ASSETS_VAL_22', '사설-기타'),
    )
    for field_name, label in known_assets:
        if asset_field == field_name:
            return label
    return ''
486
+
487
+
488
+# In[381]:
489
+
490
+
491
+# modified
492
def filter_intent_MTM(intent):
    """List the set ``INTENT_VAL_`` flags of one record.

    Args:
        intent: mapping-like object; iterating it yields keys and
            ``intent[key]`` yields the flag value.

    Returns:
        A list of ``'RISK_V2.<key>=<label>'`` strings (labels from
        ``get_intent_desc``), one per key that contains ``'INTENT_VAL_'`` and
        has a truthy value, in iteration order.
    """
    described = []
    for field_name in intent:
        if 'INTENT_VAL_' not in field_name:
            continue
        if not intent[field_name]:
            continue
        described.append('RISK_V2.' + field_name + '=' + get_intent_desc(field_name))
    return described
499
+
500
+
501
+# In[382]:
502
+
503
+
504
def get_intent_desc_MTM(intent_field):
    """Return the label for an ``INTENT_VAL_<n>`` field name.

    Args:
        intent_field: field name such as ``'INTENT_VAL_2'``.

    Returns:
        The matching label, or ``''`` for any name outside
        ``INTENT_VAL_0`` .. ``INTENT_VAL_6``.
    """
    # (field name, label) pairs, compared in order.
    known_intents = (
        ('INTENT_VAL_1', '파괴'),
        ('INTENT_VAL_2', '유출'),
        ('INTENT_VAL_3', '지연'),
        ('INTENT_VAL_4', '잠복'),
        ('INTENT_VAL_5', '단순침입'),
        ('INTENT_VAL_6', 'MD5'),
        ('INTENT_VAL_0', 'Default'),
    )
    for field_name, label in known_intents:
        if intent_field == field_name:
            return label
    return ''
521
+
522
+
523
+
524
+# In[384]:
525
+
526
+
527
+# modified
528
def filter_source_MTM(source):
    """List the set ``SOURCE_VAL_`` flags of one record.

    Args:
        source: mapping-like object; iterating it yields keys and
            ``source[key]`` yields the flag value.

    Returns:
        A list of ``'RISK_V2.<key>=<label>'`` strings (labels from
        ``get_source_desc``), one per key that contains ``'SOURCE_VAL_'`` and
        has a truthy value, in iteration order.
    """
    described = []
    for field_name in source:
        if 'SOURCE_VAL_' not in field_name:
            continue
        if not source[field_name]:
            continue
        described.append('RISK_V2.' + field_name + '=' + get_source_desc(field_name))
    return described
535
+
536
+
537
+# In[385]:
538
+
539
+
540
def get_source_desc_MTM(source_field):
    """Return the label for a ``SOURCE_VAL_<n>`` field name.

    Only ``SOURCE_VAL_1`` and ``SOURCE_VAL_3`` have labels; every other
    value gives ``''``.
    """
    if source_field == 'SOURCE_VAL_1':
        return '북한IP'
    return 'ECSC Black IP' if source_field == 'SOURCE_VAL_3' else ''
547
+
548
+
549
+# In[386]:
550
+
551
# Fill asset_val_MTM / intent_val_MTM / source_val_MTM with one list per row
# of risk_df_MTM. The MTM variant must be called here: filter_assets_value()
# appends to the NTM lists and would leave the *_MTM lists empty.
filter_assets_value_MTM(risk_df_MTM)

# Add one text column per flag family. Each per-row list is converted to its
# string form and the surrounding brackets are stripped, so a row without any
# set flag ends up as an empty string rather than '[]'.
# Every step reads from MTM_df itself; reading SOURCE_VAL from NTM_df would
# overwrite the MTM values with index-misaligned NTM data.
for column_name, per_row_values in (
    ('ASSETS_VAL', asset_val_MTM),
    ('INTENT_VAL', intent_val_MTM),
    ('SOURCE_VAL', source_val_MTM),
):
    MTM_df[column_name] = per_row_values
    as_text = MTM_df[column_name].astype(str)
    as_text = as_text.str.replace('[', '', regex=False)
    MTM_df[column_name] = as_text.str.replace(']', '', regex=False)
MTM_df[:5]  # notebook preview; no effect when run as a script

# NOTE(review): the NTM section prefixes TW_ATT_IP / TW_ATT_PORT / TW_DMG_IP /
# TW_DMG_PORT with their column names before mining; MTM_df gets no such
# step. Confirm whether that is intended.
571
+
572
+# In[361]:
573
# The raw RISK_V2 text is no longer needed once the flag columns exist.
MTM_df.drop(columns=['RISK_V2'], inplace=True)
MTM_df.columns


# In[388]:


MTM_df.isna().sum()


# In[389]:


# Replacement used for NaN in each column: '' for the name/code columns,
# 0 for the IP/port and flag columns.
mtm_nan_replacements = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for column_name, replacement in mtm_nan_replacements.items():
    MTM_df[column_name] = MTM_df[column_name].replace(np.nan, replacement)


# In[390]:


# Check for remaining NaN values.
MTM_df.isna().sum()


# In[391]:

# The columns of MTM_df are the items that get combined, so the device column
# ACCD_FIND_MTD_CODE is removed before mining.
MTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)
612
+
613
+# arr를 매개변수로 받아 n개의 아이템의 조합 반환
614
def get_combination_MTM(arr, n):
    """Mine every ``n``-column combination of ``arr`` that includes the attack type.

    Every combination of ``n`` column names containing
    ``'DRULE_ATT_TYPE_CODE1'`` is projected onto every row of ``arr``; the
    resulting value lists are handed to ``get_prefixspan_MTM``.

    Args:
        arr: DataFrame whose columns are the candidate items.
        n: number of columns per combination.

    Returns:
        The DataFrame produced by ``get_prefixspan_MTM``. When no complete
        value list exists (no matching combination, no rows, or every row has
        a blank in every combination) an empty frame with the columns
        ``Cause``, ``Effect``, ``Frequency`` is returned.
    """
    column_sets = [
        combo
        for combo in itertools.combinations(arr.columns.tolist(), n)
        if 'DRULE_ATT_TYPE_CODE1' in combo
    ]

    # Materialise each row once; looking rows up again for every combination
    # repeats the same work len(column_sets) times.
    rows = [arr.iloc[position] for position in range(len(arr))]

    com_list = []
    # Combination-major order: all rows for the first combination, then all
    # rows for the second, and so on.
    for combo in column_sets:
        for row in rows:
            values = [row[col] for col in combo]
            # Blank handling: a row with an empty value in this combination is
            # skipped entirely. Appending the truncated list would feed
            # sequences shorter than n to the miner, and such a list in first
            # position would distort the length filter derived from it.
            if any(value == '' for value in values):
                continue
            com_list.append(values)

    if not com_list:
        return pd.DataFrame(columns=['Cause', 'Effect', 'Frequency'])
    return get_prefixspan_MTM(com_list)
631
+
632
def get_prefixspan_MTM(load_list):
    """Run PrefixSpan over ``load_list`` and tabulate the full-width patterns.

    Args:
        load_list: list of item lists.

    Returns:
        DataFrame with the columns ``Cause`` (the pattern), ``Effect`` (added
        by ``get_effect_MTM``) and ``Frequency``, sorted by ``Frequency`` in
        descending order. An empty frame with those columns is returned when
        ``load_list`` holds no items at all.
    """
    # Width of a complete sequence. Taken from the longest sequence rather
    # than from load_list[0], which may be truncated or absent.
    n = max((len(sequence) for sequence in load_list), default=0)
    if n == 0:
        return pd.DataFrame(columns=['Cause', 'Effect', 'Frequency'])

    # PrefixSpan also reports patterns shorter than the sequences it was
    # given; the filter keeps only patterns that cover all n items.
    patterns = PrefixSpan(load_list).frequent(
        1, filter=lambda patt, matches: len(patt) >= n
    )

    # Naming the columns up front keeps the sort below valid even when no
    # pattern survived the filter.
    save_df = pd.DataFrame(patterns, columns=['Frequency', 'Cause'])
    save_df = save_df.sort_values(by=['Frequency'], ascending=False, ignore_index=True)
    return get_effect_MTM(save_df)
643
+
644
def get_effect_MTM(edit_df):
    """Return the patterns with an added ``Effect`` column.

    ``Effect`` is the last item of a row's ``Cause`` that equals one of the
    rule names 'Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB'; rows without
    such an item get NaN.

    Args:
        edit_df: DataFrame with a ``Cause`` column of item sequences and a
            ``Frequency`` column. It is not modified.

    Returns:
        A new DataFrame with the columns ``Cause``, ``Effect``, ``Frequency``
        and the same index as ``edit_df``.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')

    effects = []
    for cause in edit_df['Cause']:
        matching = [item for item in cause if item in drules]
        # When several items match, the last one is reported.
        effects.append(matching[-1] if matching else np.nan)

    # Build the result on a copy so the caller's frame is left untouched, and
    # compute the values positionally so any index works.
    result = edit_df[['Cause', 'Frequency']].copy()
    result.insert(1, 'Effect', effects)
    return result
657
+
658
+
659
+
660
+# 1. 두 아이템의 조합
661
# Mine combinations of two, three, four, five and six items, in that order.
(
    prefix_of_two_MTM,
    prefix_of_three_MTM,
    prefix_of_four_MTM,
    prefix_of_five_MTM,
    prefix_of_six_MTM,
) = [get_combination(MTM_df, width) for width in range(2, 7)]
675
+
676
+##################### MTM section End #####################
677
+

Загрузка…
Отмена
Сохранить