Pārlūkot izejas kodu

삭제 'keris.ipynb/PrefixSpan_20210925_edit.py'

master
yevKwon 4 gadus atpakaļ
vecāks
revīzija
98e9a65511
1 mainītis faili ar 0 papildinājumiem un 731 dzēšanām
  1. 0
    731
      keris.ipynb/PrefixSpan_20210925_edit.py

+ 0
- 731
keris.ipynb/PrefixSpan_20210925_edit.py Parādīt failu

@@ -1,731 +0,0 @@
1
-#!/usr/bin/env python
2
-# coding: utf-8
3
-
4
-# <p>NTM(유해트래픽 탐지장비)</p>
5
-# <p>MTM(악성파일 탐지장비)</p>
6
-
7
-# In[1]:
8
-
9
-
10
-#!/usr/bin/env python
11
-# coding: utf-8
12
-
13
-import pandas as pd
14
-import numpy as np
15
-from mlxtend.preprocessing import TransactionEncoder
16
-from mlxtend.frequent_patterns import association_rules, fpgrowth
17
-from prefixspan import PrefixSpan
18
-
19
-# load ts_data_accident-2020_sample.csv
20
-# to prevent dtypewarning, set low_memory=False
21
import json
from pandas import json_normalize

# Load the sample accident data; low_memory=False avoids the mixed-dtype warning.
df = pd.read_csv('ts_data_accident-2020_sample.csv', low_memory=False)
# Keep only the columns used by the analysis and drop rows with any missing value.
df = df[['RISK_V2', 'INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',
         'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',
         'ACCD_FIND_MTD_CODE', 'DRULE_NM']].dropna()
len(df)

##################### NTM section #####################
# Rows whose detection-method code is 1. Take an explicit copy: the frame is
# extended and modified in place further down, which is unsafe on a filtered
# view of `df` (SettingWithCopyWarning / silently lost writes).
NTM_df = df[df['ACCD_FIND_MTD_CODE'] == 1].copy()
len(NTM_df)

# RISK_V2 carries the asset / intent / source flags as a dict-like string.
RISK_V2 = NTM_df['RISK_V2']

RISK_V2_FILTERED = RISK_V2.dropna()
print(RISK_V2.size)
print(RISK_V2_FILTERED.size)

# Expand every RISK_V2 string into a one-row frame, then concatenate once.
# (Concatenating inside the loop re-copies the accumulated frame on every
# iteration.)
risk_frames = []
for newVal in RISK_V2_FILTERED:
    # The payload uses single quotes; swap them so json.loads accepts it.
    newVal = newVal.replace("'", "\"")
    newVal_str = json.loads(newVal)
    newVal_df = json_normalize(newVal_str)
    risk_frames.append(newVal_df)

# pd.concat raises on an empty list, so fall back to an empty frame.
risk_df = pd.concat(risk_frames, ignore_index=True) if risk_frames else pd.DataFrame()

risk_df_col = risk_df.columns.values.tolist()
48
-
49
-# In[352]:
50
# Per-row results filled by filter_assets_value(); one entry per row of the
# frame passed in, in row order.
asset_val = []
intent_val = []
source_val = []


def _ntm_flag_is_set(value):
    """Return True when a risk flag value counts as set (NaN counts as unset)."""
    # NaN is truthy in Python; a NaN here means the key was absent from the
    # record, so it must not be reported as a set flag.
    if isinstance(value, float) and value != value:
        return False
    return bool(value)


def filter_assets_value(risk):
    """Collect labelled asset / intent / source descriptions for every row.

    For each row of ``risk`` the columns listed in the module-level
    ``risk_df_col`` are scanned. Every set flag whose column name contains
    ``ASSETS_VAL_``, ``INTENT_VAL_`` or ``SOURCE_VAL_`` is turned into a
    ``'RISK_V2.<column>=<description>'`` string.

    Args:
        risk: DataFrame of expanded RISK_V2 flags.

    Returns:
        None. One list per row is appended to the module-level ``asset_val``,
        ``intent_val`` and ``source_val`` lists, even when a row fails, so the
        three lists stay aligned with the rows of ``risk``.
    """
    for row_pos in range(len(risk)):
        risks = []
        intents = []
        sources = []
        try:
            row = risk.iloc[row_pos]  # fetch the row once instead of per key
            for key in risk_df_col:
                if not _ntm_flag_is_set(row[key]):
                    continue
                if 'ASSETS_VAL_' in key:
                    risks.append('RISK_V2.' + key + '=' + get_asset_desc(key))
                if 'INTENT_VAL_' in key:
                    intents.append('RISK_V2.' + key + '=' + get_intent_desc(key))
                if 'SOURCE_VAL_' in key:
                    sources.append('RISK_V2.' + key + '=' + get_source_desc(key))
        except Exception as exc:
            # Best effort: report the failing row and keep going. Whatever was
            # collected before the failure is still appended below.
            print('filter_assets_value: row', row_pos, 'failed:', repr(exc))
        finally:
            asset_val.append(risks)
            intent_val.append(intents)
            source_val.append(sources)
76
-    
77
-    
78
-# modified
79
# RISK_V2 asset flag name -> asset description.
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the description for an ``ASSETS_VAL_<n>`` field name.

    Args:
        asset_field: Field name such as ``'ASSETS_VAL_3'``.

    Returns:
        The matching description, or ``''`` for any unknown value.
    """
    try:
        return _ASSET_DESCRIPTIONS.get(asset_field, '')
    except TypeError:
        # Unhashable input (e.g. a list) cannot be a known field name.
        return ''
126
-
127
-
128
-
129
-# modified
130
def filter_intent(intent):
    """Return labelled descriptions for the truthy ``INTENT_VAL_*`` entries.

    Args:
        intent: Mapping of field name to flag value.

    Returns:
        A list of ``'RISK_V2.<field>=<description>'`` strings, in the
        mapping's iteration order.
    """
    return [
        'RISK_V2.' + field_name + '=' + get_intent_desc(field_name)
        for field_name in intent
        if 'INTENT_VAL_' in field_name and intent[field_name]
    ]
137
-
138
-
139
-# In[356]:
140
-
141
-
142
# RISK_V2 intent flag name -> intent description.
_INTENT_DESCRIPTIONS = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc(intent_field):
    """Return the description for an ``INTENT_VAL_<n>`` field name.

    Args:
        intent_field: Field name such as ``'INTENT_VAL_2'``.

    Returns:
        The matching description, or ``''`` for any unknown value.
    """
    try:
        return _INTENT_DESCRIPTIONS.get(intent_field, '')
    except TypeError:
        # Unhashable input cannot be a known field name.
        return ''
159
-
160
-
161
-# In[358]:
162
-
163
-
164
-# modified
165
def filter_source(source):
    """Return labelled descriptions for the truthy ``SOURCE_VAL_*`` entries.

    Args:
        source: Mapping of field name to flag value.

    Returns:
        A list of ``'RISK_V2.<field>=<description>'`` strings, in the
        mapping's iteration order.
    """
    return [
        'RISK_V2.' + field_name + '=' + get_source_desc(field_name)
        for field_name in source
        if 'SOURCE_VAL_' in field_name and source[field_name]
    ]
172
-
173
-
174
-# In[359]:
175
-
176
-
177
# RISK_V2 source flag name -> source description.
_SOURCE_DESCRIPTIONS = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc(source_field):
    """Return the description for a ``SOURCE_VAL_<n>`` field name.

    Args:
        source_field: Field name such as ``'SOURCE_VAL_1'``.

    Returns:
        The matching description, or ``''`` for any unknown value.
    """
    try:
        return _SOURCE_DESCRIPTIONS.get(source_field, '')
    except TypeError:
        # Unhashable input cannot be a known field name.
        return ''
184
-
185
-
186
-
187
-# In[2]:
188
-
189
-
190
# Fill asset_val / intent_val / source_val from the expanded risk table.
filter_assets_value(risk_df)

# Attach each collected list as a new column, then store it as text without
# the surrounding list brackets. Empty entries end up blank rather than "[]"
# (NaN values are replaced further down).
for risk_column, collected_values in (
    ('ASSETS_VAL', asset_val),
    ('INTENT_VAL', intent_val),
    ('SOURCE_VAL', source_val),
):
    NTM_df[risk_column] = collected_values
    as_text = NTM_df[risk_column].astype(str)
    without_open = as_text.str.replace('[', '', regex=False)
    NTM_df[risk_column] = without_open.str.replace(']', '', regex=False)
NTM_df[:5]

# The raw RISK_V2 string is no longer needed once it has been expanded.
NTM_df.drop(columns=['RISK_V2'], inplace=True)
NTM_df.columns
214
-
215
-
216
-# In[3]:
217
-
218
-
219
-#data frame의 i번째 row를 list로 저장하여 itertools.combinations로 모든 조합 만들 예정
220
-#TW_ATT_IP와 TW_DMG_IP의 값이 같은 경우 어떤 값과의 관계인지 알 수 없으므로 데이터 가공
221
# Rows are later turned into item lists and combined with
# itertools.combinations. An attacker IP and a victim IP (or two ports) can
# hold the same raw value, so prefix every value with its column name to keep
# the items distinguishable.
for endpoint_column in ('TW_ATT_IP', 'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT'):
    NTM_df[endpoint_column] = endpoint_column + "=" + NTM_df[endpoint_column].astype(str)
225
-
226
-
227
-# In[4]:
228
-
229
-
230
-##################### 여기서부터 진행하시면 됩니다. #####################
231
-##################### 아래 12개 아이템(12. 장비 ACCD_FIND_MTD_CODE 제외)에 대해서 모든 아이템 조합에 알고리즘 적용하기#####################
232
-
233
-# It should be 13 columns in total
234
-
235
-# 1. 기관 INST_NM
236
-# 2. 공격 DRULE_ATT_TYPE_CODE1
237
-# 3. 자산 ASSETS_VAL
238
-# 4. 위협공격ip TW_ATT_IP
239
-# 5. 위협공격port TW_ATT_PORT
240
-# 6. 위협피해ip TW_DMG_IP
241
-# 7. 위협피해port TW_DMG_PORT
242
-# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM
243
-# 9. 공격국가 TW_ATT_CT_NM
244
-# 10. 의도(7개) INTENT_VAL
245
-# 11. IP/URL 가중치 SOURCE_VAL
246
-# 12. 장비 ACCD_FIND_MTD_CODE
247
-# 13. 탐지규칙명 DRULE_NM
248
-
249
-
250
-# In[363]:
251
NTM_df.isna().sum()

# Replacement used for NaN in each column ('' or 0, as in the original list).
ntm_nan_replacements = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for nan_column, nan_substitute in ntm_nan_replacements.items():
    NTM_df[nan_column] = NTM_df[nan_column].replace(np.nan, nan_substitute)

# Check the NaN counts again after the replacement.
NTM_df.isna().sum()

# The remaining columns become the items to combine; the detection-method
# code is constant within this section, so remove it.
NTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)
280
-
281
-
282
-# In[6]:
283
-
284
-
285
-# 12의 아이템 중 2개의 조합으로 만들어질 수 있는 모든 시나리오의 갯수 파악
286
import itertools

# All column combinations of size 2 through 6, one list per size.
item_n = [
    list(itertools.combinations(NTM_df.columns.tolist(), group_size))
    for group_size in range(2, 7)
]

# Print how many combinations exist for each size. Materialising every
# combination for every row quickly becomes too slow for a plain loop.
for size_offset, combos_of_size in enumerate(item_n):
    print("12C" + str(size_offset+2)+" = "+str(len(combos_of_size)))
295
-
296
-
297
-# In[7]:
298
-
299
-
300
-from prefixspan import PrefixSpan
301
-
302
-# arr를 매개변수로 받아 n개의 아이템의 조합 반환
303
def get_combination(arr, n):
    """Return the values of every ``n``-column combination for every row.

    The result is ordered combination by combination: all rows for the first
    column combination, then all rows for the second, and so on.

    The previous loop bound was ``len(combination_n[n-2])``, which is the
    length of a single combination tuple (always ``n``), so only the first
    ``n`` combinations were ever emitted. Every combination is now processed.

    Args:
        arr: DataFrame whose columns are the items to combine.
        n: Number of columns per combination.

    Returns:
        A list of lists; each inner list holds one row's values for one
        combination, in column order. Empty when ``arr`` has no rows or fewer
        than ``n`` columns.
    """
    com_list = []
    for column_group in itertools.combinations(arr.columns.tolist(), n):
        # Pull each column once and walk them in lockstep instead of doing a
        # per-row, per-column lookup.
        column_values = [arr[column].tolist() for column in column_group]
        com_list.extend(list(row_values) for row_values in zip(*column_values))
    return com_list
316
-
317
def get_prefixspan(n, load_list, save_list, save_df):
    """Mine frequent patterns with PrefixSpan and return them as a DataFrame.

    Patterns are mined with a minimum support of 1 and kept only when they
    contain more than ``n`` items. This drops the shorter patterns PrefixSpan
    would otherwise also report for the same sequences.

    Args:
        n: Length threshold; only patterns with ``len(pattern) > n`` are kept.
        load_list: Sequences to mine, passed to ``PrefixSpan``.
        save_list: Unused. Kept so existing callers keep working.
        save_df: Unused. Kept so existing callers keep working.

    Returns:
        DataFrame with columns ``Cause``, ``Effect`` and ``Frequency``, sorted
        by descending ``Frequency``.
    """
    miner = PrefixSpan(load_list)
    patterns = miner.frequent(1, filter=lambda patt, matches: len(patt) > n)
    # Name the columns up front so an empty result still has a 'Frequency'
    # column to sort on (renaming 0/1 afterwards raised KeyError in that case).
    result = pd.DataFrame(patterns, columns=['Frequency', 'Cause'])
    result = result.sort_values(by=['Frequency'], ascending=False, ignore_index=True)
    return get_effect(result)
327
-
328
def get_effect(edit_df):
    """Return a copy of ``edit_df`` with an ``Effect`` column added.

    ``Effect`` is the last item of the row's ``Cause`` sequence that equals
    one of the rule labels below, or NaN when no item matches.

    The input frame is no longer modified. The previous version wrote through
    ``.loc`` on a column-subset of the frame (SettingWithCopy), stored strings
    in a float NaN column, and required a default RangeIndex.

    Args:
        edit_df: DataFrame with ``Cause`` (sequence of items) and
            ``Frequency`` columns.

    Returns:
        A new DataFrame with columns ``Cause``, ``Effect``, ``Frequency``.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')
    effects = []
    for cause in edit_df['Cause']:
        matches = [item for item in cause if item in drules]
        # The original overwrote on every hit, so the last match wins.
        effects.append(matches[-1] if matches else np.nan)
    result = edit_df[['Cause', 'Frequency']].copy()
    # Object dtype lets the column hold both label strings and NaN.
    result.insert(1, 'Effect', pd.Series(effects, index=result.index, dtype=object))
    return result
341
-
342
-
343
-# In[8]:
344
-
345
-
346
-# 1. 두 아이템의 조합
347
# 1. Combinations of two items (result is also written to CSV).
item_of_two = get_combination(arr=NTM_df, n=2)
prefix_two = []
prefix_two_df = pd.DataFrame()
prefix_of_two = get_prefixspan(
    n=1, load_list=item_of_two, save_list=prefix_two, save_df=prefix_two_df)
prefix_of_two.to_csv('prefix_of_two.csv', sep=',')

# 2. Combinations of three items.
item_of_three = get_combination(arr=NTM_df, n=3)
prefix_three_tmp = []
prefix_three_df = pd.DataFrame()
prefix_of_three = get_prefixspan(
    n=2, load_list=item_of_three, save_list=prefix_three_tmp, save_df=prefix_three_df)
prefix_of_three

# 3. Combinations of four items.
item_of_four = get_combination(arr=NTM_df, n=4)
prefix_four_tmp = []
prefix_four_df = pd.DataFrame()
prefix_of_four = get_prefixspan(
    n=3, load_list=item_of_four, save_list=prefix_four_tmp, save_df=prefix_four_df)

# 4. Combinations of five items.
item_of_five = get_combination(arr=NTM_df, n=5)
prefix_five_tmp = []
prefix_five_df = pd.DataFrame()
prefix_of_five = get_prefixspan(
    n=4, load_list=item_of_five, save_list=prefix_five_tmp, save_df=prefix_five_df)
prefix_of_five

# 5. Combinations of six items.
item_of_six = get_combination(arr=NTM_df, n=6)
prefix_six_tmp = []
prefix_six_df = pd.DataFrame()
prefix_of_six = get_prefixspan(
    n=5, load_list=item_of_six, save_list=prefix_six_tmp, save_df=prefix_six_df)
prefix_of_six
401
-##################### NTM section End #####################
402
-
403
-
404
-# In[13]:
405
-
406
-
407
-##################### MTM section #####################
408
-# Same goes for the MTM section
409
-
410
-# In[375]:
411
-
412
-
413
# Rows whose detection-method code is 2. Take an explicit copy: the frame is
# extended and modified in place further down, which is unsafe on a filtered
# view of `df` (SettingWithCopyWarning / silently lost writes).
MTM_df = df[df['ACCD_FIND_MTD_CODE'] == 2].copy()
len(MTM_df)

# RISK_V2 carries the asset / intent / source flags as a dict-like string.
RISK_V2_MTM = MTM_df['RISK_V2']

RISK_V2_FILTERED_MTM = RISK_V2_MTM.dropna()
print(RISK_V2_MTM.size)
print(RISK_V2_FILTERED_MTM.size)

# Expand every RISK_V2 string into a one-row frame, then concatenate once.
# (Concatenating inside the loop re-copies the accumulated frame on every
# iteration.)
risk_frames_MTM = []
for newVal_MTM in RISK_V2_FILTERED_MTM:
    # The payload uses single quotes; swap them so json.loads accepts it.
    newVal_MTM = newVal_MTM.replace("'", "\"")
    newVal_MTM_str = json.loads(newVal_MTM)
    newVal_df_MTM = json_normalize(newVal_MTM_str)
    risk_frames_MTM.append(newVal_df_MTM)

# pd.concat raises on an empty list, so fall back to an empty frame.
risk_df_MTM = (pd.concat(risk_frames_MTM, ignore_index=True)
               if risk_frames_MTM else pd.DataFrame())

risk_df_col_MTM = risk_df_MTM.columns.values.tolist()
435
-
436
-# In[377]:
437
-
438
-
439
# Per-row results filled by filter_assets_value_MTM(); one entry per row of
# the frame passed in, in row order.
asset_val_MTM = []
intent_val_MTM = []
source_val_MTM = []


def _mtm_flag_is_set(value):
    """Return True when a risk flag value counts as set (NaN counts as unset)."""
    # NaN is truthy in Python; a NaN here means the key was absent from the
    # record, so it must not be reported as a set flag.
    if isinstance(value, float) and value != value:
        return False
    return bool(value)


def filter_assets_value_MTM(risk):
    """Collect labelled asset / intent / source descriptions for every MTM row.

    For each row of ``risk`` the columns listed in the module-level
    ``risk_df_col_MTM`` are scanned. The previous version iterated the NTM
    column list ``risk_df_col`` instead. Every set flag whose column name
    contains ``ASSETS_VAL_``, ``INTENT_VAL_`` or ``SOURCE_VAL_`` is turned
    into a ``'RISK_V2.<column>=<description>'`` string.

    Args:
        risk: DataFrame of expanded RISK_V2 flags for the MTM rows.

    Returns:
        None. One list per row is appended to the module-level
        ``asset_val_MTM``, ``intent_val_MTM`` and ``source_val_MTM`` lists,
        even when a row fails, so the lists stay aligned with the rows.
    """
    for row_pos in range(len(risk)):
        risks = []
        intents = []
        sources = []
        try:
            row = risk.iloc[row_pos]  # fetch the row once instead of per key
            for key in risk_df_col_MTM:
                if not _mtm_flag_is_set(row[key]):
                    continue
                if 'ASSETS_VAL_' in key:
                    risks.append('RISK_V2.' + key + '=' + get_asset_desc(key))
                if 'INTENT_VAL_' in key:
                    intents.append('RISK_V2.' + key + '=' + get_intent_desc(key))
                if 'SOURCE_VAL_' in key:
                    sources.append('RISK_V2.' + key + '=' + get_source_desc(key))
        except Exception as exc:
            # Best effort: report the failing row and keep going. Whatever was
            # collected before the failure is still appended below.
            print('filter_assets_value_MTM: row', row_pos, 'failed:', repr(exc))
        finally:
            asset_val_MTM.append(risks)
            intent_val_MTM.append(intents)
            source_val_MTM.append(sources)
466
-
467
-# In[378]:
468
-
469
-# modified
470
def get_asset_desc_MTM(asset_field):
    """Return the description for an ``ASSETS_VAL_<n>`` field name.

    This was a line-for-line copy of ``get_asset_desc``; it now delegates to
    that function so the mapping lives in one place.

    Args:
        asset_field: Field name such as ``'ASSETS_VAL_3'``.

    Returns:
        The matching description, or ``''`` for any unknown value.
    """
    return get_asset_desc(asset_field)
517
-
518
-
519
-# In[381]:
520
-
521
-
522
-# modified
523
def filter_intent_MTM(intent):
    """Return labelled descriptions for the truthy ``INTENT_VAL_*`` entries.

    Args:
        intent: Mapping of field name to flag value.

    Returns:
        A list of ``'RISK_V2.<field>=<description>'`` strings, in the
        mapping's iteration order.
    """
    labelled = []
    for field_name in intent:
        if 'INTENT_VAL_' not in field_name:
            continue
        if not intent[field_name]:
            continue
        labelled.append('RISK_V2.' + field_name + '=' + get_intent_desc(field_name))
    return labelled
530
-
531
-
532
-# In[382]:
533
-
534
-
535
def get_intent_desc_MTM(intent_field):
    """Return the description for an ``INTENT_VAL_<n>`` field name.

    This was a line-for-line copy of ``get_intent_desc``; it now delegates to
    that function so the mapping lives in one place.

    Args:
        intent_field: Field name such as ``'INTENT_VAL_2'``.

    Returns:
        The matching description, or ``''`` for any unknown value.
    """
    return get_intent_desc(intent_field)
552
-
553
-
554
-
555
-# In[384]:
556
-
557
-
558
-# modified
559
def filter_source_MTM(source):
    """Return labelled descriptions for the truthy ``SOURCE_VAL_*`` entries.

    Args:
        source: Mapping of field name to flag value.

    Returns:
        A list of ``'RISK_V2.<field>=<description>'`` strings, in the
        mapping's iteration order.
    """
    labelled = []
    for field_name in source:
        if 'SOURCE_VAL_' not in field_name:
            continue
        if not source[field_name]:
            continue
        labelled.append('RISK_V2.' + field_name + '=' + get_source_desc(field_name))
    return labelled
566
-
567
-
568
-# In[385]:
569
-
570
-
571
def get_source_desc_MTM(source_field):
    """Return the description for a ``SOURCE_VAL_<n>`` field name.

    This was a line-for-line copy of ``get_source_desc``; it now delegates to
    that function so the mapping lives in one place.

    Args:
        source_field: Field name such as ``'SOURCE_VAL_1'``.

    Returns:
        The matching description, or ``''`` for any unknown value.
    """
    return get_source_desc(source_field)
578
-
579
-
580
-# In[386]:
581
-
582
# Fill asset_val_MTM / intent_val_MTM / source_val_MTM from the MTM risk
# table. The previous code called filter_assets_value(), which appends to the
# NTM lists and leaves the MTM lists empty.
filter_assets_value_MTM(risk_df_MTM)

# Attach each collected list as a new column, then store it as text without
# the surrounding list brackets. Empty entries end up blank rather than "[]"
# (NaN values are replaced further down).
# Fixed here: SOURCE_VAL was assigned from the undefined name `source_val_NTN`
# and then overwritten with values read from NTM_df instead of MTM_df.
for risk_column_MTM, collected_values_MTM in (
    ('ASSETS_VAL', asset_val_MTM),
    ('INTENT_VAL', intent_val_MTM),
    ('SOURCE_VAL', source_val_MTM),
):
    MTM_df[risk_column_MTM] = collected_values_MTM
    MTM_df[risk_column_MTM] = (
        MTM_df[risk_column_MTM]
        .astype(str)
        .str.replace('[', '', regex=False)
        .str.replace(']', '', regex=False)
    )
MTM_df[:5]

# The raw RISK_V2 string is no longer needed once it has been expanded.
MTM_df.drop(columns=['RISK_V2'], inplace=True)
MTM_df.columns
606
-
607
-
608
-# In[388]:
609
-
610
-
611
MTM_df.isna().sum()

# Replacement used for NaN in each column ('' or 0, as in the original list).
mtm_nan_replacements = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for nan_column_MTM, nan_substitute_MTM in mtm_nan_replacements.items():
    MTM_df[nan_column_MTM] = MTM_df[nan_column_MTM].replace(np.nan, nan_substitute_MTM)

# Check the NaN counts again after the replacement.
MTM_df.isna().sum()

# The remaining columns become the items to combine; the detection-method
# code is constant within this section, so remove it.
MTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)
643
-
644
-# arr를 매개변수로 받아 n개의 아이템의 조합 반환
645
def get_combination_MTM(arr, n):
    """Return the values of every ``n``-column combination for every row.

    The result is ordered combination by combination: all rows for the first
    column combination, then all rows for the second, and so on.

    The previous loop bound was ``len(combination_n[n-2])``, which is the
    length of a single combination tuple (always ``n``), so only the first
    ``n`` combinations were ever emitted. Every combination is now processed.

    Args:
        arr: DataFrame whose columns are the items to combine.
        n: Number of columns per combination.

    Returns:
        A list of lists; each inner list holds one row's values for one
        combination, in column order. Empty when ``arr`` has no rows or fewer
        than ``n`` columns.
    """
    com_list = []
    for column_group in itertools.combinations(arr.columns.tolist(), n):
        # Pull each column once and walk them in lockstep instead of doing a
        # per-row, per-column lookup.
        column_values = [arr[column].tolist() for column in column_group]
        com_list.extend(list(row_values) for row_values in zip(*column_values))
    return com_list
658
-
659
def get_prefixspan_MTM(n, load_list, save_list, save_df):
    """Mine frequent patterns with PrefixSpan and return them as a DataFrame.

    Patterns are mined with a minimum support of 1 and kept only when they
    contain more than ``n`` items. This drops the shorter patterns PrefixSpan
    would otherwise also report for the same sequences.

    Args:
        n: Length threshold; only patterns with ``len(pattern) > n`` are kept.
        load_list: Sequences to mine, passed to ``PrefixSpan``.
        save_list: Unused. Kept so existing callers keep working.
        save_df: Unused. Kept so existing callers keep working.

    Returns:
        DataFrame with columns ``Cause``, ``Effect`` and ``Frequency``, sorted
        by descending ``Frequency``.
    """
    miner = PrefixSpan(load_list)
    patterns = miner.frequent(1, filter=lambda patt, matches: len(patt) > n)
    # Name the columns up front so an empty result still has a 'Frequency'
    # column to sort on (renaming 0/1 afterwards raised KeyError in that case).
    result = pd.DataFrame(patterns, columns=['Frequency', 'Cause'])
    result = result.sort_values(by=['Frequency'], ascending=False, ignore_index=True)
    # Use the MTM variant of the helper, matching the rest of this section.
    return get_effect_MTM(result)
669
-
670
def get_effect_MTM(edit_df):
    """Return a copy of ``edit_df`` with an ``Effect`` column added.

    ``Effect`` is the last item of the row's ``Cause`` sequence that equals
    one of the rule labels below, or NaN when no item matches.

    The input frame is no longer modified. The previous version wrote through
    ``.loc`` on a column-subset of the frame (SettingWithCopy), stored strings
    in a float NaN column, and required a default RangeIndex.

    Args:
        edit_df: DataFrame with ``Cause`` (sequence of items) and
            ``Frequency`` columns.

    Returns:
        A new DataFrame with columns ``Cause``, ``Effect``, ``Frequency``.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')
    effects = []
    for cause in edit_df['Cause']:
        matches = [item for item in cause if item in drules]
        # The original overwrote on every hit, so the last match wins.
        effects.append(matches[-1] if matches else np.nan)
    result = edit_df[['Cause', 'Frequency']].copy()
    # Object dtype lets the column hold both label strings and NaN.
    result.insert(1, 'Effect', pd.Series(effects, index=result.index, dtype=object))
    return result
683
-
684
-# 1. 두 아이템의 조합
685
# 1. Combinations of two items.
item_of_two_MTM = get_combination(MTM_df, 2)
prefix_two_MTM = []
prefix_two_df_MTM = pd.DataFrame()
# Fixed: the last argument was the undefined name `prefix_two_d_MTMf`, which
# raised NameError before any mining happened.
prefix_of_two_MTM = get_prefixspan(1, item_of_two_MTM, prefix_two_MTM, prefix_two_df_MTM)
prefix_of_two_MTM

# 2. Combinations of three items.
item_of_three_MTM = get_combination(MTM_df, 3)
prefix_three_tmp_MTM = []
prefix_three_df_MTM = pd.DataFrame()
prefix_of_three_MTM = get_prefixspan(2, item_of_three_MTM, prefix_three_tmp_MTM, prefix_three_df_MTM)
prefix_of_three_MTM

# 3. Combinations of four items.
item_of_four_MTM = get_combination(MTM_df, 4)
prefix_four_tmp_MTM = []
prefix_four_df_MTM = pd.DataFrame()
prefix_of_four_MTM = get_prefixspan(3, item_of_four_MTM, prefix_four_tmp_MTM, prefix_four_df_MTM)

# 4. Combinations of five items.
item_of_five_MTM = get_combination(MTM_df, 5)
prefix_five_tmp_MTM = []
prefix_five_df_MTM = pd.DataFrame()
prefix_of_five_MTM = get_prefixspan(4, item_of_five_MTM, prefix_five_tmp_MTM, prefix_five_df_MTM)
prefix_of_five_MTM

# 5. Combinations of six items.
item_of_six_MTM = get_combination(MTM_df, 6)
prefix_six_tmp_MTM = []
prefix_six_df_MTM = pd.DataFrame()
prefix_of_six_MTM = get_prefixspan(5, item_of_six_MTM, prefix_six_tmp_MTM, prefix_six_df_MTM)
prefix_of_six_MTM
717
-
718
-##################### MTM section End #####################
719
-
720
-
721
-# In[ ]:
722
-
723
-
724
-
725
-
726
-
727
-# In[ ]:
728
-
729
-
730
-
731
-

Notiek ielāde…
Atcelt
Saglabāt