Ver código fonte

upload edited version

master
yevKwon 4 anos atrás
pai
commit
dd84743e2b
1 arquivos alterados com 792 adições e 0 exclusões
  1. 792
    0
      keris.ipynb/PrefixSpan_edit_20210925.py

+ 792
- 0
keris.ipynb/PrefixSpan_edit_20210925.py Ver arquivo

@@ -0,0 +1,792 @@
1
+#!/usr/bin/env python
2
+# coding: utf-8
3
+
4
+# <p>NTM (harmful-traffic detection equipment)</p>
5
+# <p>MTM (malicious-file detection equipment)</p>
6
+
7
+# In[1]:
8
+
9
+
10
+#!/usr/bin/env python
11
+# coding: utf-8
12
+
13
+import pandas as pd
14
+import numpy as np
15
+from mlxtend.preprocessing import TransactionEncoder
16
+from mlxtend.frequent_patterns import association_rules, fpgrowth
17
+from prefixspan import PrefixSpan
18
+
19
+# load ts_data_accident-2020_sample.csv
20
+# to prevent dtypewarning, set low_memory=False
21
+df = pd.read_csv('ts_data_accident-2020_sample.csv', low_memory=False)
22
+df=df[['RISK_V2','INST_NM','DRULE_ATT_TYPE_CODE1','TW_ATT_IP','TW_ATT_PORT','TW_DMG_IP','TW_DMG_PORT','ACCD_DMG_PROTO_NM','TW_ATT_CT_NM','ACCD_FIND_MTD_CODE','DRULE_NM']].dropna()
23
+len(df) #len(df) : 10000, load successful
24
+df.head()
25
+
26
+
27
+# In[2]:
28
+
29
+
30
+##################### NTM section #####################
31
# Keep only the incidents found by the NTM equipment (ACCD_FIND_MTD_CODE == 1;
# the code is compared as the integer 1, not the string '1').
# .copy() detaches the subset from ``df``: NTM_df gets new columns assigned and
# a column dropped in place further down, which on a plain slice triggers
# SettingWithCopyWarning and is not guaranteed to stick.
NTM_df = df[df['ACCD_FIND_MTD_CODE'] == 1].copy()
len(NTM_df)
33
+#*NTM_df.head()
34
+
35
+
36
+# In[3]:
37
+
38
+
39
+# Extract the RISK_V2 column so the asset, intent and black-IP (source) flags can be read from it
40
+RISK_V2=NTM_df['RISK_V2']
41
+
42
+RISK_V2_FILTERED=RISK_V2.dropna()
43
+print(RISK_V2.size)
44
+print(RISK_V2_FILTERED.size)
45
+
46
+#* Added: the original filter_assets_value could not read the values, so RISK_V2 is expanded into a separate DataFrame
47
import json
from pandas import json_normalize

# Each RISK_V2 cell is a dict written with single quotes; swap the quotes so
# json.loads accepts it, then flatten it into a one-row frame.
# The frames are collected first and concatenated once: calling pd.concat
# inside the loop re-copies every earlier row on each iteration (quadratic).
_risk_frames = [
    json_normalize(json.loads(raw_risk.replace("'", "\"")))
    for raw_risk in RISK_V2_FILTERED
]
# pd.concat raises on an empty list, so fall back to an empty frame.
risk_df = pd.concat(_risk_frames, ignore_index=True) if _risk_frames else pd.DataFrame()

# Column names of the flattened RISK_V2 data (one per RISK_V2 key).
risk_df_col = risk_df.columns.values.tolist()
57
+
58
+
59
+# In[4]:
60
+
61
+
62
+# In[352]:
63
+asset_val = []
64
+intent_val=[]
65
+source_val=[]
66
def filter_assets_value(risk):
    """Collect the asset / intent / source flags that are set on each row.

    ``risk`` is a DataFrame with one column per RISK_V2 key.  For every row,
    three lists of ``'RISK_V2.<key>=<description>'`` strings are built from
    the columns whose name contains ``ASSETS_VAL_``, ``INTENT_VAL_`` or
    ``SOURCE_VAL_`` and whose value is set.  The lists are appended to the
    module-level ``asset_val``, ``intent_val`` and ``source_val``, so each of
    those grows by exactly one entry per row.  Nothing is returned.
    """
    # to_dict('records') yields one {column: value} dict per row, which avoids
    # a costly risk.iloc[i][key] lookup for every single cell.
    for row in risk.to_dict('records'):
        risks = []
        intents = []
        sources = []
        try:
            for key, value in row.items():
                # A key missing from some rows shows up as NaN, and NaN is
                # truthy; without this check it would be reported as "set".
                is_nan = isinstance(value, float) and value != value
                if is_nan or not value:
                    continue
                if 'ASSETS_VAL_' in key:
                    risks.append('RISK_V2.' + key + '=' + get_asset_desc(key))
                if 'INTENT_VAL_' in key:
                    intents.append('RISK_V2.' + key + '=' + get_intent_desc(key))
                if 'SOURCE_VAL_' in key:
                    sources.append('RISK_V2.' + key + '=' + get_source_desc(key))
        except Exception as exc:
            # Best effort: report the offending row (not the whole frame) and
            # keep whatever was gathered before the failure.
            print(row)
            print(repr(exc))
        finally:
            # Always append so the result lists stay aligned with the rows.
            asset_val.append(risks)
            intent_val.append(intents)
            source_val.append(sources)
89
+    
90
+    
91
+# modified
92
# ASSETS_VAL_<n> field name -> asset description.
# Entries 1-11 carry the '공인' (public) prefix and 12-22 the '사설' (private)
# prefix, each over the same eleven asset categories.
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the description for an ``ASSETS_VAL_<n>`` field name.

    Args:
        asset_field: field name such as ``'ASSETS_VAL_3'``.

    Returns:
        The matching description, or ``''`` when the name is not one of
        ``ASSETS_VAL_1`` .. ``ASSETS_VAL_22``.
    """
    # Non-string (possibly unhashable) input simply has no description; the
    # guard keeps the dict lookup from raising TypeError on e.g. a list.
    if not isinstance(asset_field, str):
        return ''
    return _ASSET_DESCRIPTIONS.get(asset_field, '')
139
+
140
+
141
+
142
+# modified
143
def filter_intent(intent):
    """List the intent flags that are set in ``intent``.

    ``intent`` is iterated for its keys and indexed by them.  Every key that
    contains ``'INTENT_VAL_'`` and maps to a truthy value contributes one
    ``'RISK_V2.<key>=<description>'`` string, in iteration order.
    """
    return [
        'RISK_V2.' + name + '=' + get_intent_desc(name)
        for name in intent
        if 'INTENT_VAL_' in name and intent[name]
    ]
150
+
151
+
152
+# In[356]:
153
+
154
+
155
# INTENT_VAL_<n> field name -> intent description.
_INTENT_DESCRIPTIONS = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc(intent_field):
    """Return the description for an ``INTENT_VAL_<n>`` field name.

    Args:
        intent_field: field name such as ``'INTENT_VAL_2'``.

    Returns:
        The matching description, or ``''`` for any other value.
    """
    # Guard against non-string (possibly unhashable) input before the lookup.
    if not isinstance(intent_field, str):
        return ''
    return _INTENT_DESCRIPTIONS.get(intent_field, '')
172
+
173
+
174
+# In[358]:
175
+
176
+
177
+# modified
178
def filter_source(source):
    """List the source flags that are set in ``source``.

    ``source`` is iterated for its keys and indexed by them.  Every key that
    contains ``'SOURCE_VAL_'`` and maps to a truthy value contributes one
    ``'RISK_V2.<key>=<description>'`` string, in iteration order.
    """
    return [
        'RISK_V2.' + name + '=' + get_source_desc(name)
        for name in source
        if 'SOURCE_VAL_' in name and source[name]
    ]
185
+
186
+
187
+# In[359]:
188
+
189
+
190
# SOURCE_VAL_<n> field name -> source description.  Only 1 and 3 are mapped;
# '북한IP' translates to "North Korea IP".
_SOURCE_DESCRIPTIONS = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc(source_field):
    """Return the description for a ``SOURCE_VAL_<n>`` field name.

    Args:
        source_field: field name such as ``'SOURCE_VAL_1'``.

    Returns:
        The matching description, or ``''`` for any other value.
    """
    # Guard against non-string (possibly unhashable) input before the lookup.
    if not isinstance(source_field, str):
        return ''
    return _SOURCE_DESCRIPTIONS.get(source_field, '')
197
+
198
+
199
+
200
+# In[5]:
201
+
202
+
203
# Fill asset_val / intent_val / source_val with one list per risk_df row.
filter_assets_value(risk_df)

# NaN values are replaced further down, so a row without any flag is stored
# as an empty string rather than as '[]'.
# Each collected list becomes a new column holding its string form with the
# surrounding square brackets stripped.
for _column, _collected in (
    ('ASSETS_VAL', asset_val),   # new assets column
    ('INTENT_VAL', intent_val),  # new column of intent value
    ('SOURCE_VAL', source_val),  # new column of SOURCE_VAL value
):
    NTM_df[_column] = _collected
    NTM_df[_column] = NTM_df[_column].astype(str)
    NTM_df[_column] = NTM_df[_column].str.replace('[', '', regex=False)
    NTM_df[_column] = NTM_df[_column].str.replace(']', '', regex=False)
NTM_df[:5]
223
+
224
+
225
+# In[ ]:
226
+
227
+
228
+
229
+
230
+
231
+# In[6]:
232
+
233
+
234
+# In[361]:
235
+
236
+
237
+NTM_df.drop(columns=['RISK_V2'], inplace=True)
238
+NTM_df.columns
239
+
240
+# In[362]:
241
+#NTM_df
242
+
243
+
244
+# In[ ]:
245
+
246
+
247
+
248
+
249
+
250
+# In[7]:
251
+
252
+
253
+##################### You can continue from here. #####################
254
+##################### Apply the algorithm to every combination of the 12 items below (item 12, the equipment ACCD_FIND_MTD_CODE, is excluded) #####################
255
+
256
+# It should be 13 columns in total
257
+
258
+# 1. 기관 INST_NM
259
+# 2. 공격 DRULE_ATT_TYPE_CODE1
260
+# 3. 자산 ASSETS_VAL
261
+# 4. 위협공격ip TW_ATT_IP
262
+# 5. 위협공격port TW_ATT_PORT
263
+# 6. 위협피해ip TW_DMG_IP
264
+# 7. 위협피해port TW_DMG_PORT
265
+# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM
266
+# 9. 공격국가 TW_ATT_CT_NM
267
+# 10. 의도(7개) INTENT_VAL
268
+# 11. IP/URL 가중치 SOURCE_VAL
269
+# 12. 장비 ACCD_FIND_MTD_CODE
270
+# 13. 탐지규칙명 DRULE_NM
271
+
272
+
273
+# In[363]:
274
+NTM_df.isna().sum()
275
+
276
+
277
+# Replace NaN: '' for the text columns, 0 for the remaining columns
278
+NTM_df['ACCD_DMG_PROTO_NM']=NTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')
279
+NTM_df['INST_NM']=NTM_df['INST_NM'].replace(np.nan,'')
280
+NTM_df['DRULE_ATT_TYPE_CODE1']=NTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')
281
+NTM_df['TW_ATT_IP']=NTM_df['TW_ATT_IP'].replace(np.nan,0)
282
+NTM_df['TW_ATT_PORT']=NTM_df['TW_ATT_PORT'].replace(np.nan,0)
283
+NTM_df['TW_DMG_IP']=NTM_df['TW_DMG_IP'].replace(np.nan,0)
284
+NTM_df['TW_DMG_PORT']=NTM_df['TW_DMG_PORT'].replace(np.nan,0)
285
+NTM_df['TW_ATT_CT_NM']=NTM_df['TW_ATT_CT_NM'].replace(np.nan,'')
286
+NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].replace(np.nan,0)
287
+NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].replace(np.nan,0)
288
+NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].replace(np.nan,0)
289
+NTM_df['DRULE_NM']=NTM_df['DRULE_NM'].replace(np.nan,'')
290
+
291
+
292
+# Check NaN out again
293
+NTM_df.isna().sum()
294
+
295
+
296
+# In[366]:
297
+
298
+
299
+# # Merge all
300
+
301
+# # Make one string from all of elements
302
+NTM_df['Combined']=NTM_df['INST_NM'].astype(str)+' '+NTM_df['TW_ATT_IP'].astype(str)+' '+NTM_df['TW_ATT_PORT'].astype(str)+' '+NTM_df['TW_DMG_IP'].astype(str)+' '+NTM_df['TW_DMG_PORT'].astype(str) +' '+NTM_df['ACCD_DMG_PROTO_NM'].astype(str)+' '+NTM_df['TW_ATT_CT_NM']+' '+NTM_df['ASSETS_VAL']+' '+NTM_df['INTENT_VAL']+' '+NTM_df['SOURCE_VAL']+' '+NTM_df['DRULE_ATT_TYPE_CODE1']+' '+NTM_df['DRULE_NM']
303
+
304
+NTM_com=NTM_df['Combined']
305
+NTM_com[:10]
306
+
307
+# 수정하여 merge한 부분
308
# One item sequence per incident: the twelve fields below, in this order.
# Selecting the columns and converting row-wise works by position.  The
# previous ``NTM_df[col][i]`` lookup is label-based, so driving it with
# range(len(NTM_df)) raises KeyError as soon as the filtered frame's index is
# no longer 0..n-1.
NTM_new_com = NTM_df[[
    'INST_NM',
    'TW_ATT_IP',
    'TW_ATT_PORT',
    'TW_DMG_IP',
    'TW_DMG_PORT',
    'ACCD_DMG_PROTO_NM',
    'TW_ATT_CT_NM',
    'ASSETS_VAL',
    'INTENT_VAL',
    'SOURCE_VAL',
    'DRULE_ATT_TYPE_CODE1',
    'DRULE_NM',
]].values.tolist()
315
+    
316
+
317
+# Change the type to DataFrame
318
+NTM_new_to_df=pd.DataFrame(NTM_new_com)
319
+NTM_new_to_df[:5]
320
+NTM_new_to_df.head()
321
+
322
+
323
+# In[8]:
324
+
325
+
326
+# Edit
327
+NTM_new_tolist=NTM_new_to_df.values.tolist()
328
+NTM_new_tolist[:2]
329
+
330
+
331
+# In[9]:
332
+
333
+
334
+from prefixspan import PrefixSpan
335
+# In[370]:
336
+# Apply prefixspan
337
+PrefixSpan_NTM = PrefixSpan(NTM_new_tolist)
338
+prefix_NTM=PrefixSpan_NTM.frequent(1)
339
+prefix_NTM_df=pd.DataFrame(prefix_NTM)
340
+prefix_NTM_df[:5]
341
+
342
+
343
+# In[17]:
344
+
345
+
346
+# Change the columns name
347
+prefix_NTM_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)
348
+
349
+# Make the new column for filling the Effect
350
+prefix_NTM_df['Effect']=np.nan
351
+
352
+# Change the order of columns
353
+prefix_NTM_df=prefix_NTM_df[['Cause','Effect','Frequency']]
354
+
355
+
356
+# Sort the scenarios for all possible combinations by Frequency, largest first
357
+prefix_NTM_df= prefix_NTM_df.sort_values(by=['Frequency'],ascending=False,ignore_index=True)
358
+
359
+
360
+# In[ ]:
361
+
362
+
363
+# In[373]:
364
+
365
+
366
+# Define the function that find the rule name 
367
def generate_cause(cell):
    """Fill the 'Effect' column of ``cell`` from the rule types in 'Cause'.

    Args:
        cell: DataFrame with a 'Cause' column (the mined pattern of each row)
            and an 'Effect' column.

    For each row, every rule type in the fixed list is tested with
    ``drule in cause``.  When several match, the one latest in the list wins
    (the behaviour of the original un-broken loop).  Rows without a match keep
    their current Effect value.  ``cell`` is modified in place; nothing is
    returned.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')
    effects = []
    for cause, current in zip(cell['Cause'], cell['Effect']):
        hits = [drule for drule in drules if drule in cause]
        effects.append(hits[-1] if hits else current)
    # Assign the whole column at once: ``frame.iloc[i]['Effect'] = value``
    # writes to a temporary row copy and never reaches the frame.
    cell['Effect'] = effects
374
+
375
+
376
+generate_cause(prefix_NTM_df)
377
+# Assign the rule name as an effect
378
+prefix_NTM_df.sort_values(by=['Frequency'],ascending=False)
379
+
380
+
381
+# In[ ]:
382
+
383
+
384
+# In[374]:
385
+
386
+
387
+# Attack Filter
388
def Attack_filter(ps):
    """Return True when the first item of pattern ``ps`` contains ' Attack'.

    An empty pattern, or one whose first item is not a string (for example a
    port number), returns False instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    # NOTE(review): only the first item is inspected, as in the original;
    # confirm that is intended for multi-item NTM patterns.
    return isinstance(head, str) and ' Attack' in head
390
+
391
+att_filter=prefix_NTM_df[list(map(Attack_filter, prefix_NTM_df.Cause))].fillna('Attack')
392
+
393
+# Malwr Filter
394
def Malwr_filter(ps):
    """Return True when the first item of pattern ``ps`` contains ' Malwr'.

    An empty pattern, or one whose first item is not a string (for example a
    port number), returns False instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    # NOTE(review): only the first item is inspected, as in the original;
    # confirm that is intended for multi-item NTM patterns.
    return isinstance(head, str) and ' Malwr' in head
396
+
397
+mal_filter=prefix_NTM_df[list(map(Malwr_filter, prefix_NTM_df.Cause))].fillna('Malwr')
398
+
399
+# DDOS Filter
400
def DDOS_filter(ps):
    """Return True when the first item of pattern ``ps`` contains ' DDOS'.

    An empty pattern, or one whose first item is not a string (for example a
    port number), returns False instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    # NOTE(review): only the first item is inspected, as in the original;
    # confirm that is intended for multi-item NTM patterns.
    return isinstance(head, str) and ' DDOS' in head
402
+
403
+dd_filter=prefix_NTM_df[list(map(DDOS_filter, prefix_NTM_df.Cause))].fillna('DDOS')
404
+
405
+# HACK Filter
406
def HACK_filter(ps):
    """Return True when the first item of pattern ``ps`` contains ' HACK'.

    An empty pattern, or one whose first item is not a string (for example a
    port number), returns False instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    # NOTE(review): only the first item is inspected, as in the original;
    # confirm that is intended for multi-item NTM patterns.
    return isinstance(head, str) and ' HACK' in head
408
+
409
+hack_filter=prefix_NTM_df[list(map(HACK_filter, prefix_NTM_df.Cause))].fillna('HACK')
410
+
411
+# MAIL Filter
412
def MAIL_filter(ps):
    """Return True when the first item of pattern ``ps`` contains ' MAIL'.

    An empty pattern, or one whose first item is not a string (for example a
    port number), returns False instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    # NOTE(review): only the first item is inspected, as in the original;
    # confirm that is intended for multi-item NTM patterns.
    return isinstance(head, str) and ' MAIL' in head
414
+
415
+mail_filter=prefix_NTM_df[list(map(MAIL_filter, prefix_NTM_df.Cause))].fillna('MAIL')
416
+
417
+# WEB Filter
418
def WEB_filter(ps):
    """Return True when the first item of pattern ``ps`` contains ' WEB'.

    An empty pattern, or one whose first item is not a string (for example a
    port number), returns False instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    # NOTE(review): only the first item is inspected, as in the original;
    # confirm that is intended for multi-item NTM patterns.
    return isinstance(head, str) and ' WEB' in head
420
+prefix_NTM_df
421
+web_filter=prefix_NTM_df[list(map(WEB_filter, prefix_NTM_df.Cause))].fillna('WEB')
422
+
423
+frames = [att_filter, mal_filter, dd_filter, hack_filter, mail_filter, web_filter]
424
+result = pd.concat(frames)
425
+result.sort_values(by=['Frequency'],ascending=False)
426
+
427
+
428
+# In[ ]:
429
+
430
+
431
+##################### NTM section End #####################
432
+
433
+
434
+# In[ ]:
435
+
436
+
437
+
438
+
439
+
440
+##################### MTM section #####################
441
+
442
+
443
+# In[375]:
444
+
445
+
446
# Keep only the incidents found by the MTM equipment (ACCD_FIND_MTD_CODE == 2).
# .copy() detaches the subset from ``df``: MTM_df gets new columns assigned and
# a column dropped in place further down, which on a plain slice triggers
# SettingWithCopyWarning and is not guaranteed to stick.
MTM_df = df[df['ACCD_FIND_MTD_CODE'] == 2].copy()
len(MTM_df)
448
+
449
+
450
+# In[376]:
451
+
452
+
453
+# Pick out it in order to get the asset, risk, intent, black IP out
454
+RISK_V2_MTM=MTM_df['RISK_V2']
455
+
456
+RISK_V2_FILTERED_MTM=RISK_V2_MTM.dropna()
457
+print(RISK_V2_MTM.size)
458
+print(RISK_V2_FILTERED_MTM.size)
459
+
460
+
461
+# In[377]:
462
+
463
+
464
def filter_assets_value_MTM(risk):
    """List the asset flags that are set in one RISK_V2 value.

    Args:
        risk: a mapping of RISK_V2 keys to values, or the single-quoted dict
            string found in the RISK_V2 column (the NTM section of this file
            decodes the same strings with a quote swap plus ``json.loads``).

    Returns:
        One ``'RISK_V2.<key>=<description>'`` string per key that contains
        ``'ASSETS_VAL_'`` and has a truthy value; ``[]`` when nothing is set
        or the value cannot be read.
    """
    import json  # local import: only needed to decode string input

    if isinstance(risk, str):
        # Iterating the raw string would walk its characters and never match
        # a key, so decode it into a dict first.
        try:
            risk = json.loads(risk.replace("'", "\""))
        except ValueError:
            print(risk)
            print(type(risk))
            return []
    risks = []
    try:
        for risk_key in risk:
            if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
                risks.append('RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key))
    except Exception:
        # Best effort, as before: report the offending value and return what
        # was gathered so far.
        print(risk)
        print(type(risk))
    return risks
476
+
477
+
478
+# In[378]:
479
+
480
+
481
+# modified
482
# ASSETS_VAL_<n> field name -> asset description (MTM section).
# Entries 1-11 carry the '공인' (public) prefix and 12-22 the '사설' (private)
# prefix, each over the same eleven asset categories.
_ASSET_DESCRIPTIONS_MTM = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc_MTM(asset_field):
    """Return the description for an ``ASSETS_VAL_<n>`` field name.

    Args:
        asset_field: field name such as ``'ASSETS_VAL_3'``.

    Returns:
        The matching description, or ``''`` when the name is not one of
        ``ASSETS_VAL_1`` .. ``ASSETS_VAL_22``.
    """
    # Non-string (possibly unhashable) input simply has no description; the
    # guard keeps the dict lookup from raising TypeError on e.g. a list.
    if not isinstance(asset_field, str):
        return ''
    return _ASSET_DESCRIPTIONS_MTM.get(asset_field, '')
529
+
530
+
531
+# In[379]:
532
+
533
+
534
+# New assets column
535
+MTM_df['ASSETS_VAL']=list(map(filter_assets_value_MTM, RISK_V2_FILTERED_MTM))
536
+MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].astype(str)
537
+MTM_df[:1]
538
+
539
+
540
+# In[381]:
541
+
542
+
543
+# modified
544
def filter_intent_MTM(intent):
    """List the intent flags that are set in one RISK_V2 value.

    Args:
        intent: a mapping of RISK_V2 keys to values, or the single-quoted
            dict string found in the RISK_V2 column (the NTM section of this
            file decodes the same strings with a quote swap plus
            ``json.loads``).

    Returns:
        One ``'RISK_V2.<key>=<description>'`` string per key that contains
        ``'INTENT_VAL_'`` and has a truthy value; ``[]`` when nothing is set
        or a string value cannot be decoded into a dict.
    """
    import json  # local import: only needed to decode string input

    if isinstance(intent, str):
        # Iterating the raw string would walk its characters and never match
        # a key, so decode it into a dict first.
        try:
            intent = json.loads(intent.replace("'", "\""))
        except ValueError:
            return []
        if not isinstance(intent, dict):
            return []
    return [
        'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)
        for intent_key in intent
        if 'INTENT_VAL_' in intent_key and intent[intent_key]
    ]
551
+
552
+
553
+# In[382]:
554
+
555
+
556
# INTENT_VAL_<n> field name -> intent description (MTM section).
_INTENT_DESCRIPTIONS_MTM = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc_MTM(intent_field):
    """Return the description for an ``INTENT_VAL_<n>`` field name.

    Args:
        intent_field: field name such as ``'INTENT_VAL_2'``.

    Returns:
        The matching description, or ``''`` for any other value.
    """
    # Guard against non-string (possibly unhashable) input before the lookup.
    if not isinstance(intent_field, str):
        return ''
    return _INTENT_DESCRIPTIONS_MTM.get(intent_field, '')
573
+
574
+
575
+# In[383]:
576
+
577
+
578
+# New column of intent value
579
+MTM_df['INTENT_VAL']=list(map(filter_intent_MTM, RISK_V2_FILTERED_MTM))
580
+MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].astype(str)
581
+MTM_df[:1]
582
+
583
+
584
+# In[384]:
585
+
586
+
587
+# modified
588
def filter_source_MTM(source):
    """List the source flags that are set in one RISK_V2 value.

    Args:
        source: a mapping of RISK_V2 keys to values, or the single-quoted
            dict string found in the RISK_V2 column (the NTM section of this
            file decodes the same strings with a quote swap plus
            ``json.loads``).

    Returns:
        One ``'RISK_V2.<key>=<description>'`` string per key that contains
        ``'SOURCE_VAL_'`` and has a truthy value; ``[]`` when nothing is set
        or a string value cannot be decoded into a dict.
    """
    import json  # local import: only needed to decode string input

    if isinstance(source, str):
        # Iterating the raw string would walk its characters and never match
        # a key, so decode it into a dict first.
        try:
            source = json.loads(source.replace("'", "\""))
        except ValueError:
            return []
        if not isinstance(source, dict):
            return []
    return [
        'RISK_V2.' + source_key + '=' + get_source_desc(source_key)
        for source_key in source
        if 'SOURCE_VAL_' in source_key and source[source_key]
    ]
595
+
596
+
597
+# In[385]:
598
+
599
+
600
# SOURCE_VAL_<n> field name -> source description (MTM section).  Only 1 and 3
# are mapped; '북한IP' translates to "North Korea IP".
_SOURCE_DESCRIPTIONS_MTM = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc_MTM(source_field):
    """Return the description for a ``SOURCE_VAL_<n>`` field name.

    Args:
        source_field: field name such as ``'SOURCE_VAL_1'``.

    Returns:
        The matching description, or ``''`` for any other value.
    """
    # Guard against non-string (possibly unhashable) input before the lookup.
    if not isinstance(source_field, str):
        return ''
    return _SOURCE_DESCRIPTIONS_MTM.get(source_field, '')
607
+
608
+
609
+# In[386]:
610
+
611
+
612
+# New column of SOURCE_VAL value
613
+MTM_df['SOURCE_VAL']=list(map(filter_source_MTM, RISK_V2_FILTERED_MTM))
614
+MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].astype(str)
615
+MTM_df[:5]
616
+
617
+
618
+# In[387]:
619
+
620
+
621
+MTM_df.drop(columns=['RISK_V2'], inplace=True)
622
+MTM_df.columns
623
+
624
+
625
+# In[388]:
626
+
627
+
628
+MTM_df.isna().sum()
629
+
630
+
631
+# In[389]:
632
+
633
+
634
+# Replace NaN: '' for the text columns, 0 for the remaining columns
635
+MTM_df['ACCD_DMG_PROTO_NM']=MTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')
636
+MTM_df['INST_NM']=MTM_df['INST_NM'].replace(np.nan,'')
637
+MTM_df['DRULE_ATT_TYPE_CODE1']=MTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')
638
+MTM_df['TW_ATT_IP']=MTM_df['TW_ATT_IP'].replace(np.nan,0)
639
+MTM_df['TW_ATT_PORT']=MTM_df['TW_ATT_PORT'].replace(np.nan,0)
640
+MTM_df['TW_DMG_IP']=MTM_df['TW_DMG_IP'].replace(np.nan,0)
641
+MTM_df['TW_DMG_PORT']=MTM_df['TW_DMG_PORT'].replace(np.nan,0)
642
+MTM_df['TW_ATT_CT_NM']=MTM_df['TW_ATT_CT_NM'].replace(np.nan,'')
643
+MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].replace(np.nan,0)
644
+MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].replace(np.nan,0)
645
+MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].replace(np.nan,0)
646
+MTM_df['DRULE_NM']=MTM_df['DRULE_NM'].replace(np.nan,'')
647
+
648
+
649
+# In[390]:
650
+
651
+
652
+# Check NaN out again
653
+MTM_df.isna().sum()
654
+
655
+
656
+# In[391]:
657
+
658
+
659
+# # Merge all
660
+
661
+# # Make one string from all of elements
662
+MTM_df['Combined']=MTM_df['INST_NM'].astype(str)+' '+MTM_df['TW_ATT_IP'].astype(str)+' '+MTM_df['TW_ATT_PORT'].astype(str)+' '+MTM_df['TW_DMG_IP'].astype(str)+' '+MTM_df['TW_DMG_PORT'].astype(str) +' '+MTM_df['ACCD_DMG_PROTO_NM'].astype(str)+' '+MTM_df['TW_ATT_CT_NM']+' '+MTM_df['ASSETS_VAL']+' '+MTM_df['INTENT_VAL']+' '+MTM_df['SOURCE_VAL']+' '+MTM_df['DRULE_ATT_TYPE_CODE1']+' '+MTM_df['DRULE_NM']
663
+
664
+MTM_com=MTM_df['Combined']
665
+MTM_com[:10]
666
+
667
+
668
+# In[392]:
669
+
670
+
671
+# Change the type to DataFrame
672
+MTM_to_df=pd.DataFrame(MTM_com)
673
+MTM_to_df[:5]
674
+
675
+
676
+# In[393]:
677
+
678
+
679
+# Change the type to list in order to apply the algorithm(nested list)
680
+MTM_tolist=MTM_to_df.values.tolist()
681
+MTM_tolist[:5]
682
+
683
+
684
+# In[394]:
685
+
686
+
687
+# Apply prefixspan
688
+PrefixSpan_MTM = PrefixSpan(MTM_tolist)
689
+
690
+###### Interchangeable ######
691
+# Get any over frequency 1 
692
+prefix_MTM=PrefixSpan_MTM.frequent(1)
693
+prefix_MTM[:3]
694
+
695
+
696
+# In[395]:
697
+
698
+
699
+# Put the result to DataFrame
700
+prefix_MTM_df=pd.DataFrame(prefix_MTM)
701
+prefix_MTM_df[:5]
702
+
703
+
704
+# In[396]:
705
+
706
+
707
+# Change the columns name
708
+prefix_MTM_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)
709
+
710
+# Make the new column for filling the Effect
711
+prefix_MTM_df['Effect']=np.nan
712
+
713
+# Change the order of columns
714
+prefix_MTM_df=prefix_MTM_df[['Cause','Effect','Frequency']]
715
+prefix_MTM_df[:2]
716
+
717
+
718
+# In[397]:
719
+
720
+
721
+# Define the function that find the rule name 
722
def generate_cause_MTM(cell):
    """Return the rule type named in the first item of pattern ``cell``.

    Args:
        cell: a mined pattern (sequence) whose first item is the combined
            text of one incident.

    Returns:
        The first of 'Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB' (tested
        in that order) that occurs in the first item preceded by a space, or
        ``''`` when none does, the pattern is empty, or its first item is not
        a string.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')
    # An empty pattern or a non-string item cannot name a rule type; return
    # '' rather than raising IndexError / TypeError.
    if not cell or not isinstance(cell[0], str):
        return ''
    text = cell[0]
    return next((drule for drule in drules if ' ' + drule in text), '')
728
+      
729
+# Mapping the rule name with cause that is the effect
730
# Map every mined pattern to the rule type it names.  This must call
# generate_cause_MTM, which takes one Cause cell; generate_cause expects a
# whole DataFrame and fails with AttributeError on a list cell.
effect_MTM = list(map(generate_cause_MTM, prefix_MTM_df.Cause))

# Assign the rule name as an effect
prefix_MTM_df['Effect'] = effect_MTM
prefix_MTM_df.sort_values(by=['Frequency'], ascending=False)
735
+
736
+
737
+# In[399]:
738
+
739
+
740
+# Attack Filter
741
def Attack_filter_MTM(ps):
    """Return True when the first item of pattern ``ps`` contains ' Attack'.

    An empty pattern, or one whose first item is not a string, returns False
    instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    return isinstance(head, str) and ' Attack' in head
743
+
744
+att_filter_MTM=prefix_MTM_df[list(map(Attack_filter_MTM, prefix_MTM_df.Cause))].fillna('Attack')
745
+
746
+# Malwr Filter
747
def Malwr_filter_MTM(ps):
    """Return True when the first item of pattern ``ps`` contains ' Malwr'.

    An empty pattern, or one whose first item is not a string, returns False
    instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    return isinstance(head, str) and ' Malwr' in head
749
+
750
+mal_filter_MTM=prefix_MTM_df[list(map(Malwr_filter_MTM, prefix_MTM_df.Cause))].fillna('Malwr')
751
+
752
+# DDOS Filter
753
def DDOS_filter_MTM(ps):
    """Return True when the first item of pattern ``ps`` contains ' DDOS'.

    An empty pattern, or one whose first item is not a string, returns False
    instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    return isinstance(head, str) and ' DDOS' in head
755
+
756
+dd_filter_MTM=prefix_MTM_df[list(map(DDOS_filter_MTM, prefix_MTM_df.Cause))].fillna('DDOS')
757
+
758
+# HACK Filter
759
def HACK_filter_MTM(ps):
    """Return True when the first item of pattern ``ps`` contains ' HACK'.

    An empty pattern, or one whose first item is not a string, returns False
    instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    return isinstance(head, str) and ' HACK' in head
761
+
762
+hack_filter_MTM=prefix_MTM_df[list(map(HACK_filter_MTM, prefix_MTM_df.Cause))].fillna('HACK')
763
+
764
+# MAIL Filter
765
def MAIL_filter_MTM(ps):
    """Return True when the first item of pattern ``ps`` contains ' MAIL'.

    An empty pattern, or one whose first item is not a string, returns False
    instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    return isinstance(head, str) and ' MAIL' in head
767
+
768
+mail_filter_MTM=prefix_MTM_df[list(map(MAIL_filter_MTM, prefix_MTM_df.Cause))].fillna('MAIL')
769
+
770
+# WEB Filter
771
def WEB_filter_MTM(ps):
    """Return True when the first item of pattern ``ps`` contains ' WEB'.

    An empty pattern, or one whose first item is not a string, returns False
    instead of raising.
    """
    if not ps:
        return False
    head = ps[0]
    return isinstance(head, str) and ' WEB' in head
773
+
774
+prefix_MTM_df[:5]
775
+web_filter_MTM=prefix_MTM_df[list(map(WEB_filter_MTM, prefix_MTM_df.Cause))].fillna('WEB')
776
+
777
+frames_MTM = [att_filter_MTM, mal_filter_MTM, dd_filter_MTM, hack_filter_MTM, mail_filter_MTM, web_filter_MTM]
778
+result_MTM = pd.concat(frames_MTM)
779
+result_MTM.sort_values(by=['Frequency'],ascending=False)
780
+
781
+
782
+# In[ ]:
783
+
784
+
785
+
786
+
787
+
788
+# In[ ]:
789
+
790
+
791
+
792
+

Carregando…
Cancelar
Salvar