Browse Source

삭제 'keris.ipynb/PrefixSpan_edit_20210925.py'

master
yevKwon 4 years ago
parent
commit
676b7ddc30
1 changed files with 0 additions and 792 deletions
  1. 0
    792
      keris.ipynb/PrefixSpan_edit_20210925.py

+ 0
- 792
keris.ipynb/PrefixSpan_edit_20210925.py View File

@@ -1,792 +0,0 @@
1
-#!/usr/bin/env python
2
-# coding: utf-8
3
-
4
-# <p>NTM(유해트래픽 탐지장비)</p>
5
-# <p>MTM(악성파일 탐지장비)</p>
6
-
7
-# In[1]:
8
-
9
-
10
-#!/usr/bin/env python
11
-# coding: utf-8
12
-
13
-import pandas as pd
14
-import numpy as np
15
-from mlxtend.preprocessing import TransactionEncoder
16
-from mlxtend.frequent_patterns import association_rules, fpgrowth
17
-from prefixspan import PrefixSpan
18
-
19
# Load the 2020 accident sample. low_memory=False is set to prevent pandas'
# DtypeWarning while reading the file.
ANALYSIS_COLUMNS = [
    'RISK_V2', 'INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',
    'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',
    'ACCD_FIND_MTD_CODE', 'DRULE_NM',
]
df = pd.read_csv('ts_data_accident-2020_sample.csv', low_memory=False)
# Keep only the analysed columns and discard rows missing any of them.
df = df.loc[:, ANALYSIS_COLUMNS].dropna()
len(df)  # the original author recorded 10000 rows here (load successful)
df.head()
25
-
26
-
27
-# In[2]:
28
-
29
-
30
##################### NTM section #####################
# Select the rows whose detection-method code equals the integer 1.
# .copy() gives NTM_df its own data: columns are added to it and dropped from
# it later, which on a bare slice of df triggers SettingWithCopyWarning.
NTM_df = df[df['ACCD_FIND_MTD_CODE'] == 1].copy()
len(NTM_df)


# In[3]:


# RISK_V2 carries the asset / intent / source flags of each record as text.
RISK_V2 = NTM_df['RISK_V2']

RISK_V2_FILTERED = RISK_V2.dropna()
print(RISK_V2.size)
print(RISK_V2_FILTERED.size)

# Added: the older per-value filter did not recognise the values, so RISK_V2
# is expanded into its own DataFrame (one column per flag).
import json
from pandas import json_normalize

# Parse everything first and normalise once; concatenating a one-row frame
# per record inside the loop re-copies the growing frame every iteration.
risk_records = []
for newVal in RISK_V2_FILTERED:
    # The stored text uses single quotes; swap them so json can read it.
    parsed = json.loads(newVal.replace("'", "\""))
    if isinstance(parsed, list):
        risk_records.extend(parsed)
    else:
        risk_records.append(parsed)
risk_df = json_normalize(risk_records)

risk_df_col = risk_df.columns.values.tolist()
57
-
58
-
59
-# In[4]:
60
-
61
-
62
-# In[352]:
63
# Per-row accumulators filled by filter_assets_value(): entry i holds the
# labels collected for row i of the frame that was passed in.
asset_val = []
intent_val = []
source_val = []


def filter_assets_value(risk):
    """Collect asset, intent and source labels for every row of ``risk``.

    For each row, every column whose name contains 'ASSETS_VAL_',
    'INTENT_VAL_' or 'SOURCE_VAL_' and whose value is set contributes a
    label of the form 'RISK_V2.<column>=<description>'. One list per row is
    appended to the module-level ``asset_val``, ``intent_val`` and
    ``source_val`` lists (in row order). Nothing is returned.

    NaN is treated as "not set": a column missing from one record shows up
    as NaN after normalisation, and NaN is truthy in Python.

    Args:
        risk: DataFrame with one column per RISK_V2 flag.
    """

    def is_set(flag):
        # NaN != NaN, which lets us detect it without importing anything.
        if isinstance(flag, float) and flag != flag:
            return False
        return bool(flag)

    for row in risk.to_dict('records'):
        risks = []
        intents = []
        sources = []
        try:
            for key, flag in row.items():
                if not isinstance(key, str) or not is_set(flag):
                    continue
                if 'ASSETS_VAL_' in key:
                    risks.append('RISK_V2.' + key + '=' + get_asset_desc(key))
                if 'INTENT_VAL_' in key:
                    intents.append('RISK_V2.' + key + '=' + get_intent_desc(key))
                if 'SOURCE_VAL_' in key:
                    sources.append('RISK_V2.' + key + '=' + get_source_desc(key))
        except (TypeError, ValueError) as err:
            # Best effort: report the malformed row and keep what was
            # collected so the output stays aligned with the input rows.
            print('filter_assets_value: skipped malformed row', row, err)
        asset_val.append(risks)
        intent_val.append(intents)
        source_val.append(sources)
89
-    
90
-    
91
-# modified
92
# RISK_V2 asset flag name -> description emitted in the label text.
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the description for an 'ASSETS_VAL_<n>' field name.

    Args:
        asset_field: Field name such as 'ASSETS_VAL_3'.

    Returns:
        The matching description, or '' for any unknown or non-string name.
    """
    if not isinstance(asset_field, str):
        return ''
    return _ASSET_DESCRIPTIONS.get(asset_field, '')
139
-
140
-
141
-
142
-# modified
143
def filter_intent(intent):
    """Return 'RISK_V2.<key>=<description>' labels for the set intent flags.

    Args:
        intent: Mapping of RISK_V2 field names to flag values. A raw RISK_V2
            string (dict-like text, single or double quoted) is also
            accepted and parsed first; iterating such a string directly
            walks its characters and can never match a field name.

    Returns:
        List of labels for keys containing 'INTENT_VAL_' whose value is
        truthy. Text that cannot be parsed into a mapping yields [].
    """
    if isinstance(intent, str):
        import json  # local import keeps the function self-contained

        try:
            intent = json.loads(intent.replace("'", '"'))
        except ValueError:
            print('filter_intent: could not parse RISK_V2 text:', intent)
            return []
        if not isinstance(intent, dict):
            return []
    return [
        'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)
        for intent_key in intent
        if 'INTENT_VAL_' in intent_key and intent[intent_key]
    ]
150
-
151
-
152
-# In[356]:
153
-
154
-
155
# RISK_V2 intent flag name -> description emitted in the label text.
_INTENT_DESCRIPTIONS = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc(intent_field):
    """Return the description for an 'INTENT_VAL_<n>' field name.

    Args:
        intent_field: Field name such as 'INTENT_VAL_2'.

    Returns:
        The matching description, or '' for any unknown or non-string name.
    """
    if not isinstance(intent_field, str):
        return ''
    return _INTENT_DESCRIPTIONS.get(intent_field, '')
172
-
173
-
174
-# In[358]:
175
-
176
-
177
-# modified
178
def filter_source(source):
    """Return 'RISK_V2.<key>=<description>' labels for the set source flags.

    Args:
        source: Mapping of RISK_V2 field names to flag values. A raw RISK_V2
            string (dict-like text, single or double quoted) is also
            accepted and parsed first; iterating such a string directly
            walks its characters and can never match a field name.

    Returns:
        List of labels for keys containing 'SOURCE_VAL_' whose value is
        truthy. Text that cannot be parsed into a mapping yields [].
    """
    if isinstance(source, str):
        import json  # local import keeps the function self-contained

        try:
            source = json.loads(source.replace("'", '"'))
        except ValueError:
            print('filter_source: could not parse RISK_V2 text:', source)
            return []
        if not isinstance(source, dict):
            return []
    return [
        'RISK_V2.' + source_key + '=' + get_source_desc(source_key)
        for source_key in source
        if 'SOURCE_VAL_' in source_key and source[source_key]
    ]
185
-
186
-
187
-# In[359]:
188
-
189
-
190
# RISK_V2 source flag name -> description emitted in the label text.
# Only SOURCE_VAL_1 and SOURCE_VAL_3 have descriptions in this file.
_SOURCE_DESCRIPTIONS = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc(source_field):
    """Return the description for a 'SOURCE_VAL_<n>' field name.

    Args:
        source_field: Field name such as 'SOURCE_VAL_1'.

    Returns:
        The matching description, or '' for any unknown or non-string name.
    """
    if not isinstance(source_field, str):
        return ''
    return _SOURCE_DESCRIPTIONS.get(source_field, '')
197
-
198
-
199
-
200
-# In[5]:
201
-
202
-
203
# Fill asset_val / intent_val / source_val with one label list per risk_df row.
filter_assets_value(risk_df)

# NaN values are replaced further down, so an empty label list is stored as
# blank text rather than the literal '[]': each list is rendered with str()
# and then every '[' and ']' character is removed.
for column_name, labels in (
    ('ASSETS_VAL', asset_val),
    ('INTENT_VAL', intent_val),
    ('SOURCE_VAL', source_val),
):
    NTM_df[column_name] = labels
    as_text = NTM_df[column_name].astype(str)
    without_open = as_text.str.replace('[', '', regex=False)
    NTM_df[column_name] = without_open.str.replace(']', '', regex=False)
NTM_df[:5]


# In[6]:


# The raw RISK_V2 text is no longer needed once the three columns exist.
NTM_df.drop(columns=['RISK_V2'], inplace=True)
NTM_df.columns
239
-
240
-# In[362]:
241
-#NTM_df
242
-
243
-
244
-# In[ ]:
245
-
246
-
247
-
248
-
249
-
250
-# In[7]:
251
-
252
-
253
-##################### 여기서부터 진행하시면 됩니다. #####################
254
-##################### 아래 12개 아이템(12. 장비 ACCD_FIND_MTD_CODE 제외)에 대해서 모든 아이템 조합에 알고리즘 적용하기#####################
255
-
256
-# It should be 13 columns in total
257
-
258
-# 1. 기관 INST_NM
259
-# 2. 공격 DRULE_ATT_TYPE_CODE1
260
-# 3. 자산 ASSETS_VAL
261
-# 4. 위협공격ip TW_ATT_IP
262
-# 5. 위협공격port TW_ATT_PORT
263
-# 6. 위협피해ip TW_DMG_IP
264
-# 7. 위협피해port TW_DMG_PORT
265
-# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM
266
-# 9. 공격국가 TW_ATT_CT_NM
267
-# 10. 의도(7개) INTENT_VAL
268
-# 11. IP/URL 가중치 SOURCE_VAL
269
-# 12. 장비 ACCD_FIND_MTD_CODE
270
-# 13. 탐지규칙명 DRULE_NM
271
-
272
-
273
-# In[363]:
274
NTM_df.isna().sum()


# Replacement used for NaN in each column: '' for some columns, 0 for others.
NTM_NAN_DEFAULTS = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for column_name, default in NTM_NAN_DEFAULTS.items():
    NTM_df[column_name] = NTM_df[column_name].replace(np.nan, default)


# Check for remaining NaN values.
NTM_df.isna().sum()
294
-
295
-
296
-# In[366]:
297
-
298
-
299
-# # Merge all
300
-
301
-# # Make one string from all of elements
302
# The twelve fields that make up one record, in pattern order.
NTM_PATTERN_COLUMNS = [
    'INST_NM', 'TW_ATT_IP', 'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT',
    'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM', 'ASSETS_VAL', 'INTENT_VAL',
    'SOURCE_VAL', 'DRULE_ATT_TYPE_CODE1', 'DRULE_NM',
]

# One space-separated string per record. Every field is converted to str so
# a non-string column cannot break the concatenation.
NTM_combined = NTM_df[NTM_PATTERN_COLUMNS[0]].astype(str)
for column_name in NTM_PATTERN_COLUMNS[1:]:
    NTM_combined = NTM_combined + ' ' + NTM_df[column_name].astype(str)
NTM_df['Combined'] = NTM_combined

NTM_com = NTM_df['Combined']
NTM_com[:10]

# One 12-item list per record. Rows are taken by position: NTM_df keeps the
# index labels of df (filtered rows are missing), so looking rows up with
# NTM_df[col][i] for i in range(len(NTM_df)) addresses labels that may not
# exist or that belong to a different row.
NTM_new_com = NTM_df[NTM_PATTERN_COLUMNS].values.tolist()


# Same data as a DataFrame (one column per field position).
NTM_new_to_df = pd.DataFrame(NTM_new_com)
NTM_new_to_df.head()
321
-
322
-
323
-# In[8]:
324
-
325
-
326
-# Edit
327
# Nested list form (one inner list per record) fed to the miner.
NTM_new_tolist = NTM_new_to_df.values.tolist()
NTM_new_tolist[:2]


# In[9]:


from prefixspan import PrefixSpan

# Apply PrefixSpan, calling frequent() with 1 as in the original script.
PrefixSpan_NTM = PrefixSpan(NTM_new_tolist)
prefix_NTM = PrefixSpan_NTM.frequent(1)


# In[17]:


# Result columns 0 and 1 are renamed to Frequency and Cause; Effect starts
# empty (NaN) and the columns are put in Cause / Effect / Frequency order.
prefix_NTM_df = pd.DataFrame(prefix_NTM).rename(
    columns={0: 'Frequency', 1: 'Cause'}
)
prefix_NTM_df['Effect'] = np.nan
prefix_NTM_df = prefix_NTM_df.loc[:, ['Cause', 'Effect', 'Frequency']]

# Sort every mined combination by Frequency, largest first.
prefix_NTM_df = prefix_NTM_df.sort_values(
    by=['Frequency'], ascending=False, ignore_index=True
)
358
-
359
-
360
-# In[ ]:
361
-
362
-
363
-# In[373]:
364
-
365
-
366
-# Define the function that find the rule name 
367
def generate_cause(cell):
    """Fill the 'Effect' column of ``cell`` from the rule family in 'Cause'.

    For each row, the 'Cause' value is searched for the rule-family names
    'Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr' and 'WEB' using ``in`` (list
    membership for a list, substring for a string). When several names
    match, the last one in that order is used, as in the original loop,
    which never stopped at the first match. Rows without a match keep
    their current 'Effect' value.

    The column is replaced in one assignment on ``cell`` itself. Writing
    through ``frame.iloc[i]['Effect'] = ...`` only modifies a temporary row
    object, so the frame is never updated.

    Args:
        cell: DataFrame with 'Cause' and 'Effect' columns; modified in place.
    """
    drules = ['Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB']
    effects = []
    for cause, current in zip(cell['Cause'], cell['Effect'].tolist()):
        matched = [drule for drule in drules if drule in cause]
        effects.append(matched[-1] if matched else current)
    cell['Effect'] = effects
374
-
375
-
376
generate_cause(prefix_NTM_df)
# Show the patterns ordered by Frequency, largest first.
prefix_NTM_df.sort_values(by=['Frequency'], ascending=False)


def _cause_mentions(ps, label):
    """Return True when any string item of pattern ``ps`` names ``label``.

    An item matches when it equals ``label`` (one field per item) or when it
    contains ' ' + label (a whole record joined into one string). Non-string
    items such as numeric ports are skipped: testing only ps[0] with ``in``
    raises TypeError as soon as a pattern starts with a number.
    """
    spaced = ' ' + label
    return any(
        isinstance(item, str) and (item == label or spaced in item)
        for item in ps
    )


# One predicate per rule family; names are kept for existing callers.
def Attack_filter(ps):
    return _cause_mentions(ps, 'Attack')


def Malwr_filter(ps):
    return _cause_mentions(ps, 'Malwr')


def DDOS_filter(ps):
    return _cause_mentions(ps, 'DDOS')


def HACK_filter(ps):
    return _cause_mentions(ps, 'HACK')


def MAIL_filter(ps):
    return _cause_mentions(ps, 'MAIL')


def WEB_filter(ps):
    return _cause_mentions(ps, 'WEB')


# For each family: keep the matching patterns and fill every remaining NaN
# (in practice the Effect column) with the family name.
att_filter = prefix_NTM_df[list(map(Attack_filter, prefix_NTM_df.Cause))].fillna('Attack')
mal_filter = prefix_NTM_df[list(map(Malwr_filter, prefix_NTM_df.Cause))].fillna('Malwr')
dd_filter = prefix_NTM_df[list(map(DDOS_filter, prefix_NTM_df.Cause))].fillna('DDOS')
hack_filter = prefix_NTM_df[list(map(HACK_filter, prefix_NTM_df.Cause))].fillna('HACK')
mail_filter = prefix_NTM_df[list(map(MAIL_filter, prefix_NTM_df.Cause))].fillna('MAIL')
web_filter = prefix_NTM_df[list(map(WEB_filter, prefix_NTM_df.Cause))].fillna('WEB')

frames = [att_filter, mal_filter, dd_filter, hack_filter, mail_filter, web_filter]
result = pd.concat(frames)
result.sort_values(by=['Frequency'], ascending=False)
426
-
427
-
428
-# In[ ]:
429
-
430
-
431
-##################### NTM section End #####################
432
-
433
-
434
-# In[ ]:
435
-
436
-
437
-
438
-
439
-
440
-##################### MTM section #####################
441
-
442
-
443
-# In[375]:
444
-
445
-
446
# Select the rows whose detection-method code equals the integer 2.
# .copy() gives MTM_df its own data: columns are added to it and dropped from
# it later, which on a bare slice of df triggers SettingWithCopyWarning.
MTM_df = df[df['ACCD_FIND_MTD_CODE'] == 2].copy()
len(MTM_df)


# In[376]:


# RISK_V2 carries the asset / intent / source flags of each record as text.
RISK_V2_MTM = MTM_df['RISK_V2']

RISK_V2_FILTERED_MTM = RISK_V2_MTM.dropna()
print(RISK_V2_MTM.size)
print(RISK_V2_FILTERED_MTM.size)
459
-
460
-
461
-# In[377]:
462
-
463
-
464
def filter_assets_value_MTM(risk):
    """Return 'RISK_V2.<key>=<description>' labels for the set asset flags.

    Args:
        risk: Mapping of RISK_V2 field names to flag values, or the raw
            RISK_V2 string (dict-like text, single or double quoted), which
            is parsed first. Iterating such a string directly walks its
            characters and can never match a field name.

    Returns:
        List of labels for keys containing 'ASSETS_VAL_' whose value is
        truthy. Malformed input is reported with print() and whatever was
        collected up to that point (possibly []) is returned.
    """
    risks = []
    try:
        if isinstance(risk, str):
            import json  # local import keeps the function self-contained

            risk = json.loads(risk.replace("'", '"'))
        for risk_key in risk:
            if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
                risks.append(
                    'RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key)
                )
    except (TypeError, ValueError, KeyError, IndexError) as err:
        # Best effort, as before, but without hiding unrelated errors or
        # returning from a finally block.
        print('filter_assets_value_MTM: malformed RISK_V2 value:', risk, err)
    return risks
476
-
477
-
478
-# In[378]:
479
-
480
-
481
-# modified
482
# RISK_V2 asset flag name -> description emitted in the label text.
_ASSET_DESCRIPTIONS_MTM = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc_MTM(asset_field):
    """Return the description for an 'ASSETS_VAL_<n>' field name.

    Args:
        asset_field: Field name such as 'ASSETS_VAL_3'.

    Returns:
        The matching description, or '' for any unknown or non-string name.
    """
    if not isinstance(asset_field, str):
        return ''
    return _ASSET_DESCRIPTIONS_MTM.get(asset_field, '')
529
-
530
-
531
-# In[379]:
532
-
533
-
534
-# New assets column
535
# New assets column: one label list per RISK_V2 value, stored as text.
mtm_asset_labels = [
    filter_assets_value_MTM(risk) for risk in RISK_V2_FILTERED_MTM
]
MTM_df['ASSETS_VAL'] = mtm_asset_labels
MTM_df['ASSETS_VAL'] = MTM_df['ASSETS_VAL'].astype(str)
MTM_df[:1]
538
-
539
-
540
-# In[381]:
541
-
542
-
543
-# modified
544
def filter_intent_MTM(intent):
    """Return 'RISK_V2.<key>=<description>' labels for the set intent flags.

    Args:
        intent: Mapping of RISK_V2 field names to flag values, or the raw
            RISK_V2 string (dict-like text, single or double quoted), which
            is parsed first. Iterating such a string directly walks its
            characters and can never match a field name.

    Returns:
        List of labels for keys containing 'INTENT_VAL_' whose value is
        truthy. Text that cannot be parsed into a mapping yields [].
    """
    if isinstance(intent, str):
        import json  # local import keeps the function self-contained

        try:
            intent = json.loads(intent.replace("'", '"'))
        except ValueError:
            print('filter_intent_MTM: could not parse RISK_V2 text:', intent)
            return []
        if not isinstance(intent, dict):
            return []
    return [
        'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)
        for intent_key in intent
        if 'INTENT_VAL_' in intent_key and intent[intent_key]
    ]
551
-
552
-
553
-# In[382]:
554
-
555
-
556
# RISK_V2 intent flag name -> description emitted in the label text.
_INTENT_DESCRIPTIONS_MTM = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc_MTM(intent_field):
    """Return the description for an 'INTENT_VAL_<n>' field name.

    Args:
        intent_field: Field name such as 'INTENT_VAL_2'.

    Returns:
        The matching description, or '' for any unknown or non-string name.
    """
    if not isinstance(intent_field, str):
        return ''
    return _INTENT_DESCRIPTIONS_MTM.get(intent_field, '')
573
-
574
-
575
-# In[383]:
576
-
577
-
578
-# New column of intent value
579
# New intent column: one label list per RISK_V2 value, stored as text.
mtm_intent_labels = [
    filter_intent_MTM(risk) for risk in RISK_V2_FILTERED_MTM
]
MTM_df['INTENT_VAL'] = mtm_intent_labels
MTM_df['INTENT_VAL'] = MTM_df['INTENT_VAL'].astype(str)
MTM_df[:1]
582
-
583
-
584
-# In[384]:
585
-
586
-
587
-# modified
588
def filter_source_MTM(source):
    """Return 'RISK_V2.<key>=<description>' labels for the set source flags.

    Args:
        source: Mapping of RISK_V2 field names to flag values, or the raw
            RISK_V2 string (dict-like text, single or double quoted), which
            is parsed first. Iterating such a string directly walks its
            characters and can never match a field name.

    Returns:
        List of labels for keys containing 'SOURCE_VAL_' whose value is
        truthy. Text that cannot be parsed into a mapping yields [].
    """
    if isinstance(source, str):
        import json  # local import keeps the function self-contained

        try:
            source = json.loads(source.replace("'", '"'))
        except ValueError:
            print('filter_source_MTM: could not parse RISK_V2 text:', source)
            return []
        if not isinstance(source, dict):
            return []
    return [
        'RISK_V2.' + source_key + '=' + get_source_desc(source_key)
        for source_key in source
        if 'SOURCE_VAL_' in source_key and source[source_key]
    ]
595
-
596
-
597
-# In[385]:
598
-
599
-
600
# RISK_V2 source flag name -> description emitted in the label text.
# Only SOURCE_VAL_1 and SOURCE_VAL_3 have descriptions in this file.
_SOURCE_DESCRIPTIONS_MTM = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc_MTM(source_field):
    """Return the description for a 'SOURCE_VAL_<n>' field name.

    Args:
        source_field: Field name such as 'SOURCE_VAL_1'.

    Returns:
        The matching description, or '' for any unknown or non-string name.
    """
    if not isinstance(source_field, str):
        return ''
    return _SOURCE_DESCRIPTIONS_MTM.get(source_field, '')
607
-
608
-
609
-# In[386]:
610
-
611
-
612
-# New column of SOURCE_VAL value
613
# New source column: one label list per RISK_V2 value, stored as text.
mtm_source_labels = [
    filter_source_MTM(risk) for risk in RISK_V2_FILTERED_MTM
]
MTM_df['SOURCE_VAL'] = mtm_source_labels
MTM_df['SOURCE_VAL'] = MTM_df['SOURCE_VAL'].astype(str)
MTM_df[:5]
616
-
617
-
618
-# In[387]:
619
-
620
-
621
# The raw RISK_V2 text is no longer needed once the three columns exist.
MTM_df.drop(columns=['RISK_V2'], inplace=True)
MTM_df.columns


# In[388]:


MTM_df.isna().sum()


# In[389]:


# Replacement used for NaN in each column: '' for some columns, 0 for others.
MTM_NAN_DEFAULTS = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for column_name, default in MTM_NAN_DEFAULTS.items():
    MTM_df[column_name] = MTM_df[column_name].replace(np.nan, default)


# In[390]:


# Check for remaining NaN values.
MTM_df.isna().sum()
654
-
655
-
656
-# In[391]:
657
-
658
-
659
-# # Merge all
660
-
661
-# # Make one string from all of elements
662
# Build one space-separated string per record. The first six fields are
# converted with astype(str); the last six are concatenated as they are.
MTM_STRINGIFIED_COLUMNS = [
    'INST_NM', 'TW_ATT_IP', 'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT',
    'ACCD_DMG_PROTO_NM',
]
MTM_RAW_COLUMNS = [
    'TW_ATT_CT_NM', 'ASSETS_VAL', 'INTENT_VAL', 'SOURCE_VAL',
    'DRULE_ATT_TYPE_CODE1', 'DRULE_NM',
]
MTM_combined = MTM_df[MTM_STRINGIFIED_COLUMNS[0]].astype(str)
for column_name in MTM_STRINGIFIED_COLUMNS[1:]:
    MTM_combined = MTM_combined + ' ' + MTM_df[column_name].astype(str)
for column_name in MTM_RAW_COLUMNS:
    MTM_combined = MTM_combined + ' ' + MTM_df[column_name]
MTM_df['Combined'] = MTM_combined

MTM_com = MTM_df['Combined']
MTM_com[:10]


# In[392]:


# Wrap the combined strings in a one-column DataFrame.
MTM_to_df = pd.DataFrame(MTM_com)
MTM_to_df[:5]


# In[393]:


# Nested list form (each inner list holds one combined string) for the miner.
MTM_tolist = MTM_to_df.values.tolist()
MTM_tolist[:5]


# In[394]:


# Apply PrefixSpan.
PrefixSpan_MTM = PrefixSpan(MTM_tolist)

###### Interchangeable ######
# frequent() is called with 1, as in the original script.
prefix_MTM = PrefixSpan_MTM.frequent(1)
prefix_MTM[:3]


# In[395]:


# Result columns 0 and 1 are renamed to Frequency and Cause; Effect starts
# empty (NaN) and the columns are put in Cause / Effect / Frequency order.
prefix_MTM_df = pd.DataFrame(prefix_MTM).rename(
    columns={0: 'Frequency', 1: 'Cause'}
)
prefix_MTM_df['Effect'] = np.nan
prefix_MTM_df = prefix_MTM_df.loc[:, ['Cause', 'Effect', 'Frequency']]
prefix_MTM_df[:2]
716
-
717
-
718
-# In[397]:
719
-
720
-
721
-# Define the function that find the rule name 
722
def generate_cause_MTM(cell):
    """Return the rule family named in the first item of pattern ``cell``.

    The first item is searched for ' Attack', ' DDOS', ' HACK', ' MAIL',
    ' Malwr' and ' WEB' (note the leading space), in that order, and the
    first family found is returned without the space.

    Args:
        cell: A mined pattern; its first item is expected to be a string.

    Returns:
        The family name, or '' when nothing matches, the pattern is empty,
        or its first item is not a string.
    """
    drules = ['Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB']
    try:
        text = cell[0]
    except (IndexError, KeyError, TypeError):
        # Empty or non-indexable pattern: nothing to classify.
        return ''
    if not isinstance(text, str):
        return ''
    return next((drule for drule in drules if ' ' + drule in text), '')
728
-      
729
# Map every mined pattern to its rule family. This must use
# generate_cause_MTM, which takes one pattern at a time; generate_cause
# expects a whole DataFrame and fails on a plain list.
effect_MTM = list(map(generate_cause_MTM, prefix_MTM_df.Cause))

# Assign the rule family as the effect.
prefix_MTM_df['Effect'] = effect_MTM
prefix_MTM_df.sort_values(by=['Frequency'], ascending=False)
735
-
736
-
737
-# In[399]:
738
-
739
-
740
-# Attack Filter
741
def _first_item_contains(ps, token):
    """Return whether ``token`` occurs in the first item of pattern ``ps``."""
    return token in ps[0]


# One predicate per rule family; each looks for ' <family>' (with the
# leading space) in the first item of the pattern.
def Attack_filter_MTM(ps):
    return _first_item_contains(ps, ' Attack')


def Malwr_filter_MTM(ps):
    return _first_item_contains(ps, ' Malwr')


def DDOS_filter_MTM(ps):
    return _first_item_contains(ps, ' DDOS')


def HACK_filter_MTM(ps):
    return _first_item_contains(ps, ' HACK')


def MAIL_filter_MTM(ps):
    return _first_item_contains(ps, ' MAIL')


def WEB_filter_MTM(ps):
    return _first_item_contains(ps, ' WEB')


# For each family: keep the matching patterns and fill any remaining NaN
# with the family name.
att_filter_MTM = prefix_MTM_df[
    [Attack_filter_MTM(ps) for ps in prefix_MTM_df.Cause]
].fillna('Attack')
mal_filter_MTM = prefix_MTM_df[
    [Malwr_filter_MTM(ps) for ps in prefix_MTM_df.Cause]
].fillna('Malwr')
dd_filter_MTM = prefix_MTM_df[
    [DDOS_filter_MTM(ps) for ps in prefix_MTM_df.Cause]
].fillna('DDOS')
hack_filter_MTM = prefix_MTM_df[
    [HACK_filter_MTM(ps) for ps in prefix_MTM_df.Cause]
].fillna('HACK')
mail_filter_MTM = prefix_MTM_df[
    [MAIL_filter_MTM(ps) for ps in prefix_MTM_df.Cause]
].fillna('MAIL')
prefix_MTM_df[:5]
web_filter_MTM = prefix_MTM_df[
    [WEB_filter_MTM(ps) for ps in prefix_MTM_df.Cause]
].fillna('WEB')

# Stack the six per-family frames and show them by Frequency, largest first.
frames_MTM = [
    att_filter_MTM, mal_filter_MTM, dd_filter_MTM,
    hack_filter_MTM, mail_filter_MTM, web_filter_MTM,
]
result_MTM = pd.concat(frames_MTM)
result_MTM.sort_values(by=['Frequency'], ascending=False)
780
-
781
-
782
-# In[ ]:
783
-
784
-
785
-
786
-
787
-
788
-# In[ ]:
789
-
790
-
791
-
792
-

Loading…
Cancel
Save