Browse Source

업데이트 'keris.ipynb/PrefixSpan_20210925.py'

master
YoonJoohyun 4 years ago
parent
commit
d907ace4c0
1 changed files with 701 additions and 35 deletions
  1. 701
    35
      keris.ipynb/PrefixSpan_20210925.py

+ 701
- 35
keris.ipynb/PrefixSpan_20210925.py View File

@@ -48,13 +48,14 @@ es = Elasticsearch(hosts=[{'host': '223.194.92.152', 'port': 9200}], scheme="htt
48 48
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
49 49
 
50 50
 
51
-# In[135]:
51
+# In[347]:
52 52
 
53 53
 
54 54
 ######## 2020, 1 year ########
55
+######## There are no MTM data in 2018, 2019 ########
55 56
 
56 57
 body = {
57
-         "size" : 100,
58
+         "size" : 10000,
58 59
          "query": {
59 60
                  "range":{
60 61
                     "TW_COLLECT_DT":{
@@ -62,14 +63,18 @@ body = {
62 63
                         "lte":"2020-12-31T00:00:00.625+09:00" ################
63 64
                     }
64 65
                 }
65
-                 }
66
+                 },
67
+    "sort":[{
68
+        "_id":"asc"
69
+    }]
66 70
 }
67 71
         
68 72
 res = es.search(index = 'ts_data_accident-2020', body=body)
69 73
 data = res['hits']['hits']
74
+nxt=res["hit"]["hit"][-1]["sort"][0]
70 75
 total = res['hits']['total']
71 76
 
72
-print(total)
77
+# print(total)
73 78
 
74 79
 accident = []
75 80
 for da in data:
@@ -78,39 +83,88 @@ for da in data:
78 83
     accident.append(att_type)
79 84
 
80 85
 # df = pd.DataFrame(accident,dtype=str)
81
-df = pd.DataFrame(accident)
86
+df_10000 = pd.DataFrame(accident)
82 87
 
83
-print(df.head())
88
+print(df_10000.head())
84 89
 
85 90
 
86
-# In[136]:
91
+# In[ ]:
92
+
93
+
94
+######## 2020, 1 year ########
95
+######## There are no MTM data in 2018, 2019 ########
96
+
97
# Second page of the same 2020 range query: "search_after" resumes right
# after the last hit of the previous page, whose sort value is held in `nxt`.
body = {
    "size": 10000,
    "search_after": [nxt],
    "query": {
        "range": {
            "TW_COLLECT_DT": {
                "gte": "2020-01-01T00:00:00.625+09:00",
                "lte": "2020-12-31T00:00:00.625+09:00"  ################
            }
        }
    },
    "sort": [{
        "_id": "asc"
    }]
}

res = es.search(index = 'ts_data_accident-2020', body=body)
data = res['hits']['hits']
# The hit list lives under res['hits']['hits'] (not 'hit'). An empty page
# leaves no cursor to continue from, so nxt becomes None in that case.
nxt = data[-1]["sort"][0] if data else None
total = res['hits']['total']

# print(total)

# Keep only the document payload of every hit.
accident = [da['_source'] for da in data]

df_20000 = pd.DataFrame(accident)

print(df_20000.head())
130
+
131
+
132
+# In[348]:
87 133
 
88 134
 
89
-df=df[['RISK_V2','INST_NM','DRULE_ATT_TYPE_CODE1','TW_ATT_IP','TW_ATT_PORT','TW_DMG_IP','TW_DMG_PORT','ACCD_DMG_PROTO_NM','TW_ATT_CT_NM','ACCD_FIND_MTD_CODE']]
135
# The fetch cells build df_10000 and df_20000 (one frame per result page), so
# `df` has to be assembled from both pages before any column is selected.
df = pd.concat([df_10000, df_20000], ignore_index=True)

# Keep only the columns used by the analysis and drop every row that has a
# missing value in any of them.
df = df[['RISK_V2','INST_NM','DRULE_ATT_TYPE_CODE1','TW_ATT_IP','TW_ATT_PORT','TW_DMG_IP','TW_DMG_PORT','ACCD_DMG_PROTO_NM','TW_ATT_CT_NM','ACCD_FIND_MTD_CODE','DRULE_NM']].dropna()
len(df)
df.head()
91 138
 
92 139
 
93
-# In[248]:
140
+# In[349]:
94 141
 
95 142
 
96
-# import ast
143
+##################### NTM section #####################
144
+
145
+
146
+# In[350]:
147
+
148
+
149
# Rows whose ACCD_FIND_MTD_CODE is '1' form the NTM subset. An explicit copy
# is taken because columns are added to and dropped from NTM_df later on;
# doing that on a filtered slice of df triggers SettingWithCopyWarning.
NTM_df = df[df['ACCD_FIND_MTD_CODE'] == '1'].copy()
len(NTM_df)
151
+
152
+
153
+# In[351]:
154
+
97 155
 
98 156
 # Pick out it in order to get the asset, risk, intent, black IP out
99
-RISK_V2=df['RISK_V2']
100
-# risk_values=RISK_V2.values
101
-# print(risk_values)
157
# RISK_V2 holds the per-record asset / intent / source flags of the NTM rows.
RISK_V2 = NTM_df['RISK_V2']

# Same series without missing entries; both sizes are printed for comparison.
RISK_V2_FILTERED = RISK_V2.dropna()
for _series in (RISK_V2, RISK_V2_FILTERED):
    print(_series.size)
103 162
 
104
-# print(type(risk_value[0]))
105 163
 
106 164
 
107
-# risk_v2_zero=RISK_V2[0]
108
-# print(RISK_V2.values[:2])
109
-# dict_risk_v2=ast.literal_eval(RISK_V2[0])
110
-# print(dict[0])
111 165
 
112 166
 
113
-# In[229]:
167
+# In[352]:
114 168
 
115 169
 
116 170
 def filter_assets_value(risk):
@@ -118,7 +172,8 @@ def filter_assets_value(risk):
118 172
   try:
119 173
     for risk_key in risk:
120 174
       if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
121
-        risks.append(risk_key)
175
+        risk_key_desc = 'RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key)
176
+        risks.append(risk_key_desc)
122 177
   except:
123 178
     print(risk)
124 179
     print(type(risk))
@@ -128,24 +183,393 @@ def filter_assets_value(risk):
128 183
   
129 184
 
130 185
 
131
-# In[106]:
186
+# In[353]:
132 187
 
133 188
 
134
-# # modified
135
-# def filter_assets_value(risk):
136
-#   risks=[]
137
-#   for risk_key in risk:
138
-#     if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
139
-#      risk_key_desc = 'RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key)
140
-#      risks.append(risk_key_desc)
141
-#   return risks
189
+# modified
190
# Label for each RISK_V2 asset flag. The labels are runtime strings that end
# up inside the mined sequences, so they are kept verbatim.
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the label for an ``ASSETS_VAL_<n>`` field name.

    Args:
        asset_field: Field name such as ``'ASSETS_VAL_3'``.

    Returns:
        The matching label, or ``''`` when the value is not one of the 22
        known asset field names (including non-string values).
    """
    # Non-strings never equal a field name; checking first also keeps
    # unhashable values from raising inside the dict lookup.
    if not isinstance(asset_field, str):
        return ''
    return _ASSET_DESCRIPTIONS.get(asset_field, '')
237
+
238
+
239
+# In[354]:
240
+
241
+
242
# New assets column: the asset descriptions of every RISK_V2 record, stored
# as the string form of the list.
NTM_df['ASSETS_VAL'] = [filter_assets_value(risk) for risk in RISK_V2_FILTERED]
NTM_df['ASSETS_VAL'] = NTM_df['ASSETS_VAL'].astype(str)
NTM_df[:1]
142 246
 
143 247
 
144
-# In[115]:
248
+# In[355]:
145 249
 
146 250
 
147 251
 # modified
148
-def get_asset_desc(asset_field):
252
def filter_intent(intent):
    """List the descriptions of the intent flags set in one RISK_V2 record.

    Args:
        intent: Record that is iterated for its field names and indexed by
            them (the callers pass RISK_V2 entries).

    Returns:
        One ``'RISK_V2.<field>=<label>'`` string for every field whose name
        contains ``'INTENT_VAL_'`` and whose value is truthy, in iteration
        order.
    """
    return [
        'RISK_V2.' + field + '=' + get_intent_desc(field)
        for field in intent
        if 'INTENT_VAL_' in field and intent[field]
    ]
259
+
260
+
261
+# In[356]:
262
+
263
+
264
# Label for each RISK_V2 intent flag (runtime strings, kept verbatim).
_INTENT_DESCRIPTIONS = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc(intent_field):
    """Return the label for an ``INTENT_VAL_<n>`` field name.

    Args:
        intent_field: Field name such as ``'INTENT_VAL_2'``.

    Returns:
        The matching label, or ``''`` when the value is not a known intent
        field name (including non-string values).
    """
    # Non-strings never equal a field name; checking first also keeps
    # unhashable values from raising inside the dict lookup.
    if not isinstance(intent_field, str):
        return ''
    return _INTENT_DESCRIPTIONS.get(intent_field, '')
281
+
282
+
283
+# In[357]:
284
+
285
+
286
# New intent column: the intent descriptions of every RISK_V2 record, stored
# as the string form of the list.
NTM_df['INTENT_VAL'] = [filter_intent(risk) for risk in RISK_V2_FILTERED]
NTM_df['INTENT_VAL'] = NTM_df['INTENT_VAL'].astype(str)
NTM_df[:1]
290
+
291
+
292
+# In[358]:
293
+
294
+
295
+# modified
296
def filter_source(source):
    """List the descriptions of the source flags set in one RISK_V2 record.

    Args:
        source: Record that is iterated for its field names and indexed by
            them (the callers pass RISK_V2 entries).

    Returns:
        One ``'RISK_V2.<field>=<label>'`` string for every field whose name
        contains ``'SOURCE_VAL_'`` and whose value is truthy, in iteration
        order.
    """
    return [
        'RISK_V2.' + field + '=' + get_source_desc(field)
        for field in source
        if 'SOURCE_VAL_' in field and source[field]
    ]
303
+
304
+
305
+# In[359]:
306
+
307
+
308
# Label for each RISK_V2 source flag (runtime strings, kept verbatim).
_SOURCE_DESCRIPTIONS = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc(source_field):
    """Return the label for a ``SOURCE_VAL_<n>`` field name.

    Args:
        source_field: Field name such as ``'SOURCE_VAL_1'``.

    Returns:
        The matching label, or ``''`` when the value is not a known source
        field name (including non-string values).
    """
    # Non-strings never equal a field name; checking first also keeps
    # unhashable values from raising inside the dict lookup.
    if not isinstance(source_field, str):
        return ''
    return _SOURCE_DESCRIPTIONS.get(source_field, '')
315
+
316
+
317
+# In[360]:
318
+
319
+
320
# New SOURCE_VAL column: the source descriptions of every RISK_V2 record,
# stored as the string form of the list.
NTM_df['SOURCE_VAL'] = [filter_source(risk) for risk in RISK_V2_FILTERED]
NTM_df['SOURCE_VAL'] = NTM_df['SOURCE_VAL'].astype(str)
NTM_df[:5]
324
+
325
+
326
+# In[361]:
327
+
328
+
329
# RISK_V2 has been expanded into ASSETS_VAL / INTENT_VAL / SOURCE_VAL above,
# so the raw column is removed in place.
NTM_df.drop('RISK_V2', axis=1, inplace=True)
NTM_df.columns
331
+
332
+
333
+# In[362]:
334
+
335
+
336
+# It should be 13 columns in total
337
+
338
+# 1. 기관 INST_NM
339
+# 2. 공격 DRULE_ATT_TYPE_CODE1
340
+# 3. 자산 ASSETS_VAL
341
+# 4. 위협공격ip TW_ATT_IP
342
+# 5. 위협공격port TW_ATT_PORT
343
+# 6. 위협피해ip TW_DMG_IP
344
+# 7. 위협피해port TW_DMG_PORT
345
+# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM
346
+# 9. 공격국가 TW_ATT_CT_NM
347
+# 10. 의도(7개) INTENT_VAL
348
+# 11. IP/URL 가중치 SOURCE_VAL
349
+# 12. 장비 ACCD_FIND_MTD_CODE
350
+# 13. 탐지규칙명 DRULE_NM
351
+
352
+
353
+# 
354
+
355
+# In[363]:
356
+
357
+
358
+NTM_df.isna().sum()
359
+
360
+
361
+# In[364]:
362
+
363
+
364
# Replace NaN column by column: '' for some columns and 0 for the others,
# exactly as listed below.
for _column, _fill in (
    ('ACCD_DMG_PROTO_NM', ''),
    ('INST_NM', ''),
    ('DRULE_ATT_TYPE_CODE1', ''),
    ('TW_ATT_IP', 0),
    ('TW_ATT_PORT', 0),
    ('TW_DMG_IP', 0),
    ('TW_DMG_PORT', 0),
    ('TW_ATT_CT_NM', ''),
    ('ASSETS_VAL', 0),
    ('INTENT_VAL', 0),
    ('SOURCE_VAL', 0),
    ('DRULE_NM', ''),
):
    NTM_df[_column] = NTM_df[_column].replace(np.nan, _fill)
377
+
378
+
379
+# In[365]:
380
+
381
+
382
+# Check NaN out again
383
+NTM_df.isna().sum()
384
+
385
+
386
+# In[366]:
387
+
388
+
389
+# # Merge all
390
+
391
+# # Make one string from all of elements
392
# Join every feature into one space-separated string per row.
# The parentheses are required: a continuation line that starts with "+"
# outside of brackets is parsed as a separate statement, which would leave
# 'Combined' with only the first two columns.
NTM_df['Combined'] = (
    NTM_df['INST_NM'].astype(str)
    + ' ' + NTM_df['TW_ATT_IP'].astype(str)
    + ' ' + NTM_df['TW_ATT_PORT'].astype(str)
    + ' ' + NTM_df['TW_DMG_IP'].astype(str)
    + ' ' + NTM_df['TW_DMG_PORT'].astype(str)
    + ' ' + NTM_df['ACCD_DMG_PROTO_NM'].astype(str)
    + ' ' + NTM_df['TW_ATT_CT_NM']
    + ' ' + NTM_df['ASSETS_VAL']
    + ' ' + NTM_df['INTENT_VAL']
    + ' ' + NTM_df['SOURCE_VAL']
    + ' ' + NTM_df['DRULE_ATT_TYPE_CODE1']
    + ' ' + NTM_df['DRULE_NM']
)

NTM_com = NTM_df['Combined']
NTM_com[:10]
400
+
401
+
402
+# In[367]:
403
+
404
+
405
# Wrap the combined strings in a one-column DataFrame
NTM_to_df = NTM_com.to_frame()
NTM_to_df[:5]


# In[368]:


# Nested list for the algorithm: one inner list per row, holding that row's
# combined string
NTM_tolist = NTM_to_df.to_numpy().tolist()
NTM_tolist[:5]
416
+
417
+
418
+# In[369]:
419
+
420
+
421
+from prefixspan import PrefixSpan
422
+
423
+
424
+# In[370]:
425
+
426
+
427
# Apply prefixspan
PrefixSpan_NTM = PrefixSpan(NTM_tolist)

###### Interchangeable ######
# Minimum support passed to frequent(); 1 keeps every pattern
prefix_NTM = PrefixSpan_NTM.frequent(1)
prefix_NTM[:3]


# In[371]:


# Put the result into a DataFrame. Column 0 is the frequency and column 1 the
# pattern; naming them here keeps both columns present even when the mining
# result is empty, so the reordering below cannot raise KeyError.
prefix_NTM_df = pd.DataFrame(prefix_NTM, columns=['Frequency', 'Cause'])
prefix_NTM_df[:5]


# In[372]:


# Placeholder column for the Effect, filled in afterwards
prefix_NTM_df['Effect'] = np.nan

# Change the order of columns
prefix_NTM_df = prefix_NTM_df[['Cause', 'Effect', 'Frequency']]
prefix_NTM_df[:2]
456
+
457
+
458
+# In[373]:
459
+
460
+
461
+# Define the function that find the rule name 
462
# Rule labels in the priority order used when several of them match.
_DRULE_LABELS = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')


def generate_cause(cell):
    """Return the rule label mentioned in the first item of a pattern.

    A label counts as mentioned when it occurs in the first item preceded by
    a space.

    Args:
        cell: Pattern (sequence) whose first item is a string.

    Returns:
        The first label of ``_DRULE_LABELS`` that is mentioned, or ``''``
        when none is or when the pattern is empty.
    """
    # An empty pattern has no first item to inspect.
    if not cell:
        return ''
    first_item = cell[0]
    return next(
        (label for label in _DRULE_LABELS if ' ' + label in first_item),
        '',
    )
468
+      
469
# Derive the effect (rule label) of every mined pattern
effect = [generate_cause(cause) for cause in prefix_NTM_df.Cause]

# Store the labels in the Effect column and show the table by frequency
prefix_NTM_df['Effect'] = effect
prefix_NTM_df.sort_values(by=['Frequency'], ascending=False)
475
+
476
+
477
+# In[374]:
478
+
479
+
480
def _mentions_label(ps, label):
    """Return True when the first item of pattern ``ps`` contains ' <label>'."""
    return ' ' + label in ps[0]


# Attack Filter
def Attack_filter(ps):
    """Return True when the pattern's first item mentions ' Attack'."""
    return _mentions_label(ps, 'Attack')

att_filter = prefix_NTM_df[[Attack_filter(c) for c in prefix_NTM_df.Cause]].fillna('Attack')

# Malwr Filter
def Malwr_filter(ps):
    """Return True when the pattern's first item mentions ' Malwr'."""
    return _mentions_label(ps, 'Malwr')

mal_filter = prefix_NTM_df[[Malwr_filter(c) for c in prefix_NTM_df.Cause]].fillna('Malwr')

# DDOS Filter
def DDOS_filter(ps):
    """Return True when the pattern's first item mentions ' DDOS'."""
    return _mentions_label(ps, 'DDOS')

dd_filter = prefix_NTM_df[[DDOS_filter(c) for c in prefix_NTM_df.Cause]].fillna('DDOS')

# HACK Filter
def HACK_filter(ps):
    """Return True when the pattern's first item mentions ' HACK'."""
    return _mentions_label(ps, 'HACK')

hack_filter = prefix_NTM_df[[HACK_filter(c) for c in prefix_NTM_df.Cause]].fillna('HACK')

# MAIL Filter
def MAIL_filter(ps):
    """Return True when the pattern's first item mentions ' MAIL'."""
    return _mentions_label(ps, 'MAIL')

mail_filter = prefix_NTM_df[[MAIL_filter(c) for c in prefix_NTM_df.Cause]].fillna('MAIL')

# WEB Filter
def WEB_filter(ps):
    """Return True when the pattern's first item mentions ' WEB'."""
    return _mentions_label(ps, 'WEB')

web_filter = prefix_NTM_df[[WEB_filter(c) for c in prefix_NTM_df.Cause]].fillna('WEB')

# Stack the per-label subsets; a pattern that mentions several labels appears
# once per label.
frames = [att_filter, mal_filter, dd_filter, hack_filter, mail_filter, web_filter]
result = pd.concat(frames)
result.sort_values(by=['Frequency'], ascending=False)
519
+
520
+
521
+# In[ ]:
522
+
523
+
524
+##################### NTM section End #####################
525
+
526
+
527
+# In[ ]:
528
+
529
+
530
+##################### MTM section #####################
531
+
532
+
533
+# In[375]:
534
+
535
+
536
# Rows whose ACCD_FIND_MTD_CODE is '2' form the MTM subset. An explicit copy
# is taken because columns are added to and dropped from MTM_df later on;
# doing that on a filtered slice of df triggers SettingWithCopyWarning.
MTM_df = df[df['ACCD_FIND_MTD_CODE'] == '2'].copy()
len(MTM_df)
538
+
539
+
540
+# In[376]:
541
+
542
+
543
+# Pick out it in order to get the asset, risk, intent, black IP out
544
# RISK_V2 holds the per-record asset / intent / source flags of the MTM rows.
RISK_V2_MTM = MTM_df['RISK_V2']

# Same series without missing entries; both sizes are printed for comparison.
RISK_V2_FILTERED_MTM = RISK_V2_MTM.dropna()
for _series in (RISK_V2_MTM, RISK_V2_FILTERED_MTM):
    print(_series.size)
549
+
550
+
551
+# In[377]:
552
+
553
+
554
def filter_assets_value_MTM(risk):
    """List the descriptions of the asset flags set in one RISK_V2 record.

    Args:
        risk: Record that is iterated for its field names and indexed by
            them.

    Returns:
        One ``'RISK_V2.<field>=<label>'`` string for every field whose name
        contains ``'ASSETS_VAL_'`` and whose value is truthy. When the record
        cannot be processed, it is printed together with its type and the
        entries collected so far are returned.
    """
    risks = []
    try:
        for risk_key in risk:
            if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
                risks.append('RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key))
    except Exception:
        # Best effort: report the malformed record and keep going. Only
        # Exception is caught (and the return sits outside a `finally`) so
        # that KeyboardInterrupt / SystemExit are no longer swallowed.
        print(risk)
        print(type(risk))
    return risks
566
+
567
+
568
+# In[378]:
569
+
570
+
571
+# modified
572
+def get_asset_desc_MTM(asset_field):
149 573
   if asset_field == 'ASSETS_VAL_1':
150 574
     return '공인-전체IP대역(유선)'
151 575
   elif asset_field == 'ASSETS_VAL_2':
@@ -194,13 +618,255 @@ def get_asset_desc(asset_field):
194 618
     return ''
195 619
 
196 620
 
197
-# In[250]:
621
+# In[379]:
198 622
 
199 623
 
200 624
 # New assets column
201
-x=list(map(filter_assets_value, RISK_V2))
202
-# print(list(filter(lambda n:n!='None',df['ASSETS_VAL'])))
203
-len(x)
625
# The asset descriptions of every RISK_V2 record, stored as the string form
# of the list.
MTM_df['ASSETS_VAL'] = [filter_assets_value_MTM(risk) for risk in RISK_V2_FILTERED_MTM]
MTM_df['ASSETS_VAL'] = MTM_df['ASSETS_VAL'].astype(str)
MTM_df[:1]
628
+
629
+
630
+# In[381]:
631
+
632
+
633
+# modified
634
def filter_intent_MTM(intent):
    """List the descriptions of the intent flags set in one RISK_V2 record.

    Args:
        intent: Record that is iterated for its field names and indexed by
            them (the callers pass RISK_V2 entries).

    Returns:
        One ``'RISK_V2.<field>=<label>'`` string for every field whose name
        contains ``'INTENT_VAL_'`` and whose value is truthy, in iteration
        order.
    """
    return [
        'RISK_V2.' + field + '=' + get_intent_desc(field)
        for field in intent
        if 'INTENT_VAL_' in field and intent[field]
    ]
641
+
642
+
643
+# In[382]:
644
+
645
+
646
# Label for each RISK_V2 intent flag (runtime strings, kept verbatim).
_INTENT_DESCRIPTIONS_MTM = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc_MTM(intent_field):
    """Return the label for an ``INTENT_VAL_<n>`` field name.

    Args:
        intent_field: Field name such as ``'INTENT_VAL_2'``.

    Returns:
        The matching label, or ``''`` when the value is not a known intent
        field name (including non-string values).
    """
    # Non-strings never equal a field name; checking first also keeps
    # unhashable values from raising inside the dict lookup.
    if not isinstance(intent_field, str):
        return ''
    return _INTENT_DESCRIPTIONS_MTM.get(intent_field, '')
663
+
664
+
665
+# In[383]:
666
+
667
+
668
# New intent column: the intent descriptions of every RISK_V2 record, stored
# as the string form of the list.
MTM_df['INTENT_VAL'] = [filter_intent_MTM(risk) for risk in RISK_V2_FILTERED_MTM]
MTM_df['INTENT_VAL'] = MTM_df['INTENT_VAL'].astype(str)
MTM_df[:1]
672
+
673
+
674
+# In[384]:
675
+
676
+
677
+# modified
678
def filter_source_MTM(source):
    """List the descriptions of the source flags set in one RISK_V2 record.

    Args:
        source: Record that is iterated for its field names and indexed by
            them (the callers pass RISK_V2 entries).

    Returns:
        One ``'RISK_V2.<field>=<label>'`` string for every field whose name
        contains ``'SOURCE_VAL_'`` and whose value is truthy, in iteration
        order.
    """
    return [
        'RISK_V2.' + field + '=' + get_source_desc(field)
        for field in source
        if 'SOURCE_VAL_' in field and source[field]
    ]
685
+
686
+
687
+# In[385]:
688
+
689
+
690
# Label for each RISK_V2 source flag (runtime strings, kept verbatim).
_SOURCE_DESCRIPTIONS_MTM = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc_MTM(source_field):
    """Return the label for a ``SOURCE_VAL_<n>`` field name.

    Args:
        source_field: Field name such as ``'SOURCE_VAL_1'``.

    Returns:
        The matching label, or ``''`` when the value is not a known source
        field name (including non-string values).
    """
    # Non-strings never equal a field name; checking first also keeps
    # unhashable values from raising inside the dict lookup.
    if not isinstance(source_field, str):
        return ''
    return _SOURCE_DESCRIPTIONS_MTM.get(source_field, '')
697
+
698
+
699
+# In[386]:
700
+
701
+
702
# New SOURCE_VAL column: the source descriptions of every RISK_V2 record,
# stored as the string form of the list.
MTM_df['SOURCE_VAL'] = [filter_source_MTM(risk) for risk in RISK_V2_FILTERED_MTM]
MTM_df['SOURCE_VAL'] = MTM_df['SOURCE_VAL'].astype(str)
MTM_df[:5]
706
+
707
+
708
+# In[387]:
709
+
710
+
711
# RISK_V2 has been expanded into ASSETS_VAL / INTENT_VAL / SOURCE_VAL above,
# so the raw column is removed in place.
MTM_df.drop('RISK_V2', axis=1, inplace=True)
MTM_df.columns
713
+
714
+
715
+# In[388]:
716
+
717
+
718
+MTM_df.isna().sum()
719
+
720
+
721
+# In[389]:
722
+
723
+
724
# Replace NaN column by column: '' for some columns and 0 for the others,
# exactly as listed below.
for _column, _fill in (
    ('ACCD_DMG_PROTO_NM', ''),
    ('INST_NM', ''),
    ('DRULE_ATT_TYPE_CODE1', ''),
    ('TW_ATT_IP', 0),
    ('TW_ATT_PORT', 0),
    ('TW_DMG_IP', 0),
    ('TW_DMG_PORT', 0),
    ('TW_ATT_CT_NM', ''),
    ('ASSETS_VAL', 0),
    ('INTENT_VAL', 0),
    ('SOURCE_VAL', 0),
    ('DRULE_NM', ''),
):
    MTM_df[_column] = MTM_df[_column].replace(np.nan, _fill)
737
+
738
+
739
+# In[390]:
740
+
741
+
742
+# Check NaN out again
743
+MTM_df.isna().sum()
744
+
745
+
746
+# In[391]:
747
+
748
+
749
+# # Merge all
750
+
751
+# # Make one string from all of elements
752
# Join every feature into one space-separated string per row. The expression
# is wrapped in parentheses so it can span several lines.
MTM_df['Combined'] = (
    MTM_df['INST_NM'].astype(str)
    + ' ' + MTM_df['TW_ATT_IP'].astype(str)
    + ' ' + MTM_df['TW_ATT_PORT'].astype(str)
    + ' ' + MTM_df['TW_DMG_IP'].astype(str)
    + ' ' + MTM_df['TW_DMG_PORT'].astype(str)
    + ' ' + MTM_df['ACCD_DMG_PROTO_NM'].astype(str)
    + ' ' + MTM_df['TW_ATT_CT_NM']
    + ' ' + MTM_df['ASSETS_VAL']
    + ' ' + MTM_df['INTENT_VAL']
    + ' ' + MTM_df['SOURCE_VAL']
    + ' ' + MTM_df['DRULE_ATT_TYPE_CODE1']
    + ' ' + MTM_df['DRULE_NM']
)

MTM_com = MTM_df['Combined']
MTM_com[:10]
756
+
757
+
758
+# In[392]:
759
+
760
+
761
# Wrap the combined strings in a one-column DataFrame
MTM_to_df = MTM_com.to_frame()
MTM_to_df[:5]


# In[393]:


# Nested list for the algorithm: one inner list per row, holding that row's
# combined string
MTM_tolist = MTM_to_df.to_numpy().tolist()
MTM_tolist[:5]
772
+
773
+
774
+# In[394]:
775
+
776
+
777
# Apply prefixspan
PrefixSpan_MTM = PrefixSpan(MTM_tolist)

###### Interchangeable ######
# Minimum support passed to frequent(); 1 keeps every pattern
prefix_MTM = PrefixSpan_MTM.frequent(1)
prefix_MTM[:3]


# In[395]:


# Put the result into a DataFrame. Column 0 is the frequency and column 1 the
# pattern; naming them here keeps both columns present even when the mining
# result is empty, so the reordering below cannot raise KeyError.
prefix_MTM_df = pd.DataFrame(prefix_MTM, columns=['Frequency', 'Cause'])
prefix_MTM_df[:5]


# In[396]:


# Placeholder column for the Effect, filled in afterwards
prefix_MTM_df['Effect'] = np.nan

# Change the order of columns
prefix_MTM_df = prefix_MTM_df[['Cause', 'Effect', 'Frequency']]
prefix_MTM_df[:2]
806
+
807
+
808
+# In[397]:
809
+
810
+
811
+# Define the function that find the rule name 
812
# Rule labels in the priority order used when several of them match.
_DRULE_LABELS_MTM = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')


def generate_cause_MTM(cell):
    """Return the rule label mentioned in the first item of a pattern.

    A label counts as mentioned when it occurs in the first item preceded by
    a space.

    Args:
        cell: Pattern (sequence) whose first item is a string.

    Returns:
        The first label of ``_DRULE_LABELS_MTM`` that is mentioned, or ``''``
        when none is or when the pattern is empty.
    """
    # An empty pattern has no first item to inspect.
    if not cell:
        return ''
    first_item = cell[0]
    return next(
        (label for label in _DRULE_LABELS_MTM if ' ' + label in first_item),
        '',
    )
818
+      
819
# Derive the effect (rule label) of every mined pattern. The MTM helper
# defined in this cell is used, so this section does not depend on the NTM
# section having been run first.
effect_MTM = [generate_cause_MTM(cause) for cause in prefix_MTM_df.Cause]

# Store the labels in the Effect column and show the table by frequency
prefix_MTM_df['Effect'] = effect_MTM
prefix_MTM_df.sort_values(by=['Frequency'], ascending=False)
825
+
826
+
827
+# In[399]:
828
+
829
+
830
def _mentions_label_MTM(ps, label):
    """Return True when the first item of pattern ``ps`` contains ' <label>'."""
    return ' ' + label in ps[0]


# Attack Filter
def Attack_filter_MTM(ps):
    """Return True when the pattern's first item mentions ' Attack'."""
    return _mentions_label_MTM(ps, 'Attack')

att_filter_MTM = prefix_MTM_df[[Attack_filter_MTM(c) for c in prefix_MTM_df.Cause]].fillna('Attack')

# Malwr Filter
def Malwr_filter_MTM(ps):
    """Return True when the pattern's first item mentions ' Malwr'."""
    return _mentions_label_MTM(ps, 'Malwr')

mal_filter_MTM = prefix_MTM_df[[Malwr_filter_MTM(c) for c in prefix_MTM_df.Cause]].fillna('Malwr')

# DDOS Filter
def DDOS_filter_MTM(ps):
    """Return True when the pattern's first item mentions ' DDOS'."""
    return _mentions_label_MTM(ps, 'DDOS')

dd_filter_MTM = prefix_MTM_df[[DDOS_filter_MTM(c) for c in prefix_MTM_df.Cause]].fillna('DDOS')

# HACK Filter
def HACK_filter_MTM(ps):
    """Return True when the pattern's first item mentions ' HACK'."""
    return _mentions_label_MTM(ps, 'HACK')

hack_filter_MTM = prefix_MTM_df[[HACK_filter_MTM(c) for c in prefix_MTM_df.Cause]].fillna('HACK')

# MAIL Filter
def MAIL_filter_MTM(ps):
    """Return True when the pattern's first item mentions ' MAIL'."""
    return _mentions_label_MTM(ps, 'MAIL')

mail_filter_MTM = prefix_MTM_df[[MAIL_filter_MTM(c) for c in prefix_MTM_df.Cause]].fillna('MAIL')

# WEB Filter
def WEB_filter_MTM(ps):
    """Return True when the pattern's first item mentions ' WEB'."""
    return _mentions_label_MTM(ps, 'WEB')

web_filter_MTM = prefix_MTM_df[[WEB_filter_MTM(c) for c in prefix_MTM_df.Cause]].fillna('WEB')

# Stack the per-label subsets; a pattern that mentions several labels appears
# once per label.
frames_MTM = [att_filter_MTM, mal_filter_MTM, dd_filter_MTM, hack_filter_MTM, mail_filter_MTM, web_filter_MTM]
result_MTM = pd.concat(frames_MTM)
result_MTM.sort_values(by=['Frequency'], ascending=False)
204 870
 
205 871
 
206 872
 # In[ ]:

Loading…
Cancel
Save