| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876 |
- #!/usr/bin/env python
- # coding: utf-8
-
- # In[1]:
-
-
- import os
- import array
- import math
- import pickle
- # import joblib
- import sys
- import argparse
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- from datetime import datetime
- from pprint import pprint
- import ssl
- from elasticsearch.connection import create_ssl_context
- from elasticsearch import Elasticsearch
- from elasticsearch import helpers
- import urllib3
-
-
- # In[3]:
-
-
- import pandas as pd
- import numpy as np
- from mlxtend.preprocessing import TransactionEncoder
- from mlxtend.frequent_patterns import association_rules, fpgrowth
- from prefixspan import PrefixSpan
-
-
- # In[4]:
-
-
# TLS context for the Elasticsearch client with hostname checking and
# certificate verification switched off.
# NOTE(review): the client below is created with scheme="http", so this
# context is presumably never exercised -- confirm before relying on it.
ssl_context = create_ssl_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE


# In[12]:


# Connection settings may be overridden through environment variables; the
# fall-back values are the ones this script has always used.
# NOTE(review): a password committed to source should be rotated and the
# literal default removed once ES_PASSWORD is provisioned.
es = Elasticsearch(
    hosts=[{
        'host': os.environ.get('ES_HOST', '223.194.92.152'),
        'port': int(os.environ.get('ES_PORT', '9200')),
    }],
    scheme="http",
    verify_certs=False,
    timeout=300,
    ssl_context=ssl_context,
    http_auth=(
        os.environ.get('ES_USER', 'elasticsearch'),
        os.environ.get('ES_PASSWORD', 'hadoop2019@!@#$'),
    ),
)
# Certificate verification is disabled above, so silence urllib3's
# InsecureRequestWarning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-
- # In[347]:
-
-
- ######## 2020, 1 year ########
- ######## There are no MTM data in 2018, 2019 ########
-
# First page: up to 10,000 accident documents whose TW_COLLECT_DT falls in
# 2020, ordered by _id so the following page can continue with search_after.
# NOTE(review): the upper bound is 2020-12-31T00:00:00.625, so documents
# collected later on 31 December fall outside the range -- confirm intent.
body = {
    "size": 10000,
    "query": {
        "range": {
            "TW_COLLECT_DT": {
                "gte": "2020-01-01T00:00:00.625+09:00",
                "lte": "2020-12-31T00:00:00.625+09:00",
            }
        }
    },
    "sort": [{"_id": "asc"}],
}

res = es.search(index='ts_data_accident-2020', body=body)
data = res['hits']['hits']
# The cursor is the sort value of the last hit.  The response key is "hits";
# the previous res["hit"]["hit"] lookup raised KeyError.  An empty page has
# no last hit, hence no cursor.
nxt = data[-1]["sort"][0] if data else None
total = res['hits']['total']

# Keep only the stored document of every hit.
accident = [da['_source'] for da in data]

df_10000 = pd.DataFrame(accident)

print(df_10000.head())
-
-
- # In[ ]:
-
-
- ######## 2020, 1 year ########
- ######## There are no MTM data in 2018, 2019 ########
-
# Second page: the same 2020 range query, resumed after the last document of
# the previous page through search_after.
body = {
    "size": 10000,
    "query": {
        "range": {
            "TW_COLLECT_DT": {
                "gte": "2020-01-01T00:00:00.625+09:00",
                "lte": "2020-12-31T00:00:00.625+09:00",
            }
        }
    },
    "sort": [{"_id": "asc"}],
}
# Only send a cursor when the previous page produced one.
if nxt is not None:
    body["search_after"] = [nxt]

res = es.search(index='ts_data_accident-2020', body=body)
data = res['hits']['hits']
# The response key is "hits"; the previous res["hit"]["hit"] lookup raised
# KeyError.  Keep the old cursor when this page came back empty.
nxt = data[-1]["sort"][0] if data else nxt
total = res['hits']['total']

# Keep only the stored document of every hit.
accident = [da['_source'] for da in data]

df_20000 = pd.DataFrame(accident)

print(df_20000.head())
-
-
- # In[348]:
-
-
# `df` was never assigned before this point (NameError); build it from the
# two result pages, df_10000 and df_20000.
df = pd.concat([df_10000, df_20000], ignore_index=True)

# Keep only the columns used by the analysis and drop rows missing any of them.
df = df[[
    'RISK_V2', 'INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',
    'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',
    'ACCD_FIND_MTD_CODE', 'DRULE_NM',
]].dropna()
len(df)
df.head()
-
-
- # In[349]:
-
-
- ##################### NTM section #####################
-
-
- # In[350]:
-
-
# Rows whose ACCD_FIND_MTD_CODE is '1'.  Take an explicit copy: columns are
# added to and dropped from NTM_df later, which must not act on a slice of df.
NTM_df = df[df['ACCD_FIND_MTD_CODE'] == '1'].copy()
len(NTM_df)


# In[351]:


# RISK_V2 is the source of the asset, intent and source-IP labels.
RISK_V2 = NTM_df['RISK_V2']

RISK_V2_FILTERED = RISK_V2.dropna()
# Sizes before and after removing missing RISK_V2 entries.
print(RISK_V2.size)
print(RISK_V2_FILTERED.size)
-
-
-
-
-
- # In[352]:
-
-
def filter_assets_value(risk):
    """Return the asset labels of one RISK_V2 entry.

    Every key of *risk* that contains 'ASSETS_VAL_' and has a truthy value
    yields the string 'RISK_V2.<key>=<description>'.

    A malformed entry (not iterable, or not indexable by its own keys) is
    printed together with its type and the labels collected so far are
    returned.  Only those failures are handled; the earlier bare ``except``
    with ``return`` inside ``finally`` hid every exception, including
    KeyboardInterrupt.
    """
    risks = []
    try:
        for risk_key in risk:
            if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
                risks.append('RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key))
    except (TypeError, LookupError):
        print(risk)
        print(type(risk))
    return risks
-
-
-
-
- # In[353]:
-
-
- # modified
# Label of every ASSETS_VAL_n field (공인 = public, 사설 = private).
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the label for *asset_field*, or '' when the field is unknown."""
    return _ASSET_DESCRIPTIONS.get(asset_field, '')
-
-
- # In[354]:
-
-
- # New assets column
# ASSETS_VAL column: the string form of each row's list of asset labels.
NTM_df['ASSETS_VAL'] = [
    str(filter_assets_value(risk_entry)) for risk_entry in RISK_V2_FILTERED
]
NTM_df[:1]
-
-
- # In[355]:
-
-
- # modified
def filter_intent(intent):
    """Return the intent labels of one RISK_V2 entry.

    Every key of *intent* that contains 'INTENT_VAL_' and has a truthy value
    yields the string 'RISK_V2.<key>=<description>'.
    """
    return [
        'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)
        for intent_key in intent
        if 'INTENT_VAL_' in intent_key and intent[intent_key]
    ]
-
-
- # In[356]:
-
-
# Label of every INTENT_VAL_n field.
_INTENT_DESCRIPTIONS = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc(intent_field):
    """Return the label for *intent_field*, or '' when the field is unknown."""
    return _INTENT_DESCRIPTIONS.get(intent_field, '')
-
-
- # In[357]:
-
-
- # New column of intent value
# INTENT_VAL column: the string form of each row's list of intent labels.
NTM_df['INTENT_VAL'] = [
    str(filter_intent(risk_entry)) for risk_entry in RISK_V2_FILTERED
]
NTM_df[:1]
-
-
- # In[358]:
-
-
- # modified
def filter_source(source):
    """Return the source labels of one RISK_V2 entry.

    Every key of *source* that contains 'SOURCE_VAL_' and has a truthy value
    yields the string 'RISK_V2.<key>=<description>'.
    """
    return [
        'RISK_V2.' + source_key + '=' + get_source_desc(source_key)
        for source_key in source
        if 'SOURCE_VAL_' in source_key and source[source_key]
    ]
-
-
- # In[359]:
-
-
# Label of every SOURCE_VAL_n field that has one.
_SOURCE_DESCRIPTIONS = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc(source_field):
    """Return the label for *source_field*, or '' when the field is unknown."""
    return _SOURCE_DESCRIPTIONS.get(source_field, '')
-
-
- # In[360]:
-
-
- # New column of SOURCE_VAL value
# SOURCE_VAL column: the string form of each row's list of source labels.
NTM_df['SOURCE_VAL'] = [
    str(filter_source(risk_entry)) for risk_entry in RISK_V2_FILTERED
]
NTM_df[:5]


# In[361]:


# RISK_V2 has been expanded into the three label columns; remove it in place.
NTM_df.drop('RISK_V2', axis=1, inplace=True)
NTM_df.columns
-
-
- # In[362]:
-
-
# There should be 13 columns in total:

# 1. Institution: INST_NM
# 2. Attack type: DRULE_ATT_TYPE_CODE1
# 3. Asset: ASSETS_VAL
# 4. Threat attacker IP: TW_ATT_IP
# 5. Threat attacker port: TW_ATT_PORT
# 6. Threat victim IP: TW_DMG_IP
# 7. Threat victim port: TW_DMG_PORT
# 8. Threat victim protocol: ACCD_DMG_PROTO_NM
# 9. Attacking country: TW_ATT_CT_NM
# 10. Intent (7 kinds): INTENT_VAL
# 11. IP/URL weight: SOURCE_VAL
# 12. Equipment: ACCD_FIND_MTD_CODE
# 13. Detection rule name: DRULE_NM
-
-
- #
-
- # In[363]:
-
-
NTM_df.isna().sum()


# In[364]:


# Replace NaN column by column: '' for some columns, 0 for the others.
_ntm_nan_fill = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for _ntm_column, _ntm_fill in _ntm_nan_fill.items():
    NTM_df[_ntm_column] = NTM_df[_ntm_column].replace(np.nan, _ntm_fill)


# In[365]:


# Check for NaN again.
NTM_df.isna().sum()
-
-
- # In[366]:
-
-
- # # Merge all
-
- # # Make one string from all of elements
# Join every field of a row into one space-separated string.  The expression
# is parenthesised so it really spans all lines: without the parentheses each
# continuation line starting with `+` was a separate statement (and unary `+`
# on a str raises TypeError), leaving Combined with the first two fields only.
NTM_df['Combined'] = (
    NTM_df['INST_NM'].astype(str) + ' ' + NTM_df['TW_ATT_IP'].astype(str)
    + ' ' + NTM_df['TW_ATT_PORT'].astype(str) + ' ' + NTM_df['TW_DMG_IP'].astype(str) + ' '
    + NTM_df['TW_DMG_PORT'].astype(str) + ' ' + NTM_df['ACCD_DMG_PROTO_NM'].astype(str)
    + ' ' + NTM_df['TW_ATT_CT_NM'] + ' ' + NTM_df['ASSETS_VAL'] + ' ' + NTM_df['INTENT_VAL'] + ' '
    + NTM_df['SOURCE_VAL'] + ' ' + NTM_df['DRULE_ATT_TYPE_CODE1'] + ' ' + NTM_df['DRULE_NM']
)

NTM_com = NTM_df['Combined']
NTM_com[:10]
-
-
- # In[367]:
-
-
- # Change the type to DataFrame
# One-column frame holding the combined strings.
NTM_to_df = pd.DataFrame({'Combined': NTM_com})
NTM_to_df[:5]


# In[368]:


# Nested list (one single-item list per row), the input shape used for
# PrefixSpan below.
_ntm_rows = NTM_to_df.values
NTM_tolist = _ntm_rows.tolist()
NTM_tolist[:5]
-
-
- # In[369]:
-
-
- from prefixspan import PrefixSpan
-
-
- # In[370]:
-
-
- # Apply prefixspan
# Apply PrefixSpan to the nested list of combined strings.
PrefixSpan_NTM = PrefixSpan(NTM_tolist)

###### Interchangeable ######
# Minimum frequency passed to PrefixSpan: 1.
prefix_NTM = PrefixSpan_NTM.frequent(1)
prefix_NTM[:3]


# In[371]:


# Each result is a (frequency, pattern) pair.  Naming the columns at
# construction keeps the frame well-formed even when no pattern was found;
# renaming columns 0/1 afterwards made the column selection below raise
# KeyError on an empty result.
prefix_NTM_df = pd.DataFrame(prefix_NTM, columns=['Frequency', 'Cause'])
prefix_NTM_df[:5]


# In[372]:


# Placeholder for the effect, filled in from the cause afterwards.
prefix_NTM_df['Effect'] = np.nan

# Final column order.
prefix_NTM_df = prefix_NTM_df[['Cause', 'Effect', 'Frequency']]
prefix_NTM_df[:2]
-
-
- # In[373]:
-
-
- # Define the function that find the rule name
def generate_cause(cell):
    """Return the rule name mentioned in the first item of pattern *cell*.

    The names are tried in the order Attack, DDOS, HACK, MAIL, Malwr, WEB and
    must be preceded by a space in the text.  Returns '' when none matches or
    when *cell* is empty (which used to raise IndexError).
    """
    if len(cell) == 0:
        return ''
    first_item = cell[0]
    for drule in ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB'):
        if ' ' + drule in first_item:
            return drule
    return ''
-
- # Mapping the rule name with cause that is the effect
# Derive the effect (rule name) of every cause pattern.
effect = [generate_cause(cause_pattern) for cause_pattern in prefix_NTM_df.Cause]

# Store the rule name as the effect.
prefix_NTM_df['Effect'] = effect
prefix_NTM_df.sort_values(by=['Frequency'], ascending=False)
-
-
- # In[374]:
-
-
- # Attack Filter
def _ntm_cause_mentions(ps, label):
    """Return True when the first item of pattern *ps* contains ' ' + label.

    An empty pattern mentions nothing (indexing it used to raise IndexError).
    """
    return len(ps) > 0 and ' ' + label in ps[0]


def Attack_filter(ps):
    """Return True when the pattern mentions ' Attack'."""
    return _ntm_cause_mentions(ps, 'Attack')


def Malwr_filter(ps):
    """Return True when the pattern mentions ' Malwr'."""
    return _ntm_cause_mentions(ps, 'Malwr')


def DDOS_filter(ps):
    """Return True when the pattern mentions ' DDOS'."""
    return _ntm_cause_mentions(ps, 'DDOS')


def HACK_filter(ps):
    """Return True when the pattern mentions ' HACK'."""
    return _ntm_cause_mentions(ps, 'HACK')


def MAIL_filter(ps):
    """Return True when the pattern mentions ' MAIL'."""
    return _ntm_cause_mentions(ps, 'MAIL')


def WEB_filter(ps):
    """Return True when the pattern mentions ' WEB'."""
    return _ntm_cause_mentions(ps, 'WEB')


# One frame per rule name: the rows whose cause mentions it, with any missing
# value filled with that name.
att_filter = prefix_NTM_df[list(map(Attack_filter, prefix_NTM_df.Cause))].fillna('Attack')
mal_filter = prefix_NTM_df[list(map(Malwr_filter, prefix_NTM_df.Cause))].fillna('Malwr')
dd_filter = prefix_NTM_df[list(map(DDOS_filter, prefix_NTM_df.Cause))].fillna('DDOS')
hack_filter = prefix_NTM_df[list(map(HACK_filter, prefix_NTM_df.Cause))].fillna('HACK')
mail_filter = prefix_NTM_df[list(map(MAIL_filter, prefix_NTM_df.Cause))].fillna('MAIL')
web_filter = prefix_NTM_df[list(map(WEB_filter, prefix_NTM_df.Cause))].fillna('WEB')

# Stack the per-rule frames; a row mentioning several names appears once per name.
frames = [att_filter, mal_filter, dd_filter, hack_filter, mail_filter, web_filter]
result = pd.concat(frames)
result.sort_values(by=['Frequency'], ascending=False)
-
-
- # In[ ]:
-
-
- ##################### NTM section End #####################
-
-
- # In[ ]:
-
-
- ##################### MTM section #####################
-
-
- # In[375]:
-
-
# Rows whose ACCD_FIND_MTD_CODE is '2'.  Take an explicit copy: columns are
# added to and dropped from MTM_df later, which must not act on a slice of df.
MTM_df = df[df['ACCD_FIND_MTD_CODE'] == '2'].copy()
len(MTM_df)


# In[376]:


# RISK_V2 is the source of the asset, intent and source-IP labels.
RISK_V2_MTM = MTM_df['RISK_V2']

RISK_V2_FILTERED_MTM = RISK_V2_MTM.dropna()
# Sizes before and after removing missing RISK_V2 entries.
print(RISK_V2_MTM.size)
print(RISK_V2_FILTERED_MTM.size)
-
-
- # In[377]:
-
-
def filter_assets_value_MTM(risk):
    """Return the asset labels of one RISK_V2 entry (MTM section).

    Every key of *risk* that contains 'ASSETS_VAL_' and has a truthy value
    yields the string 'RISK_V2.<key>=<description>', using this section's own
    get_asset_desc_MTM for the description.

    A malformed entry (not iterable, or not indexable by its own keys) is
    printed together with its type and the labels collected so far are
    returned.  Only those failures are handled; the earlier bare ``except``
    with ``return`` inside ``finally`` hid every exception, including
    KeyboardInterrupt.
    """
    risks = []
    try:
        for risk_key in risk:
            if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
                risks.append('RISK_V2.' + risk_key + '=' + get_asset_desc_MTM(risk_key))
    except (TypeError, LookupError):
        print(risk)
        print(type(risk))
    return risks
-
-
- # In[378]:
-
-
- # modified
# Label of every ASSETS_VAL_n field (공인 = public, 사설 = private).
_ASSET_DESCRIPTIONS_MTM = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc_MTM(asset_field):
    """Return the label for *asset_field*, or '' when the field is unknown."""
    return _ASSET_DESCRIPTIONS_MTM.get(asset_field, '')
-
-
- # In[379]:
-
-
- # New assets column
# ASSETS_VAL column: the string form of each row's list of asset labels.
MTM_df['ASSETS_VAL'] = [
    str(filter_assets_value_MTM(risk_entry)) for risk_entry in RISK_V2_FILTERED_MTM
]
MTM_df[:1]
-
-
- # In[381]:
-
-
- # modified
def filter_intent_MTM(intent):
    """Return the intent labels of one RISK_V2 entry (MTM section).

    Every key of *intent* that contains 'INTENT_VAL_' and has a truthy value
    yields the string 'RISK_V2.<key>=<description>', using this section's own
    get_intent_desc_MTM for the description.
    """
    return [
        'RISK_V2.' + intent_key + '=' + get_intent_desc_MTM(intent_key)
        for intent_key in intent
        if 'INTENT_VAL_' in intent_key and intent[intent_key]
    ]
-
-
- # In[382]:
-
-
# Label of every INTENT_VAL_n field.
_INTENT_DESCRIPTIONS_MTM = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc_MTM(intent_field):
    """Return the label for *intent_field*, or '' when the field is unknown."""
    return _INTENT_DESCRIPTIONS_MTM.get(intent_field, '')
-
-
- # In[383]:
-
-
- # New column of intent value
# INTENT_VAL column: the string form of each row's list of intent labels.
MTM_df['INTENT_VAL'] = [
    str(filter_intent_MTM(risk_entry)) for risk_entry in RISK_V2_FILTERED_MTM
]
MTM_df[:1]
-
-
- # In[384]:
-
-
- # modified
def filter_source_MTM(source):
    """Return the source labels of one RISK_V2 entry (MTM section).

    Every key of *source* that contains 'SOURCE_VAL_' and has a truthy value
    yields the string 'RISK_V2.<key>=<description>', using this section's own
    get_source_desc_MTM for the description.
    """
    return [
        'RISK_V2.' + source_key + '=' + get_source_desc_MTM(source_key)
        for source_key in source
        if 'SOURCE_VAL_' in source_key and source[source_key]
    ]
-
-
- # In[385]:
-
-
# Label of every SOURCE_VAL_n field that has one.
_SOURCE_DESCRIPTIONS_MTM = {
    'SOURCE_VAL_1': '북한IP',
    'SOURCE_VAL_3': 'ECSC Black IP',
}


def get_source_desc_MTM(source_field):
    """Return the label for *source_field*, or '' when the field is unknown."""
    return _SOURCE_DESCRIPTIONS_MTM.get(source_field, '')
-
-
- # In[386]:
-
-
- # New column of SOURCE_VAL value
# SOURCE_VAL column: the string form of each row's list of source labels.
MTM_df['SOURCE_VAL'] = [
    str(filter_source_MTM(risk_entry)) for risk_entry in RISK_V2_FILTERED_MTM
]
MTM_df[:5]


# In[387]:


# RISK_V2 has been expanded into the three label columns; remove it in place.
MTM_df.drop('RISK_V2', axis=1, inplace=True)
MTM_df.columns
-
-
- # In[388]:
-
-
MTM_df.isna().sum()


# In[389]:


# Replace NaN column by column: '' for some columns, 0 for the others.
_mtm_nan_fill = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for _mtm_column, _mtm_fill in _mtm_nan_fill.items():
    MTM_df[_mtm_column] = MTM_df[_mtm_column].replace(np.nan, _mtm_fill)


# In[390]:


# Check for NaN again.
MTM_df.isna().sum()
-
-
- # In[391]:
-
-
- # # Merge all
-
- # # Make one string from all of elements
# Join every field of a row into one space-separated string.
MTM_df['Combined'] = (
    MTM_df['INST_NM'].astype(str) + ' '
    + MTM_df['TW_ATT_IP'].astype(str) + ' '
    + MTM_df['TW_ATT_PORT'].astype(str) + ' '
    + MTM_df['TW_DMG_IP'].astype(str) + ' '
    + MTM_df['TW_DMG_PORT'].astype(str) + ' '
    + MTM_df['ACCD_DMG_PROTO_NM'].astype(str) + ' '
    + MTM_df['TW_ATT_CT_NM'] + ' '
    + MTM_df['ASSETS_VAL'] + ' '
    + MTM_df['INTENT_VAL'] + ' '
    + MTM_df['SOURCE_VAL'] + ' '
    + MTM_df['DRULE_ATT_TYPE_CODE1'] + ' '
    + MTM_df['DRULE_NM']
)

MTM_com = MTM_df['Combined']
MTM_com[:10]
-
-
- # In[392]:
-
-
- # Change the type to DataFrame
# One-column frame holding the combined strings.
MTM_to_df = pd.DataFrame({'Combined': MTM_com})
MTM_to_df[:5]


# In[393]:


# Nested list (one single-item list per row), the input shape used for
# PrefixSpan below.
_mtm_rows = MTM_to_df.values
MTM_tolist = _mtm_rows.tolist()
MTM_tolist[:5]
-
-
- # In[394]:
-
-
- # Apply prefixspan
# Apply PrefixSpan to the nested list of combined strings.
PrefixSpan_MTM = PrefixSpan(MTM_tolist)

###### Interchangeable ######
# Minimum frequency passed to PrefixSpan: 1.
prefix_MTM = PrefixSpan_MTM.frequent(1)
prefix_MTM[:3]


# In[395]:


# Each result is a (frequency, pattern) pair.  Naming the columns at
# construction keeps the frame well-formed even when no pattern was found;
# renaming columns 0/1 afterwards made the column selection below raise
# KeyError on an empty result.
prefix_MTM_df = pd.DataFrame(prefix_MTM, columns=['Frequency', 'Cause'])
prefix_MTM_df[:5]


# In[396]:


# Placeholder for the effect, filled in from the cause afterwards.
prefix_MTM_df['Effect'] = np.nan

# Final column order.
prefix_MTM_df = prefix_MTM_df[['Cause', 'Effect', 'Frequency']]
prefix_MTM_df[:2]
-
-
- # In[397]:
-
-
- # Define the function that find the rule name
def generate_cause_MTM(cell):
    """Return the rule name mentioned in the first item of pattern *cell*.

    The names are tried in the order Attack, DDOS, HACK, MAIL, Malwr, WEB and
    must be preceded by a space in the text.  Returns '' when none matches or
    when *cell* is empty (which used to raise IndexError).
    """
    if len(cell) == 0:
        return ''
    first_item = cell[0]
    for drule in ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB'):
        if ' ' + drule in first_item:
            return drule
    return ''
-
- # Mapping the rule name with cause that is the effect
# Derive the effect (rule name) of every cause pattern with this section's
# own generate_cause_MTM; the NTM helper generate_cause was called here before.
effect_MTM = [generate_cause_MTM(cause_pattern) for cause_pattern in prefix_MTM_df.Cause]

# Store the rule name as the effect.
prefix_MTM_df['Effect'] = effect_MTM
prefix_MTM_df.sort_values(by=['Frequency'], ascending=False)
-
-
- # In[399]:
-
-
- # Attack Filter
def _mtm_cause_mentions(ps, label):
    """Return True when the first item of pattern *ps* contains ' ' + label.

    An empty pattern mentions nothing (indexing it used to raise IndexError).
    """
    return len(ps) > 0 and ' ' + label in ps[0]


def Attack_filter_MTM(ps):
    """Return True when the pattern mentions ' Attack'."""
    return _mtm_cause_mentions(ps, 'Attack')


def Malwr_filter_MTM(ps):
    """Return True when the pattern mentions ' Malwr'."""
    return _mtm_cause_mentions(ps, 'Malwr')


def DDOS_filter_MTM(ps):
    """Return True when the pattern mentions ' DDOS'."""
    return _mtm_cause_mentions(ps, 'DDOS')


def HACK_filter_MTM(ps):
    """Return True when the pattern mentions ' HACK'."""
    return _mtm_cause_mentions(ps, 'HACK')


def MAIL_filter_MTM(ps):
    """Return True when the pattern mentions ' MAIL'."""
    return _mtm_cause_mentions(ps, 'MAIL')


def WEB_filter_MTM(ps):
    """Return True when the pattern mentions ' WEB'."""
    return _mtm_cause_mentions(ps, 'WEB')


# One frame per rule name: the rows whose cause mentions it, with any missing
# value filled with that name.
att_filter_MTM = prefix_MTM_df[list(map(Attack_filter_MTM, prefix_MTM_df.Cause))].fillna('Attack')
mal_filter_MTM = prefix_MTM_df[list(map(Malwr_filter_MTM, prefix_MTM_df.Cause))].fillna('Malwr')
dd_filter_MTM = prefix_MTM_df[list(map(DDOS_filter_MTM, prefix_MTM_df.Cause))].fillna('DDOS')
hack_filter_MTM = prefix_MTM_df[list(map(HACK_filter_MTM, prefix_MTM_df.Cause))].fillna('HACK')
mail_filter_MTM = prefix_MTM_df[list(map(MAIL_filter_MTM, prefix_MTM_df.Cause))].fillna('MAIL')

prefix_MTM_df[:5]
web_filter_MTM = prefix_MTM_df[list(map(WEB_filter_MTM, prefix_MTM_df.Cause))].fillna('WEB')

# Stack the per-rule frames; a row mentioning several names appears once per name.
frames_MTM = [att_filter_MTM, mal_filter_MTM, dd_filter_MTM, hack_filter_MTM, mail_filter_MTM, web_filter_MTM]
result_MTM = pd.concat(frames_MTM)
result_MTM.sort_values(by=['Frequency'], ascending=False)
-
-
- # In[ ]:
-
-
-
|