| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933 |
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "<p>NTM(유해트래픽 탐지장비)</p>\n",
- "<p>MTM(악성파일 탐지장비)</p>"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "10000\n",
- "10000\n"
- ]
- }
- ],
- "source": [
- "#!/usr/bin/env python\n",
- "# coding: utf-8\n",
- "\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "from mlxtend.preprocessing import TransactionEncoder\n",
- "from mlxtend.frequent_patterns import association_rules, fpgrowth\n",
- "from prefixspan import PrefixSpan\n",
- "\n",
- "# Load ts_data_accident-2020_sample.csv.\n",
- "# low_memory=False is set to prevent pandas' DtypeWarning.\n",
- "df = pd.read_csv('ts_data_accident-2020_sample.csv', low_memory=False)\n",
- "df=df[['RISK_V2','INST_NM','DRULE_ATT_TYPE_CODE1','TW_ATT_IP','TW_ATT_PORT','TW_DMG_IP','TW_DMG_PORT','ACCD_DMG_PROTO_NM','TW_ATT_CT_NM','ACCD_FIND_MTD_CODE','DRULE_NM']].dropna()\n",
- "len(df) #len(df) : 10000, load successful\n",
- "\n",
- "##################### NTM section #####################\n",
- "NTM_df=df[df['ACCD_FIND_MTD_CODE']==1] #* edit'1' to 1\n",
- "len(NTM_df)\n",
- "#* NTM_df.head()\n",
- "\n",
- "# Extract the RISK_V2 column so the asset, intent, and source (black IP) values can be pulled out of it\n",
- "RISK_V2=NTM_df['RISK_V2']\n",
- "\n",
- "RISK_V2_FILTERED=RISK_V2.dropna()\n",
- "print(RISK_V2.size)\n",
- "print(RISK_V2_FILTERED.size)\n",
- "\n",
- "#* 추가 : 기존 filter_assets_value 사용시 값을 인식하지 못하는 문제 발생 -> RISK_V2를 별도의 df로 수정\n",
- "import json\n",
- "from pandas import json_normalize\n",
- "risk_df = pd.DataFrame()\n",
- "for newVal in RISK_V2_FILTERED:\n",
- " newVal = newVal.replace(\"'\", \"\\\"\")\n",
- " newVal_str = json.loads(newVal)\n",
- " newVal_df = json_normalize(newVal_str) \n",
- " risk_df = pd.concat([risk_df,newVal_df],ignore_index=True) \n",
- " \n",
- "risk_df_col = risk_df.columns.values.tolist()\n",
- "\n",
- "# In[352]:\n",
- "asset_val = []\n",
- "intent_val=[]\n",
- "source_val=[]\n",
- "def filter_assets_value(risk):\n",
- " for i in range(len(risk)):\n",
- " risks=[]\n",
- " intents=[]\n",
- " sources=[]\n",
- " try:\n",
- " for key in risk_df_col:\n",
- " if 'ASSETS_VAL_' in key and risk.iloc[i][key]:\n",
- " risk_key_desc = 'RISK_V2.' + key + '=' + get_asset_desc(key)\n",
- " risks.append(risk_key_desc)\n",
- " if 'INTENT_VAL_' in key and risk.iloc[i][key]:\n",
- " intent_key_desc = 'RISK_V2.' + key + '=' + get_intent_desc(key)\n",
- " intents.append(intent_key_desc)\n",
- " if 'SOURCE_VAL_' in key and risk.iloc[i][key]:\n",
- " source_key_desc='RISK_V2.' + key + '=' + get_source_desc(key)\n",
- " sources.append(source_key_desc)\n",
- " except:\n",
- " print(risk)\n",
- " print(type(risk))\n",
- " finally:\n",
- " asset_val.append(risks)\n",
- " intent_val.append(intents)\n",
- " source_val.append(sources)\n",
- " \n",
- " \n",
- "# modified\n",
- "def get_asset_desc(asset_field):\n",
- " if asset_field == 'ASSETS_VAL_1':\n",
- " return '공인-전체IP대역(유선)'\n",
- " elif asset_field == 'ASSETS_VAL_2':\n",
- " return '공인-전체IP대역(무선)'\n",
- " elif asset_field == 'ASSETS_VAL_3':\n",
- " return '공인-WEB서버'\n",
- " elif asset_field == 'ASSETS_VAL_4':\n",
- " return '공인-내부응용서버'\n",
- " elif asset_field == 'ASSETS_VAL_5':\n",
- " return '공인-DB서버'\n",
- " elif asset_field == 'ASSETS_VAL_6':\n",
- " return '공인-패치서버'\n",
- " elif asset_field == 'ASSETS_VAL_7':\n",
- " return '공인-네트워크'\n",
- " elif asset_field == 'ASSETS_VAL_8':\n",
- " return '공인-보안'\n",
- " elif asset_field == 'ASSETS_VAL_9':\n",
- " return '공인-업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_10':\n",
- " return '공인-비업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_11':\n",
- " return '공인-기타'\n",
- " elif asset_field == 'ASSETS_VAL_12':\n",
- " return '사설-전체IP대역(유선)'\n",
- " elif asset_field == 'ASSETS_VAL_13':\n",
- " return '사설-전체IP대역(무선)'\n",
- " elif asset_field == 'ASSETS_VAL_14':\n",
- " return '사설-WEB서버'\n",
- " elif asset_field == 'ASSETS_VAL_15':\n",
- " return '사설-내부응용서버'\n",
- " elif asset_field == 'ASSETS_VAL_16':\n",
- " return '사설-DB서버'\n",
- " elif asset_field == 'ASSETS_VAL_17':\n",
- " return '사설-패치서버'\n",
- " elif asset_field == 'ASSETS_VAL_18':\n",
- " return '사설-네트워크'\n",
- " elif asset_field == 'ASSETS_VAL_19':\n",
- " return '사설-보안'\n",
- " elif asset_field == 'ASSETS_VAL_20':\n",
- " return '사설-업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_21':\n",
- " return '사설-비업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_22':\n",
- " return '사설-기타'\n",
- " else:\n",
- " return ''\n",
- "\n",
- "\n",
- "\n",
- "# modified\n",
- "def filter_intent(intent):\n",
- " intents=[]\n",
- " for intent_key in intent:\n",
- " if 'INTENT_VAL_' in intent_key and intent[intent_key]:\n",
- " intent_key_desc = 'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)\n",
- " intents.append(intent_key_desc)\n",
- " return intents\n",
- "\n",
- "\n",
- "# In[356]:\n",
- "\n",
- "\n",
- "def get_intent_desc(intent_field):\n",
- " if intent_field == 'INTENT_VAL_1':\n",
- " return '파괴'\n",
- " elif intent_field == 'INTENT_VAL_2':\n",
- " return '유출'\n",
- " elif intent_field == 'INTENT_VAL_3':\n",
- " return '지연'\n",
- " elif intent_field == 'INTENT_VAL_4':\n",
- " return '잠복'\n",
- " elif intent_field == 'INTENT_VAL_5':\n",
- " return '단순침입'\n",
- " elif intent_field == 'INTENT_VAL_6':\n",
- " return 'MD5'\n",
- " elif intent_field == 'INTENT_VAL_0':\n",
- " return 'Default'\n",
- " else:\n",
- " return ''\n",
- "\n",
- "\n",
- "# In[358]:\n",
- "\n",
- "\n",
- "# modified\n",
- "def filter_source(source):\n",
- " sources=[]\n",
- " for source_key in source:\n",
- " if 'SOURCE_VAL_' in source_key and source[source_key]:\n",
- " source_key_desc='RISK_V2.' + source_key + '=' + get_source_desc(source_key)\n",
- " sources.append(source_key_desc)\n",
- " return sources\n",
- "\n",
- "\n",
- "# In[359]:\n",
- "\n",
- "\n",
- "def get_source_desc(source_field):\n",
- " if source_field=='SOURCE_VAL_1':\n",
- " return '북한IP'\n",
- " if source_field=='SOURCE_VAL_3':\n",
- " return 'ECSC Black IP'\n",
- " else:\n",
- " return ''\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',\n",
- " 'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',\n",
- " 'ACCD_FIND_MTD_CODE', 'DRULE_NM', 'ASSETS_VAL', 'INTENT_VAL',\n",
- " 'SOURCE_VAL'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "filter_assets_value(risk_df)\n",
- "#뒤에 isna()를 통해 na값을 0으로 바꿔주는 작업을 하므로, 값이 비어있는 경우 [] 대신 비워두기\n",
- "# New assets column\n",
- "NTM_df['ASSETS_VAL']= asset_val\n",
- "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].astype(str)\n",
- "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].str.replace('[','', regex=False)\n",
- "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].str.replace(']','', regex=False)\n",
- "NTM_df[:1]\n",
- "# New column of intent value\n",
- "NTM_df['INTENT_VAL']=intent_val\n",
- "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].astype(str)\n",
- "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].str.replace('[','',regex=False)\n",
- "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].str.replace(']','',regex=False)\n",
- "NTM_df[:1]\n",
- "# New column of SOURCE_VAL value\n",
- "NTM_df['SOURCE_VAL']=source_val\n",
- "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].astype(str)\n",
- "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].str.replace('[','',regex=False)\n",
- "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].str.replace(']','',regex=False)\n",
- "NTM_df[:5]\n",
- "\n",
- "# In[361]:\n",
- "NTM_df.drop(columns=['RISK_V2'], inplace=True)\n",
- "NTM_df.columns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "#data frame의 i번째 row를 list로 저장하여 itertools.combinations로 모든 조합 만들 예정\n",
- "#TW_ATT_IP와 TW_DMG_IP의 값이 같은 경우 어떤 값과의 관계인지 알 수 없으므로 데이터 가공\n",
- "NTM_df['TW_ATT_IP']=\"TW_ATT_IP=\"+NTM_df['TW_ATT_IP'].astype(str)\n",
- "NTM_df['TW_ATT_PORT']=\"TW_ATT_PORT=\"+NTM_df['TW_ATT_PORT'].astype(str)\n",
- "NTM_df['TW_DMG_IP']=\"TW_DMG_IP=\"+NTM_df['TW_DMG_IP'].astype(str)\n",
- "NTM_df['TW_DMG_PORT']=\"TW_DMG_PORT=\"+NTM_df['TW_DMG_PORT'].astype(str)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "INST_NM 0\n",
- "DRULE_ATT_TYPE_CODE1 0\n",
- "TW_ATT_IP 0\n",
- "TW_ATT_PORT 0\n",
- "TW_DMG_IP 0\n",
- "TW_DMG_PORT 0\n",
- "ACCD_DMG_PROTO_NM 0\n",
- "TW_ATT_CT_NM 0\n",
- "ACCD_FIND_MTD_CODE 0\n",
- "DRULE_NM 0\n",
- "ASSETS_VAL 0\n",
- "INTENT_VAL 0\n",
- "SOURCE_VAL 0\n",
- "dtype: int64"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "##################### 여기서부터 진행하시면 됩니다. #####################\n",
- "##################### 아래 12개 아이템(12. 장비 ACCD_FIND_MTD_CODE 제외)에 대해서 모든 아이템 조합에 알고리즘 적용하기#####################\n",
- "\n",
- "# It should be 13 columns in total\n",
- "\n",
- "# 1. 기관 INST_NM\n",
- "# 2. 공격 DRULE_ATT_TYPE_CODE1\n",
- "# 3. 자산 ASSETS_VAL\n",
- "# 4. 위협공격ip TW_ATT_IP\n",
- "# 5. 위협공격port TW_ATT_PORT\n",
- "# 6. 위협피해ip TW_DMG_IP\n",
- "# 7. 위협피해port TW_DMG_PORT\n",
- "# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM\n",
- "# 9. 공격국가 TW_ATT_CT_NM\n",
- "# 10. 의도(7개) INTENT_VAL\n",
- "# 11. IP/URL 가중치 SOURCE_VAL\n",
- "# 12. 장비 ACCD_FIND_MTD_CODE\n",
- "# 13. 탐지규칙명 DRULE_NM\n",
- "\n",
- "\n",
- "# In[363]:\n",
- "NTM_df.isna().sum()\n",
- "\n",
- "\n",
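- "# Replace NaN with '' (INST_NM, DRULE_ATT_TYPE_CODE1, ACCD_DMG_PROTO_NM, TW_ATT_CT_NM, DRULE_NM) or with 0 (IP, port, and *_VAL columns)\n",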
- "NTM_df['ACCD_DMG_PROTO_NM']=NTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
- "NTM_df['INST_NM']=NTM_df['INST_NM'].replace(np.nan,'')\n",
- "NTM_df['DRULE_ATT_TYPE_CODE1']=NTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
- "NTM_df['TW_ATT_IP']=NTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
- "NTM_df['TW_ATT_PORT']=NTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
- "NTM_df['TW_DMG_IP']=NTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
- "NTM_df['TW_DMG_PORT']=NTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
- "NTM_df['TW_ATT_CT_NM']=NTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
- "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
- "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].replace(np.nan,0)\n",
- "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
- "NTM_df['DRULE_NM']=NTM_df['DRULE_NM'].replace(np.nan,'')\n",
- "\n",
- "\n",
- "# Check for remaining NaN values again\n",
- "NTM_df.isna().sum()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "# NTM_df의 col을 list로 저장. itertools.combinations로 가능한 시나리오 모두 추출\n",
- "\n",
- "# ACCD_FIND_MTD_CODE col 지우기\n",
- "NTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "from prefixspan import PrefixSpan\n",
- "import itertools\n",
- "# arr를 매개변수로 받아 n개의 아이템의 조합 반환\n",
- "def get_combination(arr, n):\n",
- " combination_n = list(itertools.combinations(arr.columns.tolist(),n))\n",
- " combination_n = [com for com in combination_n if 'DRULE_ATT_TYPE_CODE1' in com]\n",
- " com_list=[]\n",
- " # row i 의 (1,2),(1,3)... 이런식으로 하니까 시간 너무 오래걸림\n",
- " # (1,2) 조합에 대한 row i, row i+1, row i+2... 이렇게 바꿈\n",
- " for m in range(len(combination_n)):\n",
- " for i in range(len(arr)):\n",
- " temp_list=[]\n",
- " temp_df = arr.iloc[i]\n",
- " for col in combination_n[m]:\n",
- " # 공백 처리\n",
- " if(temp_df[col]==''):\n",
- " break\n",
- " else:\n",
- " temp_list.append(temp_df[col])\n",
- " com_list.append(temp_list)\n",
- " prefix = get_prefixspan(com_list)\n",
- " return prefix\n",
- "\n",
- "def get_prefixspan(load_list):\n",
- " n = len(load_list[0])\n",
- " save_list = PrefixSpan(load_list)\n",
- " #n개 아이템 조합으로 이루어졌는데 n보다 작은 갯수의 아이템으로 이루어진 prefixspan 결과 값 나옴 \n",
- " # 방지하기 위해 prefixspan의 결과값에는 'n개의 아이템의 값'이 다 들어가도록 filter 설정\n",
- " save_list = save_list.frequent(1,filter = lambda patt, matches:len(patt)>=n)\n",
- " save_df = pd.DataFrame(save_list)\n",
- " save_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)\n",
- " save_df = save_df.sort_values(by=['Frequency'],ascending=False,ignore_index=True)\n",
- " save_df = get_effect(save_df)\n",
- " return save_df\n",
- "\n",
- "def get_effect(edit_df):\n",
- " #Make the new column for filling the Effect\n",
- " edit_df['Effect']=np.nan\n",
- " #Change the order of columns\n",
- " edit_df=edit_df[['Cause','Effect','Frequency']]\n",
- " for i in range(len(edit_df)):\n",
- " drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
- " temp_df = edit_df.loc[i]\n",
- " for item in temp_df['Cause']:\n",
- " for drule in drules:\n",
- " if item == drule:\n",
- " edit_df.loc[i,'Effect'] = item\n",
- " return edit_df\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Cause</th>\n",
- " <th>Effect</th>\n",
- " <th>Frequency</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>[Attack, 'RISK_V2.INTENT_VAL_5=단순침입']</td>\n",
- " <td>Attack</td>\n",
- " <td>7709</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>[Attack, 'RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
- " <td>Attack</td>\n",
- " <td>3175</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>[Attack, Attack-Scan-29-01-PHPUnit(CVE17-9841)...</td>\n",
- " <td>Attack</td>\n",
- " <td>2770</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>[Attack, 중국]</td>\n",
- " <td>Attack</td>\n",
- " <td>2689</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>[Attack, 'RISK_V2.SOURCE_VAL_3=ECSC Black IP']</td>\n",
- " <td>Attack</td>\n",
- " <td>1904</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>41145</th>\n",
- " <td>[Attack, TW_ATT_PORT=5389]</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>41146</th>\n",
- " <td>[Attack, TW_ATT_PORT=38677]</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>41147</th>\n",
- " <td>[Attack, TW_ATT_PORT=8287]</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>41148</th>\n",
- " <td>[Attack, TW_ATT_PORT=2404]</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>41149</th>\n",
- " <td>[Seoul Christian University, Malwr]</td>\n",
- " <td>Malwr</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>41150 rows × 3 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " Cause Effect Frequency\n",
- "0 [Attack, 'RISK_V2.INTENT_VAL_5=단순침입'] Attack 7709\n",
- "1 [Attack, 'RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] Attack 3175\n",
- "2 [Attack, Attack-Scan-29-01-PHPUnit(CVE17-9841)... Attack 2770\n",
- "3 [Attack, 중국] Attack 2689\n",
- "4 [Attack, 'RISK_V2.SOURCE_VAL_3=ECSC Black IP'] Attack 1904\n",
- "... ... ... ...\n",
- "41145 [Attack, TW_ATT_PORT=5389] Attack 1\n",
- "41146 [Attack, TW_ATT_PORT=38677] Attack 1\n",
- "41147 [Attack, TW_ATT_PORT=8287] Attack 1\n",
- "41148 [Attack, TW_ATT_PORT=2404] Attack 1\n",
- "41149 [Seoul Christian University, Malwr] Malwr 1\n",
- "\n",
- "[41150 rows x 3 columns]"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 1. 두 아이템의 조합\n",
- "item = 2\n",
- "prefix_of_two = get_combination(NTM_df, item)\n",
- "prefix_of_two"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "ename": "KeyboardInterrupt",
- "evalue": "",
- "output_type": "error",
- "traceback": [
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
- "\u001b[1;32m<ipython-input-8-fdb1732ee6a2>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# 2. 세 아이템의 조합\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mprefix_of_three\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_combination\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mNTM_df\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_combination\u001b[1;34m(arr, n)\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[0mtemp_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp_df\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[0mcom_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 21\u001b[1;33m \u001b[0mprefix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_prefixspan\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcom_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 22\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mprefix\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_prefixspan\u001b[1;34m(load_list)\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[0msave_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m{\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'Frequency'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'Cause'\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[0msave_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msave_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Frequency'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mignore_index\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m \u001b[0msave_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_effect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msave_df\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 34\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0msave_df\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_effect\u001b[1;34m(edit_df)\u001b[0m\n\u001b[0;32m 45\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mdrule\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdrules\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 46\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mitem\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mdrule\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 47\u001b[1;33m \u001b[0medit_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Effect'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mitem\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 48\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0medit_df\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 690\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 691\u001b[0m \u001b[0miloc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"iloc\"\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 692\u001b[1;33m \u001b[0miloc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_with_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 693\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 694\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_validate_key\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer\u001b[1;34m(self, indexer, value, name)\u001b[0m\n\u001b[0;32m 1633\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mtake_split_path\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1634\u001b[0m \u001b[1;31m# We have to operate column-wise\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1635\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_with_indexer_split_path\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1636\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1637\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_single_block\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer_split_path\u001b[1;34m(self, indexer, value, name)\u001b[0m\n\u001b[0;32m 1718\u001b[0m \u001b[1;31m# scalar value\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1719\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mloc\u001b[0m \u001b[1;32min\u001b[0m \u001b[0milocs\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1720\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_single_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpi\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1721\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1722\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_setitem_with_indexer_2d_value\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_single_column\u001b[1;34m(self, loc, value, plane_indexer)\u001b[0m\n\u001b[0;32m 1815\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1816\u001b[0m \u001b[1;31m# reset the sliced object if unique\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1817\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_iset_item\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mser\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1818\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1819\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_setitem_single_block\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_iset_item\u001b[1;34m(self, loc, value)\u001b[0m\n\u001b[0;32m 3220\u001b[0m \u001b[1;31m# technically _sanitize_column expects a label, not a position,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3221\u001b[0m \u001b[1;31m# but the behavior is the same as long as we pass broadcast=False\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3222\u001b[1;33m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_sanitize_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbroadcast\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3223\u001b[0m \u001b[0mNDFrame\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_iset_item\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3224\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_sanitize_column\u001b[1;34m(self, key, value, broadcast)\u001b[0m\n\u001b[0;32m 3874\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3875\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSeries\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3876\u001b[1;33m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mreindexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3877\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3878\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mreindexer\u001b[1;34m(value)\u001b[0m\n\u001b[0;32m 3855\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3856\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mequals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3857\u001b[1;33m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3858\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3859\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
- ]
- }
- ],
- "source": [
- "# 2. 세 아이템의 조합\n",
- "prefix_of_three = get_combination(NTM_df, 3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 3. 네 아이템의 조합\n",
- "prefix_of_four = get_combination(NTM_df, 4)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 4. 다섯 아이템의 조합\n",
- "prefix_of_five = get_combination(NTM_df, 5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 5. 여섯 아이템의 조합\n",
- "prefix_of_six = get_combination(NTM_df, 6)\n",
- "##################### NTM section End #####################"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "##################### MTM section #####################\n",
- "# Same goes for the MTM section\n",
- "\n",
- "# In[375]:\n",
- "\n",
- "\n",
- "MTM_df=df[df['ACCD_FIND_MTD_CODE']==2]\n",
- "len(MTM_df)\n",
- "\n",
- "\n",
- "# In[376]:\n",
- "\n",
- "\n",
- "# Extract the RISK_V2 column so its asset, intent, and source (black IP) flags can be parsed out\n",
- "RISK_V2_MTM=MTM_df['RISK_V2']\n",
- "\n",
- "RISK_V2_FILTERED_MTM=RISK_V2_MTM.dropna()\n",
- "print(RISK_V2_MTM.size)\n",
- "print(RISK_V2_FILTERED_MTM.size)\n",
- "\n",
- "risk_df_MTM = pd.DataFrame()\n",
- "for newVal_MTM in RISK_V2_FILTERED_MTM:\n",
- " newVal_MTM = newVal_MTM.replace(\"'\", \"\\\"\")\n",
- " newVal_MTM_str = json.loads(newVal_MTM)\n",
- " newVal_df_MTM = json_normalize(newVal_MTM_str) \n",
- " risk_df_MTM = pd.concat([risk_df_MTM,newVal_df_MTM],ignore_index=True) \n",
- " \n",
- "risk_df_col_MTM = risk_df_MTM.columns.values.tolist()\n",
- "\n",
- "# In[377]:\n",
- "\n",
- "\n",
- "asset_val_MTM = []\n",
- "intent_val_MTM=[]\n",
- "source_val_MTM=[]\n",
- "\n",
- "def filter_assets_value_MTM(risk):\n",
- " for i in range(len(risk)):\n",
- " risks=[]\n",
- " intents=[]\n",
- " sources=[]\n",
- " try:\n",
- " for key in risk_df_col_MTM:\n",
- " if 'ASSETS_VAL_' in key and risk.iloc[i][key]:\n",
- " risk_key_desc = 'RISK_V2.' + key + '=' + get_asset_desc(key)\n",
- " risks.append(risk_key_desc)\n",
- " if 'INTENT_VAL_' in key and risk.iloc[i][key]:\n",
- " intent_key_desc = 'RISK_V2.' + key + '=' + get_intent_desc(key)\n",
- " intents.append(intent_key_desc)\n",
- " if 'SOURCE_VAL_' in key and risk.iloc[i][key]:\n",
- " source_key_desc='RISK_V2.' + key + '=' + get_source_desc(key)\n",
- " sources.append(source_key_desc)\n",
- " except:\n",
- " print(risk)\n",
- " print(type(risk))\n",
- " finally:\n",
- " asset_val_MTM.append(risks)\n",
- " intent_val_MTM.append(intents)\n",
- " source_val_MTM.append(sources)\n",
- "\n",
- "# In[378]:\n",
- "\n",
- "# modified\n",
- "def get_asset_desc_MTM(asset_field):\n",
- " if asset_field == 'ASSETS_VAL_1':\n",
- " return '공인-전체IP대역(유선)'\n",
- " elif asset_field == 'ASSETS_VAL_2':\n",
- " return '공인-전체IP대역(무선)'\n",
- " elif asset_field == 'ASSETS_VAL_3':\n",
- " return '공인-WEB서버'\n",
- " elif asset_field == 'ASSETS_VAL_4':\n",
- " return '공인-내부응용서버'\n",
- " elif asset_field == 'ASSETS_VAL_5':\n",
- " return '공인-DB서버'\n",
- " elif asset_field == 'ASSETS_VAL_6':\n",
- " return '공인-패치서버'\n",
- " elif asset_field == 'ASSETS_VAL_7':\n",
- " return '공인-네트워크'\n",
- " elif asset_field == 'ASSETS_VAL_8':\n",
- " return '공인-보안'\n",
- " elif asset_field == 'ASSETS_VAL_9':\n",
- " return '공인-업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_10':\n",
- " return '공인-비업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_11':\n",
- " return '공인-기타'\n",
- " elif asset_field == 'ASSETS_VAL_12':\n",
- " return '사설-전체IP대역(유선)'\n",
- " elif asset_field == 'ASSETS_VAL_13':\n",
- " return '사설-전체IP대역(무선)'\n",
- " elif asset_field == 'ASSETS_VAL_14':\n",
- " return '사설-WEB서버'\n",
- " elif asset_field == 'ASSETS_VAL_15':\n",
- " return '사설-내부응용서버'\n",
- " elif asset_field == 'ASSETS_VAL_16':\n",
- " return '사설-DB서버'\n",
- " elif asset_field == 'ASSETS_VAL_17':\n",
- " return '사설-패치서버'\n",
- " elif asset_field == 'ASSETS_VAL_18':\n",
- " return '사설-네트워크'\n",
- " elif asset_field == 'ASSETS_VAL_19':\n",
- " return '사설-보안'\n",
- " elif asset_field == 'ASSETS_VAL_20':\n",
- " return '사설-업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_21':\n",
- " return '사설-비업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_22':\n",
- " return '사설-기타'\n",
- " else:\n",
- " return ''\n",
- "\n",
- "\n",
- "# In[381]:\n",
- "\n",
- "\n",
- "# modified\n",
- "def filter_intent_MTM(intent):\n",
- " intents=[]\n",
- " for intent_key in intent:\n",
- " if 'INTENT_VAL_' in intent_key and intent[intent_key]:\n",
- " intent_key_desc = 'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)\n",
- " intents.append(intent_key_desc)\n",
- " return intents\n",
- "\n",
- "\n",
- "# In[382]:\n",
- "\n",
- "\n",
- "def get_intent_desc_MTM(intent_field):\n",
- " if intent_field == 'INTENT_VAL_1':\n",
- " return '파괴'\n",
- " elif intent_field == 'INTENT_VAL_2':\n",
- " return '유출'\n",
- " elif intent_field == 'INTENT_VAL_3':\n",
- " return '지연'\n",
- " elif intent_field == 'INTENT_VAL_4':\n",
- " return '잠복'\n",
- " elif intent_field == 'INTENT_VAL_5':\n",
- " return '단순침입'\n",
- " elif intent_field == 'INTENT_VAL_6':\n",
- " return 'MD5'\n",
- " elif intent_field == 'INTENT_VAL_0':\n",
- " return 'Default'\n",
- " else:\n",
- " return ''\n",
- "\n",
- "\n",
- "\n",
- "# In[384]:\n",
- "\n",
- "\n",
- "# modified\n",
- "def filter_source_MTM(source):\n",
- " sources=[]\n",
- " for source_key in source:\n",
- " if 'SOURCE_VAL_' in source_key and source[source_key]:\n",
- " source_key_desc='RISK_V2.' + source_key + '=' + get_source_desc(source_key)\n",
- " sources.append(source_key_desc)\n",
- " return sources\n",
- "\n",
- "\n",
- "# In[385]:\n",
- "\n",
- "\n",
- "def get_source_desc_MTM(source_field):\n",
- " if source_field=='SOURCE_VAL_1':\n",
- " return '북한IP'\n",
- " if source_field=='SOURCE_VAL_3':\n",
- " return 'ECSC Black IP'\n",
- " else:\n",
- " return ''\n",
- "\n",
- "\n",
- "# In[386]:\n",
- "\n",
- "filter_assets_value_MTM(risk_df_MTM)\n",
- "#뒤에 isna()를 통해 na값을 0으로 바꿔주는 작업을 하므로, 값이 비어있는 경우 [] 대신 비워두기\n",
- "# New assets column\n",
- "MTM_df['ASSETS_VAL']= asset_val_MTM\n",
- "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].astype(str)\n",
- "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].str.replace('[','', regex=False)\n",
- "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].str.replace(']','', regex=False)\n",
- "MTM_df[:1]\n",
- "# New column of intent value\n",
- "MTM_df['INTENT_VAL']=intent_val_MTM\n",
- "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].astype(str)\n",
- "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].str.replace('[','',regex=False)\n",
- "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].str.replace(']','',regex=False)\n",
- "MTM_df[:1]\n",
- "# New column of SOURCE_VAL value\n",
- "MTM_df['SOURCE_VAL']=source_val_MTM\n",
- "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].astype(str)\n",
- "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].str.replace('[','',regex=False)\n",
- "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].str.replace(']','',regex=False)\n",
- "MTM_df[:5]\n",
- "\n",
- "# In[361]:\n",
- "MTM_df.drop(columns=['RISK_V2'], inplace=True)\n",
- "MTM_df.columns\n",
- "\n",
- "\n",
- "# In[388]:\n",
- "\n",
- "\n",
- "MTM_df.isna().sum()\n",
- "\n",
- "\n",
- "# In[389]:\n",
- "\n",
- "\n",
- "# Replace NaN with '' or 0, depending on the column\n",
- "MTM_df['ACCD_DMG_PROTO_NM']=MTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
- "MTM_df['INST_NM']=MTM_df['INST_NM'].replace(np.nan,'')\n",
- "MTM_df['DRULE_ATT_TYPE_CODE1']=MTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
- "MTM_df['TW_ATT_IP']=MTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
- "MTM_df['TW_ATT_PORT']=MTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
- "MTM_df['TW_DMG_IP']=MTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
- "MTM_df['TW_DMG_PORT']=MTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
- "MTM_df['TW_ATT_CT_NM']=MTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
- "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
- "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].replace(np.nan,0)\n",
- "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
- "MTM_df['DRULE_NM']=MTM_df['DRULE_NM'].replace(np.nan,'')\n",
- "\n",
- "\n",
- "# In[390]:\n",
- "\n",
- "\n",
- "# Check the NaN counts again\n",
- "MTM_df.isna().sum()\n",
- "\n",
- "\n",
- "# In[391]:\n",
- "\n",
- "# ACCD_FIND_MTD_CODE col 지우기\n",
- "MTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)\n",
- "\n",
- "# arr를 매개변수로 받아 n개의 아이템의 조합 반환\n",
- "def get_combination_MTM(arr, n):\n",
- " combination_n = list(itertools.combinations(arr.columns.tolist(),n))\n",
- " combination_n = [com for com in combination_n if 'DRULE_ATT_TYPE_CODE1' in com]\n",
- " com_list=[]\n",
- " for m in range(len(combination_n)):\n",
- " for i in range(len(arr)):\n",
- " temp_list=[]\n",
- " temp_df = arr.iloc[i]\n",
- " for col in combination_n[m]:\n",
- " # 공백 처리\n",
- " if(temp_df[col]==''):\n",
- " break\n",
- " else:\n",
- " temp_list.append(temp_df[col])\n",
- " com_list.append(temp_list)\n",
- " prefix = get_prefixspan_MTM(com_list)\n",
- " return prefix\n",
- "\n",
- "def get_prefixspan_MTM(load_list):\n",
- " n = len(load_list[0])\n",
- " save_list = PrefixSpan(load_list)\n",
- " #n개 아이템 조합으로 이루어졌는데 n보다 작은 갯수의 아이템으로 이루어진 prefixspan 결과 값 나옴 \n",
- " # 방지하기 위해 prefixspan의 결과값에는 'n개의 아이템의 값'이 다 들어가도록 filter 설정\n",
- " save_list = save_list.frequent(1,filter = lambda patt, matches:len(patt)>=n)\n",
- " save_df = pd.DataFrame(save_list)\n",
- " save_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)\n",
- " save_df = save_df.sort_values(by=['Frequency'],ascending=False,ignore_index=True)\n",
- " save_df = get_effect_MTM(save_df)\n",
- " return save_df\n",
- "\n",
- "def get_effect_MTM(edit_df):\n",
- " # Add a new 'Effect' column, initially NaN, to be filled in below\n",
- " edit_df['Effect']=np.nan\n",
- " #Change the order of columns\n",
- " edit_df=edit_df[['Cause','Effect','Frequency']]\n",
- " for i in range(len(edit_df)):\n",
- " drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
- " temp_df = edit_df.loc[i]\n",
- " for item in temp_df['Cause']:\n",
- " for drule in drules:\n",
- " if item == drule:\n",
- " edit_df.loc[i,'Effect'] = item\n",
- " return edit_df\n",
- "\n",
- "\n",
- "\n",
- "# 1. 두 아이템의 조합\n",
- "prefix_of_two_MTM = get_combination(MTM_df,2)\n",
- "\n",
- "# 2. 세 아이템의 조합\n",
- "prefix_of_three_MTM = get_combination(MTM_df, 3)\n",
- "\n",
- "# 3. 네 아이템의 조합\n",
- "prefix_of_four_MTM = get_combination(MTM_df, 4)\n",
- "\n",
- "# 4. 다섯 아이템의 조합\n",
- "prefix_of_five_MTM = get_combination(MTM_df, 5)\n",
- "\n",
- "\n",
- "# 5. 여섯 아이템의 조합\n",
- "prefix_of_six_MTM = get_combination(MTM_df, 6)\n",
- "\n",
- "##################### MTM section End #####################"
- ]
- }
- ],
- "metadata": {
- "anaconda-cloud": {},
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
- }
|