Просмотр исходного кода

삭제 'keris.ipynb/PrefixSpan_20211021.ipynb'

.ipynb 삭제 후 .py 재업로드 예정
master
yevKwon 4 года назад
Родитель
Сommit
4dd3024e8d
1 изменённый файл: 0 добавлений и 933 удаления
  1. 0
    933
      keris.ipynb/PrefixSpan_20211021.ipynb

+ 0
- 933
keris.ipynb/PrefixSpan_20211021.ipynb Просмотреть файл

@@ -1,933 +0,0 @@
1
-{
2
- "cells": [
3
-  {
4
-   "cell_type": "markdown",
5
-   "metadata": {},
6
-   "source": [
7
-    "<p>NTM(유해트래픽 탐지장비)</p>\n",
8
-    "<p>MTM(악성파일 탐지장비)</p>"
9
-   ]
10
-  },
11
-  {
12
-   "cell_type": "code",
13
-   "execution_count": 1,
14
-   "metadata": {},
15
-   "outputs": [
16
-    {
17
-     "name": "stdout",
18
-     "output_type": "stream",
19
-     "text": [
20
-      "10000\n",
21
-      "10000\n"
22
-     ]
23
-    }
24
-   ],
25
-   "source": [
26
-    "#!/usr/bin/env python\n",
27
-    "# coding: utf-8\n",
28
-    "\n",
29
-    "import pandas as pd\n",
30
-    "import numpy as np\n",
31
-    "from mlxtend.preprocessing import TransactionEncoder\n",
32
-    "from mlxtend.frequent_patterns import association_rules, fpgrowth\n",
33
-    "from prefixspan import PrefixSpan\n",
34
-    "\n",
35
-    "# load ts_data_accident-2020_sample.csv\n",
36
-    "# to prevent dtypewarning, set low_memory=False\n",
37
-    "df = pd.read_csv('ts_data_accident-2020_sample.csv', low_memory=False)\n",
38
-    "df=df[['RISK_V2','INST_NM','DRULE_ATT_TYPE_CODE1','TW_ATT_IP','TW_ATT_PORT','TW_DMG_IP','TW_DMG_PORT','ACCD_DMG_PROTO_NM','TW_ATT_CT_NM','ACCD_FIND_MTD_CODE','DRULE_NM']].dropna()\n",
39
-    "len(df) #len(df) : 10000, load successful\n",
40
-    "\n",
41
-    "##################### NTM section #####################\n",
42
-    "NTM_df=df[df['ACCD_FIND_MTD_CODE']==1] #* edit'1' to 1\n",
43
-    "len(NTM_df)\n",
44
-    "#* NTM_df.head()\n",
45
-    "\n",
46
-    "# Pull out the RISK_V2 column so the asset, intent, and source (black IP) values can be extracted from it\n",
47
-    "RISK_V2=NTM_df['RISK_V2']\n",
48
-    "\n",
49
-    "RISK_V2_FILTERED=RISK_V2.dropna()\n",
50
-    "print(RISK_V2.size)\n",
51
-    "print(RISK_V2_FILTERED.size)\n",
52
-    "\n",
53
-    "#* 추가 : 기존 filter_assets_value 사용시 값을 인식하지 못하는 문제 발생 -> RISK_V2를 별도의 df로 수정\n",
54
-    "import json\n",
55
-    "from pandas import json_normalize\n",
56
-    "risk_df = pd.DataFrame()\n",
57
-    "for newVal in RISK_V2_FILTERED:\n",
58
-    "    newVal = newVal.replace(\"'\", \"\\\"\")\n",
59
-    "    newVal_str = json.loads(newVal)\n",
60
-    "    newVal_df = json_normalize(newVal_str) \n",
61
-    "    risk_df = pd.concat([risk_df,newVal_df],ignore_index=True) \n",
62
-    "    \n",
63
-    "risk_df_col = risk_df.columns.values.tolist()\n",
64
-    "\n",
65
-    "# In[352]:\n",
66
-    "asset_val = []\n",
67
-    "intent_val=[]\n",
68
-    "source_val=[]\n",
69
-    "def filter_assets_value(risk):\n",
70
-    "    for i in range(len(risk)):\n",
71
-    "        risks=[]\n",
72
-    "        intents=[]\n",
73
-    "        sources=[]\n",
74
-    "        try:\n",
75
-    "            for key in risk_df_col:\n",
76
-    "                if 'ASSETS_VAL_' in key and risk.iloc[i][key]:\n",
77
-    "                    risk_key_desc = 'RISK_V2.' + key + '=' + get_asset_desc(key)\n",
78
-    "                    risks.append(risk_key_desc)\n",
79
-    "                if 'INTENT_VAL_' in key and risk.iloc[i][key]:\n",
80
-    "                    intent_key_desc = 'RISK_V2.' + key + '=' + get_intent_desc(key)\n",
81
-    "                    intents.append(intent_key_desc)\n",
82
-    "                if 'SOURCE_VAL_' in key and risk.iloc[i][key]:\n",
83
-    "                    source_key_desc='RISK_V2.' + key + '=' + get_source_desc(key)\n",
84
-    "                    sources.append(source_key_desc)\n",
85
-    "        except:\n",
86
-    "            print(risk)\n",
87
-    "            print(type(risk))\n",
88
-    "        finally:\n",
89
-    "            asset_val.append(risks)\n",
90
-    "            intent_val.append(intents)\n",
91
-    "            source_val.append(sources)\n",
92
-    "    \n",
93
-    "    \n",
94
-    "# modified\n",
95
-    "def get_asset_desc(asset_field):\n",
96
-    "    if asset_field == 'ASSETS_VAL_1':\n",
97
-    "        return '공인-전체IP대역(유선)'\n",
98
-    "    elif asset_field == 'ASSETS_VAL_2':\n",
99
-    "        return '공인-전체IP대역(무선)'\n",
100
-    "    elif asset_field == 'ASSETS_VAL_3':\n",
101
-    "        return '공인-WEB서버'\n",
102
-    "    elif asset_field == 'ASSETS_VAL_4':\n",
103
-    "        return '공인-내부응용서버'\n",
104
-    "    elif asset_field == 'ASSETS_VAL_5':\n",
105
-    "        return '공인-DB서버'\n",
106
-    "    elif asset_field == 'ASSETS_VAL_6':\n",
107
-    "        return '공인-패치서버'\n",
108
-    "    elif asset_field == 'ASSETS_VAL_7':\n",
109
-    "        return '공인-네트워크'\n",
110
-    "    elif asset_field == 'ASSETS_VAL_8':\n",
111
-    "        return '공인-보안'\n",
112
-    "    elif asset_field == 'ASSETS_VAL_9':\n",
113
-    "        return '공인-업무용PC'\n",
114
-    "    elif asset_field == 'ASSETS_VAL_10':\n",
115
-    "        return '공인-비업무용PC'\n",
116
-    "    elif asset_field == 'ASSETS_VAL_11':\n",
117
-    "        return '공인-기타'\n",
118
-    "    elif asset_field == 'ASSETS_VAL_12':\n",
119
-    "        return '사설-전체IP대역(유선)'\n",
120
-    "    elif asset_field == 'ASSETS_VAL_13':\n",
121
-    "        return '사설-전체IP대역(무선)'\n",
122
-    "    elif asset_field == 'ASSETS_VAL_14':\n",
123
-    "        return '사설-WEB서버'\n",
124
-    "    elif asset_field == 'ASSETS_VAL_15':\n",
125
-    "        return '사설-내부응용서버'\n",
126
-    "    elif asset_field == 'ASSETS_VAL_16':\n",
127
-    "        return '사설-DB서버'\n",
128
-    "    elif asset_field == 'ASSETS_VAL_17':\n",
129
-    "        return '사설-패치서버'\n",
130
-    "    elif asset_field == 'ASSETS_VAL_18':\n",
131
-    "        return '사설-네트워크'\n",
132
-    "    elif asset_field == 'ASSETS_VAL_19':\n",
133
-    "        return '사설-보안'\n",
134
-    "    elif asset_field == 'ASSETS_VAL_20':\n",
135
-    "        return '사설-업무용PC'\n",
136
-    "    elif asset_field == 'ASSETS_VAL_21':\n",
137
-    "        return '사설-비업무용PC'\n",
138
-    "    elif asset_field == 'ASSETS_VAL_22':\n",
139
-    "        return '사설-기타'\n",
140
-    "    else:\n",
141
-    "        return ''\n",
142
-    "\n",
143
-    "\n",
144
-    "\n",
145
-    "# modified\n",
146
-    "def filter_intent(intent):\n",
147
-    "    intents=[]\n",
148
-    "    for intent_key in intent:\n",
149
-    "        if 'INTENT_VAL_' in intent_key and intent[intent_key]:\n",
150
-    "            intent_key_desc = 'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)\n",
151
-    "            intents.append(intent_key_desc)\n",
152
-    "    return intents\n",
153
-    "\n",
154
-    "\n",
155
-    "# In[356]:\n",
156
-    "\n",
157
-    "\n",
158
-    "def get_intent_desc(intent_field):\n",
159
-    "    if intent_field == 'INTENT_VAL_1':\n",
160
-    "        return '파괴'\n",
161
-    "    elif intent_field == 'INTENT_VAL_2':\n",
162
-    "        return '유출'\n",
163
-    "    elif intent_field == 'INTENT_VAL_3':\n",
164
-    "        return '지연'\n",
165
-    "    elif intent_field == 'INTENT_VAL_4':\n",
166
-    "        return '잠복'\n",
167
-    "    elif intent_field == 'INTENT_VAL_5':\n",
168
-    "        return '단순침입'\n",
169
-    "    elif intent_field == 'INTENT_VAL_6':\n",
170
-    "        return 'MD5'\n",
171
-    "    elif intent_field == 'INTENT_VAL_0':\n",
172
-    "        return 'Default'\n",
173
-    "    else:\n",
174
-    "        return ''\n",
175
-    "\n",
176
-    "\n",
177
-    "# In[358]:\n",
178
-    "\n",
179
-    "\n",
180
-    "# modified\n",
181
-    "def filter_source(source):\n",
182
-    "    sources=[]\n",
183
-    "    for source_key in source:\n",
184
-    "        if 'SOURCE_VAL_' in source_key and source[source_key]:\n",
185
-    "            source_key_desc='RISK_V2.' + source_key + '=' + get_source_desc(source_key)\n",
186
-    "            sources.append(source_key_desc)\n",
187
-    "    return sources\n",
188
-    "\n",
189
-    "\n",
190
-    "# In[359]:\n",
191
-    "\n",
192
-    "\n",
193
-    "def get_source_desc(source_field):\n",
194
-    "    if source_field=='SOURCE_VAL_1':\n",
195
-    "        return '북한IP'\n",
196
-    "    if source_field=='SOURCE_VAL_3':\n",
197
-    "        return 'ECSC Black IP'\n",
198
-    "    else:\n",
199
-    "        return ''\n",
200
-    "\n",
201
-    "\n"
202
-   ]
203
-  },
204
-  {
205
-   "cell_type": "code",
206
-   "execution_count": 2,
207
-   "metadata": {},
208
-   "outputs": [
209
-    {
210
-     "data": {
211
-      "text/plain": [
212
-       "Index(['INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',\n",
213
-       "       'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',\n",
214
-       "       'ACCD_FIND_MTD_CODE', 'DRULE_NM', 'ASSETS_VAL', 'INTENT_VAL',\n",
215
-       "       'SOURCE_VAL'],\n",
216
-       "      dtype='object')"
217
-      ]
218
-     },
219
-     "execution_count": 2,
220
-     "metadata": {},
221
-     "output_type": "execute_result"
222
-    }
223
-   ],
224
-   "source": [
225
-    "filter_assets_value(risk_df)\n",
226
-    "#뒤에 isna()를 통해 na값을 0으로 바꿔주는 작업을 하므로, 값이 비어있는 경우 [] 대신 비워두기\n",
227
-    "# New assets column\n",
228
-    "NTM_df['ASSETS_VAL']= asset_val\n",
229
-    "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].astype(str)\n",
230
-    "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].str.replace('[','', regex=False)\n",
231
-    "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].str.replace(']','', regex=False)\n",
232
-    "NTM_df[:1]\n",
233
-    "# New column of intent value\n",
234
-    "NTM_df['INTENT_VAL']=intent_val\n",
235
-    "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].astype(str)\n",
236
-    "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].str.replace('[','',regex=False)\n",
237
-    "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].str.replace(']','',regex=False)\n",
238
-    "NTM_df[:1]\n",
239
-    "# New column of SOURCE_VAL value\n",
240
-    "NTM_df['SOURCE_VAL']=source_val\n",
241
-    "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].astype(str)\n",
242
-    "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].str.replace('[','',regex=False)\n",
243
-    "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].str.replace(']','',regex=False)\n",
244
-    "NTM_df[:5]\n",
245
-    "\n",
246
-    "# In[361]:\n",
247
-    "NTM_df.drop(columns=['RISK_V2'], inplace=True)\n",
248
-    "NTM_df.columns"
249
-   ]
250
-  },
251
-  {
252
-   "cell_type": "code",
253
-   "execution_count": 3,
254
-   "metadata": {},
255
-   "outputs": [],
256
-   "source": [
257
-    "#data frame의 i번째 row를 list로 저장하여 itertools.combinations로 모든 조합 만들 예정\n",
258
-    "#TW_ATT_IP와 TW_DMG_IP의 값이 같은 경우 어떤 값과의 관계인지 알 수 없으므로 데이터 가공\n",
259
-    "NTM_df['TW_ATT_IP']=\"TW_ATT_IP=\"+NTM_df['TW_ATT_IP'].astype(str)\n",
260
-    "NTM_df['TW_ATT_PORT']=\"TW_ATT_PORT=\"+NTM_df['TW_ATT_PORT'].astype(str)\n",
261
-    "NTM_df['TW_DMG_IP']=\"TW_DMG_IP=\"+NTM_df['TW_DMG_IP'].astype(str)\n",
262
-    "NTM_df['TW_DMG_PORT']=\"TW_DMG_PORT=\"+NTM_df['TW_DMG_PORT'].astype(str)"
263
-   ]
264
-  },
265
-  {
266
-   "cell_type": "code",
267
-   "execution_count": 4,
268
-   "metadata": {},
269
-   "outputs": [
270
-    {
271
-     "data": {
272
-      "text/plain": [
273
-       "INST_NM                 0\n",
274
-       "DRULE_ATT_TYPE_CODE1    0\n",
275
-       "TW_ATT_IP               0\n",
276
-       "TW_ATT_PORT             0\n",
277
-       "TW_DMG_IP               0\n",
278
-       "TW_DMG_PORT             0\n",
279
-       "ACCD_DMG_PROTO_NM       0\n",
280
-       "TW_ATT_CT_NM            0\n",
281
-       "ACCD_FIND_MTD_CODE      0\n",
282
-       "DRULE_NM                0\n",
283
-       "ASSETS_VAL              0\n",
284
-       "INTENT_VAL              0\n",
285
-       "SOURCE_VAL              0\n",
286
-       "dtype: int64"
287
-      ]
288
-     },
289
-     "execution_count": 4,
290
-     "metadata": {},
291
-     "output_type": "execute_result"
292
-    }
293
-   ],
294
-   "source": [
295
-    "##################### 여기서부터 진행하시면 됩니다. #####################\n",
296
-    "##################### 아래 12개 아이템(12. 장비 ACCD_FIND_MTD_CODE 제외)에 대해서 모든 아이템 조합에 알고리즘 적용하기#####################\n",
297
-    "\n",
298
-    "# It should be 13 columns in total\n",
299
-    "\n",
300
-    "# 1. 기관 INST_NM\n",
301
-    "# 2. 공격 DRULE_ATT_TYPE_CODE1\n",
302
-    "# 3. 자산 ASSETS_VAL\n",
303
-    "# 4. 위협공격ip TW_ATT_IP\n",
304
-    "# 5. 위협공격port TW_ATT_PORT\n",
305
-    "# 6. 위협피해ip TW_DMG_IP\n",
306
-    "# 7. 위협피해port TW_DMG_PORT\n",
307
-    "# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM\n",
308
-    "# 9. 공격국가 TW_ATT_CT_NM\n",
309
-    "# 10. 의도(7개) INTENT_VAL\n",
310
-    "# 11. IP/URL 가중치 SOURCE_VAL\n",
311
-    "# 12. 장비 ACCD_FIND_MTD_CODE\n",
312
-    "# 13. 탐지규칙명 DRULE_NM\n",
313
-    "\n",
314
-    "\n",
315
-    "# In[363]:\n",
316
-    "NTM_df.isna().sum()\n",
317
-    "\n",
318
-    "\n",
319
-    "# Replace NaN with an empty string or zero, depending on the column\n",
320
-    "NTM_df['ACCD_DMG_PROTO_NM']=NTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
321
-    "NTM_df['INST_NM']=NTM_df['INST_NM'].replace(np.nan,'')\n",
322
-    "NTM_df['DRULE_ATT_TYPE_CODE1']=NTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
323
-    "NTM_df['TW_ATT_IP']=NTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
324
-    "NTM_df['TW_ATT_PORT']=NTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
325
-    "NTM_df['TW_DMG_IP']=NTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
326
-    "NTM_df['TW_DMG_PORT']=NTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
327
-    "NTM_df['TW_ATT_CT_NM']=NTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
328
-    "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
329
-    "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].replace(np.nan,0)\n",
330
-    "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
331
-    "NTM_df['DRULE_NM']=NTM_df['DRULE_NM'].replace(np.nan,'')\n",
332
-    "\n",
333
-    "\n",
334
-    "# Check NaN out again\n",
335
-    "NTM_df.isna().sum()\n"
336
-   ]
337
-  },
338
-  {
339
-   "cell_type": "code",
340
-   "execution_count": 5,
341
-   "metadata": {},
342
-   "outputs": [],
343
-   "source": [
344
-    "# NTM_df의 col을 list로 저장. itertools.combinations로 가능한 시나리오 모두 추출\n",
345
-    "\n",
346
-    "# ACCD_FIND_MTD_CODE col 지우기\n",
347
-    "NTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)"
348
-   ]
349
-  },
350
-  {
351
-   "cell_type": "code",
352
-   "execution_count": 6,
353
-   "metadata": {},
354
-   "outputs": [],
355
-   "source": [
356
-    "from prefixspan import PrefixSpan\n",
357
-    "import itertools\n",
358
-    "# arr를 매개변수로 받아 n개의 아이템의 조합 반환\n",
359
-    "def get_combination(arr, n):\n",
360
-    "    combination_n = list(itertools.combinations(arr.columns.tolist(),n))\n",
361
-    "    combination_n = [com for com in combination_n if 'DRULE_ATT_TYPE_CODE1' in com]\n",
362
-    "    com_list=[]\n",
363
-    "    # row i 의 (1,2),(1,3)... 이런식으로 하니까 시간 너무 오래걸림\n",
364
-    "    # (1,2) 조합에 대한 row i, row i+1, row i+2... 이렇게 바꿈\n",
365
-    "    for m in range(len(combination_n)):\n",
366
-    "        for i in range(len(arr)):\n",
367
-    "            temp_list=[]\n",
368
-    "            temp_df = arr.iloc[i]\n",
369
-    "            for col in combination_n[m]:\n",
370
-    "                # 공백 처리\n",
371
-    "                if(temp_df[col]==''):\n",
372
-    "                    break\n",
373
-    "                else:\n",
374
-    "                    temp_list.append(temp_df[col])\n",
375
-    "            com_list.append(temp_list)\n",
376
-    "    prefix = get_prefixspan(com_list)\n",
377
-    "    return prefix\n",
378
-    "\n",
379
-    "def get_prefixspan(load_list):\n",
380
-    "    n = len(load_list[0])\n",
381
-    "    save_list = PrefixSpan(load_list)\n",
382
-    "    #n개 아이템 조합으로 이루어졌는데 n보다 작은 갯수의 아이템으로 이루어진 prefixspan 결과 값 나옴 \n",
383
-    "    # 방지하기 위해 prefixspan의 결과값에는 'n개의 아이템의 값'이 다 들어가도록 filter 설정\n",
384
-    "    save_list = save_list.frequent(1,filter = lambda patt, matches:len(patt)>=n)\n",
385
-    "    save_df = pd.DataFrame(save_list)\n",
386
-    "    save_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)\n",
387
-    "    save_df = save_df.sort_values(by=['Frequency'],ascending=False,ignore_index=True)\n",
388
-    "    save_df = get_effect(save_df)\n",
389
-    "    return save_df\n",
390
-    "\n",
391
-    "def get_effect(edit_df):\n",
392
-    "    # Add a new 'Effect' column (initially NaN) to be filled in below\n",
393
-    "    edit_df['Effect']=np.nan\n",
394
-    "    # Reorder the columns\n",
395
-    "    edit_df=edit_df[['Cause','Effect','Frequency']]\n",
396
-    "    for i in range(len(edit_df)):\n",
397
-    "        drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
398
-    "        temp_df = edit_df.loc[i]\n",
399
-    "        for item in temp_df['Cause']:\n",
400
-    "            for drule in drules:\n",
401
-    "                if item == drule:\n",
402
-    "                    edit_df.loc[i,'Effect'] = item\n",
403
-    "    return edit_df\n"
404
-   ]
405
-  },
406
-  {
407
-   "cell_type": "code",
408
-   "execution_count": 7,
409
-   "metadata": {},
410
-   "outputs": [
411
-    {
412
-     "data": {
413
-      "text/html": [
414
-       "<div>\n",
415
-       "<style scoped>\n",
416
-       "    .dataframe tbody tr th:only-of-type {\n",
417
-       "        vertical-align: middle;\n",
418
-       "    }\n",
419
-       "\n",
420
-       "    .dataframe tbody tr th {\n",
421
-       "        vertical-align: top;\n",
422
-       "    }\n",
423
-       "\n",
424
-       "    .dataframe thead th {\n",
425
-       "        text-align: right;\n",
426
-       "    }\n",
427
-       "</style>\n",
428
-       "<table border=\"1\" class=\"dataframe\">\n",
429
-       "  <thead>\n",
430
-       "    <tr style=\"text-align: right;\">\n",
431
-       "      <th></th>\n",
432
-       "      <th>Cause</th>\n",
433
-       "      <th>Effect</th>\n",
434
-       "      <th>Frequency</th>\n",
435
-       "    </tr>\n",
436
-       "  </thead>\n",
437
-       "  <tbody>\n",
438
-       "    <tr>\n",
439
-       "      <th>0</th>\n",
440
-       "      <td>[Attack, 'RISK_V2.INTENT_VAL_5=단순침입']</td>\n",
441
-       "      <td>Attack</td>\n",
442
-       "      <td>7709</td>\n",
443
-       "    </tr>\n",
444
-       "    <tr>\n",
445
-       "      <th>1</th>\n",
446
-       "      <td>[Attack, 'RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
447
-       "      <td>Attack</td>\n",
448
-       "      <td>3175</td>\n",
449
-       "    </tr>\n",
450
-       "    <tr>\n",
451
-       "      <th>2</th>\n",
452
-       "      <td>[Attack, Attack-Scan-29-01-PHPUnit(CVE17-9841)...</td>\n",
453
-       "      <td>Attack</td>\n",
454
-       "      <td>2770</td>\n",
455
-       "    </tr>\n",
456
-       "    <tr>\n",
457
-       "      <th>3</th>\n",
458
-       "      <td>[Attack, 중국]</td>\n",
459
-       "      <td>Attack</td>\n",
460
-       "      <td>2689</td>\n",
461
-       "    </tr>\n",
462
-       "    <tr>\n",
463
-       "      <th>4</th>\n",
464
-       "      <td>[Attack, 'RISK_V2.SOURCE_VAL_3=ECSC Black IP']</td>\n",
465
-       "      <td>Attack</td>\n",
466
-       "      <td>1904</td>\n",
467
-       "    </tr>\n",
468
-       "    <tr>\n",
469
-       "      <th>...</th>\n",
470
-       "      <td>...</td>\n",
471
-       "      <td>...</td>\n",
472
-       "      <td>...</td>\n",
473
-       "    </tr>\n",
474
-       "    <tr>\n",
475
-       "      <th>41145</th>\n",
476
-       "      <td>[Attack, TW_ATT_PORT=5389]</td>\n",
477
-       "      <td>Attack</td>\n",
478
-       "      <td>1</td>\n",
479
-       "    </tr>\n",
480
-       "    <tr>\n",
481
-       "      <th>41146</th>\n",
482
-       "      <td>[Attack, TW_ATT_PORT=38677]</td>\n",
483
-       "      <td>Attack</td>\n",
484
-       "      <td>1</td>\n",
485
-       "    </tr>\n",
486
-       "    <tr>\n",
487
-       "      <th>41147</th>\n",
488
-       "      <td>[Attack, TW_ATT_PORT=8287]</td>\n",
489
-       "      <td>Attack</td>\n",
490
-       "      <td>1</td>\n",
491
-       "    </tr>\n",
492
-       "    <tr>\n",
493
-       "      <th>41148</th>\n",
494
-       "      <td>[Attack, TW_ATT_PORT=2404]</td>\n",
495
-       "      <td>Attack</td>\n",
496
-       "      <td>1</td>\n",
497
-       "    </tr>\n",
498
-       "    <tr>\n",
499
-       "      <th>41149</th>\n",
500
-       "      <td>[Seoul Christian University, Malwr]</td>\n",
501
-       "      <td>Malwr</td>\n",
502
-       "      <td>1</td>\n",
503
-       "    </tr>\n",
504
-       "  </tbody>\n",
505
-       "</table>\n",
506
-       "<p>41150 rows × 3 columns</p>\n",
507
-       "</div>"
508
-      ],
509
-      "text/plain": [
510
-       "                                                   Cause  Effect  Frequency\n",
511
-       "0                  [Attack, 'RISK_V2.INTENT_VAL_5=단순침입']  Attack       7709\n",
512
-       "1         [Attack, 'RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']  Attack       3175\n",
513
-       "2      [Attack, Attack-Scan-29-01-PHPUnit(CVE17-9841)...  Attack       2770\n",
514
-       "3                                           [Attack, 중국]  Attack       2689\n",
515
-       "4         [Attack, 'RISK_V2.SOURCE_VAL_3=ECSC Black IP']  Attack       1904\n",
516
-       "...                                                  ...     ...        ...\n",
517
-       "41145                         [Attack, TW_ATT_PORT=5389]  Attack          1\n",
518
-       "41146                        [Attack, TW_ATT_PORT=38677]  Attack          1\n",
519
-       "41147                         [Attack, TW_ATT_PORT=8287]  Attack          1\n",
520
-       "41148                         [Attack, TW_ATT_PORT=2404]  Attack          1\n",
521
-       "41149                [Seoul Christian University, Malwr]   Malwr          1\n",
522
-       "\n",
523
-       "[41150 rows x 3 columns]"
524
-      ]
525
-     },
526
-     "execution_count": 7,
527
-     "metadata": {},
528
-     "output_type": "execute_result"
529
-    }
530
-   ],
531
-   "source": [
532
-    "# 1. 두 아이템의 조합\n",
533
-    "item = 2\n",
534
-    "prefix_of_two = get_combination(NTM_df, item)\n",
535
-    "prefix_of_two"
536
-   ]
537
-  },
538
-  {
539
-   "cell_type": "code",
540
-   "execution_count": 8,
541
-   "metadata": {},
542
-   "outputs": [
543
-    {
544
-     "ename": "KeyboardInterrupt",
545
-     "evalue": "",
546
-     "output_type": "error",
547
-     "traceback": [
548
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
549
-      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
550
-      "\u001b[1;32m<ipython-input-8-fdb1732ee6a2>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m# 2. 세 아이템의 조합\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mprefix_of_three\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_combination\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mNTM_df\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
551
-      "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_combination\u001b[1;34m(arr, n)\u001b[0m\n\u001b[0;32m     19\u001b[0m                     \u001b[0mtemp_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp_df\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     20\u001b[0m             \u001b[0mcom_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 21\u001b[1;33m     \u001b[0mprefix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_prefixspan\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcom_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     22\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mprefix\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     23\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
552
-      "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_prefixspan\u001b[1;34m(load_list)\u001b[0m\n\u001b[0;32m     31\u001b[0m     \u001b[0msave_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m{\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'Frequency'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'Cause'\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     32\u001b[0m     \u001b[0msave_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msave_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Frequency'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mignore_index\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m     \u001b[0msave_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_effect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msave_df\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     34\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0msave_df\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     35\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
553
-      "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_effect\u001b[1;34m(edit_df)\u001b[0m\n\u001b[0;32m     45\u001b[0m             \u001b[1;32mfor\u001b[0m \u001b[0mdrule\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdrules\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     46\u001b[0m                 \u001b[1;32mif\u001b[0m \u001b[0mitem\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mdrule\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 47\u001b[1;33m                     \u001b[0medit_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Effect'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mitem\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     48\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0medit_df\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
554
-      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m    690\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    691\u001b[0m         \u001b[0miloc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"iloc\"\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 692\u001b[1;33m         \u001b[0miloc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_with_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    693\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    694\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_validate_key\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
555
-      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer\u001b[1;34m(self, indexer, value, name)\u001b[0m\n\u001b[0;32m   1633\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mtake_split_path\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1634\u001b[0m             \u001b[1;31m# We have to operate column-wise\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1635\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_with_indexer_split_path\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1636\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1637\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_single_block\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
556
-      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer_split_path\u001b[1;34m(self, indexer, value, name)\u001b[0m\n\u001b[0;32m   1718\u001b[0m             \u001b[1;31m# scalar value\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1719\u001b[0m             \u001b[1;32mfor\u001b[0m \u001b[0mloc\u001b[0m \u001b[1;32min\u001b[0m \u001b[0milocs\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1720\u001b[1;33m                 \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_single_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpi\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1721\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1722\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_setitem_with_indexer_2d_value\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
557
-      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_single_column\u001b[1;34m(self, loc, value, plane_indexer)\u001b[0m\n\u001b[0;32m   1815\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1816\u001b[0m         \u001b[1;31m# reset the sliced object if unique\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1817\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_iset_item\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mser\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1818\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1819\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_setitem_single_block\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
558
-      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_iset_item\u001b[1;34m(self, loc, value)\u001b[0m\n\u001b[0;32m   3220\u001b[0m         \u001b[1;31m# technically _sanitize_column expects a label, not a position,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3221\u001b[0m         \u001b[1;31m#  but the behavior is the same as long as we pass broadcast=False\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3222\u001b[1;33m         \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_sanitize_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbroadcast\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3223\u001b[0m         \u001b[0mNDFrame\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_iset_item\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3224\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
559
-      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_sanitize_column\u001b[1;34m(self, key, value, broadcast)\u001b[0m\n\u001b[0;32m   3874\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3875\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSeries\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3876\u001b[1;33m             \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mreindexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3877\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3878\u001b[0m         \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
560
-      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mreindexer\u001b[1;34m(value)\u001b[0m\n\u001b[0;32m   3855\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3856\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mequals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3857\u001b[1;33m                 \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3858\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3859\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
561
-      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
562
-     ]
563
-    }
564
-   ],
565
-   "source": [
566
-    "# 2. 세 아이템의 조합\n",
567
-    "prefix_of_three = get_combination(NTM_df, 3)"
568
-   ]
569
-  },
570
-  {
571
-   "cell_type": "code",
572
-   "execution_count": null,
573
-   "metadata": {},
574
-   "outputs": [],
575
-   "source": [
576
-    "# 3. 네 아이템의 조합\n",
577
-    "prefix_of_four =  get_combination(NTM_df, 4)"
578
-   ]
579
-  },
580
-  {
581
-   "cell_type": "code",
582
-   "execution_count": null,
583
-   "metadata": {},
584
-   "outputs": [],
585
-   "source": [
586
-    "# 4. 다섯 아이템의 조합\n",
587
-    "prefix_of_five = get_combination(NTM_df, 5)"
588
-   ]
589
-  },
590
-  {
591
-   "cell_type": "code",
592
-   "execution_count": null,
593
-   "metadata": {},
594
-   "outputs": [],
595
-   "source": [
596
-    "# 5. 여섯 아이템의 조합\n",
597
-    "prefix_of_six  = get_combination(NTM_df, 6)\n",
598
-    "##################### NTM section End #####################"
599
-   ]
600
-  },
601
-  {
602
-   "cell_type": "code",
603
-   "execution_count": null,
604
-   "metadata": {},
605
-   "outputs": [],
606
-   "source": [
607
-    "##################### MTM section #####################\n",
608
-    "# Same goes for the MTM section\n",
609
-    "\n",
610
-    "# In[375]:\n",
611
-    "\n",
612
-    "\n",
613
-    "MTM_df=df[df['ACCD_FIND_MTD_CODE']==2]\n",
614
-    "len(MTM_df)\n",
615
-    "\n",
616
-    "\n",
617
-    "# In[376]:\n",
618
-    "\n",
619
-    "\n",
620
-    "# Pick out it in order to get the asset, risk, intent, black IP out\n",
621
-    "RISK_V2_MTM=MTM_df['RISK_V2']\n",
622
-    "\n",
623
-    "RISK_V2_FILTERED_MTM=RISK_V2_MTM.dropna()\n",
624
-    "print(RISK_V2_MTM.size)\n",
625
-    "print(RISK_V2_FILTERED_MTM.size)\n",
626
-    "\n",
627
-    "risk_df_MTM = pd.DataFrame()\n",
628
-    "for newVal_MTM in RISK_V2_FILTERED_MTM:\n",
629
-    "    newVal_MTM = newVal_MTM.replace(\"'\", \"\\\"\")\n",
630
-    "    newVal_MTM_str = json.loads(newVal_MTM)\n",
631
-    "    newVal_df_MTM = json_normalize(newVal_MTM_str) \n",
632
-    "    risk_df_MTM = pd.concat([risk_df_MTM,newVal_df_MTM],ignore_index=True) \n",
633
-    "    \n",
634
-    "risk_df_col_MTM = risk_df_MTM.columns.values.tolist()\n",
635
-    "\n",
636
-    "# In[377]:\n",
637
-    "\n",
638
-    "\n",
639
-    "asset_val_MTM = []\n",
640
-    "intent_val_MTM=[]\n",
641
-    "source_val_MTM=[]\n",
642
-    "\n",
643
-    "def filter_assets_value_MTM(risk):\n",
644
-    "    for i in range(len(risk)):\n",
645
-    "        risks=[]\n",
646
-    "        intents=[]\n",
647
-    "        sources=[]\n",
648
-    "        try:\n",
649
-    "            for key in risk_df_col:\n",
650
-    "                if 'ASSETS_VAL_' in key and risk.iloc[i][key]:\n",
651
-    "                    risk_key_desc = 'RISK_V2.' + key + '=' + get_asset_desc(key)\n",
652
-    "                    risks.append(risk_key_desc)\n",
653
-    "                if 'INTENT_VAL_' in key and risk.iloc[i][key]:\n",
654
-    "                    intent_key_desc = 'RISK_V2.' + key + '=' + get_intent_desc(key)\n",
655
-    "                    intents.append(intent_key_desc)\n",
656
-    "                if 'SOURCE_VAL_' in key and risk.iloc[i][key]:\n",
657
-    "                    source_key_desc='RISK_V2.' + key + '=' + get_source_desc(key)\n",
658
-    "                    sources.append(source_key_desc)\n",
659
-    "        except:\n",
660
-    "            print(risk)\n",
661
-    "            print(type(risk))\n",
662
-    "        finally:\n",
663
-    "            asset_val_MTM.append(risks)\n",
664
-    "            intent_val_MTM.append(intents)\n",
665
-    "            source_val_MTM.append(sources)\n",
666
-    "\n",
667
-    "# In[378]:\n",
668
-    "\n",
669
-    "# modified\n",
670
-    "def get_asset_desc_MTM(asset_field):\n",
671
-    "    if asset_field == 'ASSETS_VAL_1':\n",
672
-    "        return '공인-전체IP대역(유선)'\n",
673
-    "    elif asset_field == 'ASSETS_VAL_2':\n",
674
-    "        return '공인-전체IP대역(무선)'\n",
675
-    "    elif asset_field == 'ASSETS_VAL_3':\n",
676
-    "        return '공인-WEB서버'\n",
677
-    "    elif asset_field == 'ASSETS_VAL_4':\n",
678
-    "        return '공인-내부응용서버'\n",
679
-    "    elif asset_field == 'ASSETS_VAL_5':\n",
680
-    "        return '공인-DB서버'\n",
681
-    "    elif asset_field == 'ASSETS_VAL_6':\n",
682
-    "        return '공인-패치서버'\n",
683
-    "    elif asset_field == 'ASSETS_VAL_7':\n",
684
-    "        return '공인-네트워크'\n",
685
-    "    elif asset_field == 'ASSETS_VAL_8':\n",
686
-    "        return '공인-보안'\n",
687
-    "    elif asset_field == 'ASSETS_VAL_9':\n",
688
-    "        return '공인-업무용PC'\n",
689
-    "    elif asset_field == 'ASSETS_VAL_10':\n",
690
-    "        return '공인-비업무용PC'\n",
691
-    "    elif asset_field == 'ASSETS_VAL_11':\n",
692
-    "        return '공인-기타'\n",
693
-    "    elif asset_field == 'ASSETS_VAL_12':\n",
694
-    "        return '사설-전체IP대역(유선)'\n",
695
-    "    elif asset_field == 'ASSETS_VAL_13':\n",
696
-    "        return '사설-전체IP대역(무선)'\n",
697
-    "    elif asset_field == 'ASSETS_VAL_14':\n",
698
-    "        return '사설-WEB서버'\n",
699
-    "    elif asset_field == 'ASSETS_VAL_15':\n",
700
-    "        return '사설-내부응용서버'\n",
701
-    "    elif asset_field == 'ASSETS_VAL_16':\n",
702
-    "        return '사설-DB서버'\n",
703
-    "    elif asset_field == 'ASSETS_VAL_17':\n",
704
-    "        return '사설-패치서버'\n",
705
-    "    elif asset_field == 'ASSETS_VAL_18':\n",
706
-    "        return '사설-네트워크'\n",
707
-    "    elif asset_field == 'ASSETS_VAL_19':\n",
708
-    "        return '사설-보안'\n",
709
-    "    elif asset_field == 'ASSETS_VAL_20':\n",
710
-    "        return '사설-업무용PC'\n",
711
-    "    elif asset_field == 'ASSETS_VAL_21':\n",
712
-    "        return '사설-비업무용PC'\n",
713
-    "    elif asset_field == 'ASSETS_VAL_22':\n",
714
-    "        return '사설-기타'\n",
715
-    "    else:\n",
716
-    "        return ''\n",
717
-    "\n",
718
-    "\n",
719
-    "# In[381]:\n",
720
-    "\n",
721
-    "\n",
722
-    "# modified\n",
723
-    "def filter_intent_MTM(intent):\n",
724
-    "    intents=[]\n",
725
-    "    for intent_key in intent:\n",
726
-    "        if 'INTENT_VAL_' in intent_key and intent[intent_key]:\n",
727
-    "            intent_key_desc = 'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)\n",
728
-    "            intents.append(intent_key_desc)\n",
729
-    "    return intents\n",
730
-    "\n",
731
-    "\n",
732
-    "# In[382]:\n",
733
-    "\n",
734
-    "\n",
735
-    "def get_intent_desc_MTM(intent_field):\n",
736
-    "    if intent_field == 'INTENT_VAL_1':\n",
737
-    "        return '파괴'\n",
738
-    "    elif intent_field == 'INTENT_VAL_2':\n",
739
-    "        return '유출'\n",
740
-    "    elif intent_field == 'INTENT_VAL_3':\n",
741
-    "        return '지연'\n",
742
-    "    elif intent_field == 'INTENT_VAL_4':\n",
743
-    "        return '잠복'\n",
744
-    "    elif intent_field == 'INTENT_VAL_5':\n",
745
-    "        return '단순침입'\n",
746
-    "    elif intent_field == 'INTENT_VAL_6':\n",
747
-    "        return 'MD5'\n",
748
-    "    elif intent_field == 'INTENT_VAL_0':\n",
749
-    "        return 'Default'\n",
750
-    "    else:\n",
751
-    "        return ''\n",
752
-    "\n",
753
-    "\n",
754
-    "\n",
755
-    "# In[384]:\n",
756
-    "\n",
757
-    "\n",
758
-    "# modified\n",
759
-    "def filter_source_MTM(source):\n",
760
-    "    sources=[]\n",
761
-    "    for source_key in source:\n",
762
-    "        if 'SOURCE_VAL_' in source_key and source[source_key]:\n",
763
-    "            source_key_desc='RISK_V2.' + source_key + '=' + get_source_desc(source_key)\n",
764
-    "            sources.append(source_key_desc)\n",
765
-    "    return sources\n",
766
-    "\n",
767
-    "\n",
768
-    "# In[385]:\n",
769
-    "\n",
770
-    "\n",
771
-    "def get_source_desc_MTM(source_field):\n",
772
-    "    if source_field=='SOURCE_VAL_1':\n",
773
-    "        return '북한IP'\n",
774
-    "    if source_field=='SOURCE_VAL_3':\n",
775
-    "        return 'ECSC Black IP'\n",
776
-    "    else:\n",
777
-    "        return ''\n",
778
-    "\n",
779
-    "\n",
780
-    "# In[386]:\n",
781
-    "\n",
782
-    "filter_assets_value(risk_df_MTM)\n",
783
-    "#뒤에 isna()를 통해 na값을 0으로 바꿔주는 작업을 하므로, 값이 비어있는 경우 [] 대신 비워두기\n",
784
-    "# New assets column\n",
785
-    "MTM_df['ASSETS_VAL']= asset_val_MTM\n",
786
-    "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].astype(str)\n",
787
-    "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].str.replace('[','', regex=False)\n",
788
-    "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].str.replace(']','', regex=False)\n",
789
-    "MTM_df[:1]\n",
790
-    "# New column of intent value\n",
791
-    "MTM_df['INTENT_VAL']=intent_val_MTM\n",
792
-    "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].astype(str)\n",
793
-    "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].str.replace('[','',regex=False)\n",
794
-    "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].str.replace(']','',regex=False)\n",
795
-    "MTM_df[:1]\n",
796
-    "# New column of SOURCE_VAL value\n",
797
-    "MTM_df['SOURCE_VAL']=source_val_MTM\n",
798
-    "MTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].astype(str)\n",
799
-    "MTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].str.replace('[','',regex=False)\n",
800
-    "MTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].str.replace(']','',regex=False)\n",
801
-    "MTM_df[:5]\n",
802
-    "\n",
803
-    "# In[361]:\n",
804
-    "MTM_df.drop(columns=['RISK_V2'], inplace=True)\n",
805
-    "MTM_df.columns\n",
806
-    "\n",
807
-    "\n",
808
-    "# In[388]:\n",
809
-    "\n",
810
-    "\n",
811
-    "MTM_df.isna().sum()\n",
812
-    "\n",
813
-    "\n",
814
-    "# In[389]:\n",
815
-    "\n",
816
-    "\n",
817
-    "# Change the Nan to zero\n",
818
-    "MTM_df['ACCD_DMG_PROTO_NM']=MTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
819
-    "MTM_df['INST_NM']=MTM_df['INST_NM'].replace(np.nan,'')\n",
820
-    "MTM_df['DRULE_ATT_TYPE_CODE1']=MTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
821
-    "MTM_df['TW_ATT_IP']=MTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
822
-    "MTM_df['TW_ATT_PORT']=MTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
823
-    "MTM_df['TW_DMG_IP']=MTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
824
-    "MTM_df['TW_DMG_PORT']=MTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
825
-    "MTM_df['TW_ATT_CT_NM']=MTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
826
-    "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
827
-    "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].replace(np.nan,0)\n",
828
-    "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
829
-    "MTM_df['DRULE_NM']=MTM_df['DRULE_NM'].replace(np.nan,'')\n",
830
-    "\n",
831
-    "\n",
832
-    "# In[390]:\n",
833
-    "\n",
834
-    "\n",
835
-    "# Check NaN out again\n",
836
-    "MTM_df.isna().sum()\n",
837
-    "\n",
838
-    "\n",
839
-    "# In[391]:\n",
840
-    "\n",
841
-    "# ACCD_FIND_MTD_CODE col 지우기\n",
842
-    "MTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)\n",
843
-    "\n",
844
-    "# arr를 매개변수로 받아 n개의 아이템의 조합 반환\n",
845
-    "def get_combination_MTM(arr, n):\n",
846
-    "    combination_n = list(itertools.combinations(arr.columns.tolist(),n))\n",
847
-    "    combination_n = [com for com in combination_n if 'DRULE_ATT_TYPE_CODE1' in com]\n",
848
-    "    com_list=[]\n",
849
-    "    for m in range(len(combination_n)):\n",
850
-    "        for i in range(len(arr)):\n",
851
-    "            temp_list=[]\n",
852
-    "            temp_df = arr.iloc[i]\n",
853
-    "            for col in combination_n[m]:\n",
854
-    "                # 공백 처리\n",
855
-    "                if(temp_df[col]==''):\n",
856
-    "                    break\n",
857
-    "                else:\n",
858
-    "                    temp_list.append(temp_df[col])\n",
859
-    "            com_list.append(temp_list)\n",
860
-    "    prefix = get_prefixspan_MTM(com_list)\n",
861
-    "    return prefix\n",
862
-    "\n",
863
-    "def get_prefixspan_MTM(load_list):\n",
864
-    "    n = len(load_list[0])\n",
865
-    "    save_list = PrefixSpan(load_list)\n",
866
-    "    #n개 아이템 조합으로 이루어졌는데 n보다 작은 갯수의 아이템으로 이루어진 prefixspan 결과 값 나옴 \n",
867
-    "    # 방지하기 위해 prefixspan의 결과값에는 'n개의 아이템의 값'이 다 들어가도록 filter 설정\n",
868
-    "    save_list = save_list.frequent(1,filter = lambda patt, matches:len(patt)>=n)\n",
869
-    "    save_df = pd.DataFrame(save_list)\n",
870
-    "    save_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)\n",
871
-    "    save_df = save_df.sort_values(by=['Frequency'],ascending=False,ignore_index=True)\n",
872
-    "    save_df = get_effect_MTM(save_df)\n",
873
-    "    return save_df\n",
874
-    "\n",
875
-    "def get_effect_MTM(edit_df):\n",
876
-    "    #Make the new column for filling the Effect\n",
877
-    "    edit_df['Effect']=np.nan\n",
878
-    "     #Change the order of columns\n",
879
-    "    edit_df=edit_df[['Cause','Effect','Frequency']]\n",
880
-    "    for i in range(len(edit_df)):\n",
881
-    "        drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
882
-    "        temp_df = edit_df.loc[i]\n",
883
-    "        for item in temp_df['Cause']:\n",
884
-    "            for drule in drules:\n",
885
-    "                if item == drule:\n",
886
-    "                    edit_df.loc[i,'Effect'] = item\n",
887
-    "    return edit_df\n",
888
-    "\n",
889
-    "\n",
890
-    "\n",
891
-    "# 1. 두 아이템의 조합\n",
892
-    "prefix_of_two_MTM = get_combination(MTM_df,2)\n",
893
-    "\n",
894
-    "# 2. 세 아이템의 조합\n",
895
-    "prefix_of_three_MTM = get_combination(MTM_df, 3)\n",
896
-    "\n",
897
-    "# 3. 네 아이템의 조합\n",
898
-    "prefix_of_four_MTM = get_combination(MTM_df, 4)\n",
899
-    "\n",
900
-    "# 4. 다섯 아이템의 조합\n",
901
-    "prefix_of_five_MTM = get_combination(MTM_df, 5)\n",
902
-    "\n",
903
-    "\n",
904
-    "# 5. 여섯 아이템의 조합\n",
905
-    "prefix_of_six_MTM = get_combination(MTM_df, 6)\n",
906
-    "\n",
907
-    "##################### MTM section End #####################"
908
-   ]
909
-  }
910
- ],
911
- "metadata": {
912
-  "anaconda-cloud": {},
913
-  "kernelspec": {
914
-   "display_name": "Python 3",
915
-   "language": "python",
916
-   "name": "python3"
917
-  },
918
-  "language_info": {
919
-   "codemirror_mode": {
920
-    "name": "ipython",
921
-    "version": 3
922
-   },
923
-   "file_extension": ".py",
924
-   "mimetype": "text/x-python",
925
-   "name": "python",
926
-   "nbconvert_exporter": "python",
927
-   "pygments_lexer": "ipython3",
928
-   "version": "3.8.8"
929
-  }
930
- },
931
- "nbformat": 4,
932
- "nbformat_minor": 4
933
-}

Загрузка…
Отмена
Сохранить