瀏覽代碼

upload 'PrefixSpan_20211021.py'

- MTM Section 추가
- 'DRULE_ATT_TYPE_CODE1' 데이터 포함
- 한번의 함수 호출로 결과를 추출할 수 있도록 수정
- 공백문자 처리
master
yevKwon 4 年之前
父節點
當前提交
6812d184c0
共有 1 個檔案被更改,包括 933 行新增0 行删除
  1. 933
    0
      keris.ipynb/PrefixSpan_20211021.ipynb

+ 933
- 0
keris.ipynb/PrefixSpan_20211021.ipynb 查看文件

1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "markdown",
5
+   "metadata": {},
6
+   "source": [
7
+    "<p>NTM(유해트래픽 탐지장비)</p>\n",
8
+    "<p>MTM(악성파일 탐지장비)</p>"
9
+   ]
10
+  },
11
+  {
12
+   "cell_type": "code",
13
+   "execution_count": 1,
14
+   "metadata": {},
15
+   "outputs": [
16
+    {
17
+     "name": "stdout",
18
+     "output_type": "stream",
19
+     "text": [
20
+      "10000\n",
21
+      "10000\n"
22
+     ]
23
+    }
24
+   ],
25
+   "source": [
26
+    "#!/usr/bin/env python\n",
27
+    "# coding: utf-8\n",
28
+    "\n",
29
+    "import pandas as pd\n",
30
+    "import numpy as np\n",
31
+    "from mlxtend.preprocessing import TransactionEncoder\n",
32
+    "from mlxtend.frequent_patterns import association_rules, fpgrowth\n",
33
+    "from prefixspan import PrefixSpan\n",
34
+    "\n",
# Load ts_data_accident-2020_sample.csv.
# low_memory=False prevents pandas' DtypeWarning while parsing the file.
df = pd.read_csv('ts_data_accident-2020_sample.csv', low_memory=False)
df = df[[
    'RISK_V2',
    'INST_NM',
    'DRULE_ATT_TYPE_CODE1',
    'TW_ATT_IP',
    'TW_ATT_PORT',
    'TW_DMG_IP',
    'TW_DMG_PORT',
    'ACCD_DMG_PROTO_NM',
    'TW_ATT_CT_NM',
    'ACCD_FIND_MTD_CODE',
    'DRULE_NM',
]].dropna()
# The original author noted len(df) == 10000 for this sample (load successful).

##################### NTM section #####################
# Keep only the rows whose ACCD_FIND_MTD_CODE equals the integer 1.
# .copy() gives NTM_df its own data, so the columns added and dropped on it
# later do not operate on a filtered view of df (SettingWithCopyWarning).
NTM_df = df[df['ACCD_FIND_MTD_CODE'] == 1].copy()

# RISK_V2 is the column the asset / intent / source (black IP) flags are
# extracted from.
RISK_V2 = NTM_df['RISK_V2']

RISK_V2_FILTERED = RISK_V2.dropna()
print(RISK_V2.size)
print(RISK_V2_FILTERED.size)

# Added: the earlier filter_assets_value could not read the values directly,
# so RISK_V2 is expanded into its own DataFrame (one column per key).
import json
from pandas import json_normalize

# Each RISK_V2 cell is text using single quotes; swap them for double quotes
# so json.loads accepts it, then normalise all records in one call instead of
# growing a DataFrame with pd.concat on every iteration (quadratic copying).
risk_records = [json.loads(raw.replace("'", "\"")) for raw in RISK_V2_FILTERED]
risk_df = json_normalize(risk_records)

risk_df_col = risk_df.columns.values.tolist()
64
+    "\n",
65
+    "# In[352]:\n",
# Accumulators filled by filter_assets_value(): each call appends one list of
# description strings per processed row, in row order.
asset_val = []
intent_val = []
source_val = []


def _risk_flag_is_set(value):
    """Return True when a risk cell holds a real, truthy flag.

    NaN is truthy in Python, so a cell that is NaN (e.g. because the key was
    absent from that record) must be rejected explicitly.
    """
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return False
    return bool(value)


def filter_assets_value(risk):
    """Collect asset, intent and source descriptions for every row of *risk*.

    For each row, every column whose name contains ``ASSETS_VAL_``,
    ``INTENT_VAL_`` or ``SOURCE_VAL_`` and whose cell is set contributes a
    string ``'RISK_V2.<column>=<description>'``. The three per-row lists are
    appended to the module-level ``asset_val``, ``intent_val`` and
    ``source_val`` lists, so repeated calls keep extending them.

    Args:
        risk: DataFrame with one row per record and one column per flag. The
            columns of *risk* itself are scanned.

    Returns:
        None. Results are delivered through the module-level lists.
    """
    columns = list(risk.columns)
    # itertuples reads each row once instead of one iloc lookup per cell.
    for position, row in enumerate(risk.itertuples(index=False, name=None)):
        risks = []
        intents = []
        sources = []
        try:
            for key, value in zip(columns, row):
                if not _risk_flag_is_set(value):
                    continue
                if 'ASSETS_VAL_' in key:
                    risks.append('RISK_V2.' + key + '=' + get_asset_desc(key))
                if 'INTENT_VAL_' in key:
                    intents.append('RISK_V2.' + key + '=' + get_intent_desc(key))
                if 'SOURCE_VAL_' in key:
                    sources.append('RISK_V2.' + key + '=' + get_source_desc(key))
        except Exception as exc:
            # Best effort, as before: report the row and keep going, but do
            # not swallow KeyboardInterrupt/SystemExit like a bare except.
            print(f'filter_assets_value: row {position} only partially processed: {exc!r}')
        finally:
            # Always append so the accumulators stay aligned with the rows.
            asset_val.append(risks)
            intent_val.append(intents)
            source_val.append(sources)
92
+    "    \n",
93
+    "    \n",
94
+    "# modified\n",
# Lookup table: asset flag name -> description text used in the results.
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the description for an asset flag name.

    Args:
        asset_field: Flag name such as ``'ASSETS_VAL_3'``.

    Returns:
        The matching description, or ``''`` when the name is not one of
        ``ASSETS_VAL_1`` .. ``ASSETS_VAL_22``.
    """
    try:
        return _ASSET_DESCRIPTIONS.get(asset_field, '')
    except TypeError:
        # An unhashable argument can never equal a flag name.
        return ''
142
+    "\n",
143
+    "\n",
144
+    "\n",
145
+    "# modified\n",
def filter_intent(intent):
    """Describe every set ``INTENT_VAL_*`` entry of *intent*.

    Args:
        intent: Mapping-like object; iterating it yields key names and
            ``intent[key]`` yields the flag value.

    Returns:
        A list of ``'RISK_V2.<key>=<description>'`` strings, one for each key
        that contains ``INTENT_VAL_`` and has a truthy value, in iteration
        order.
    """
    return [
        'RISK_V2.' + name + '=' + get_intent_desc(name)
        for name in intent
        if 'INTENT_VAL_' in name and intent[name]
    ]
153
+    "\n",
154
+    "\n",
155
+    "# In[356]:\n",
156
+    "\n",
157
+    "\n",
# Lookup table: intent flag name -> description text used in the results.
_INTENT_DESCRIPTIONS = {
    'INTENT_VAL_1': '파괴',
    'INTENT_VAL_2': '유출',
    'INTENT_VAL_3': '지연',
    'INTENT_VAL_4': '잠복',
    'INTENT_VAL_5': '단순침입',
    'INTENT_VAL_6': 'MD5',
    'INTENT_VAL_0': 'Default',
}


def get_intent_desc(intent_field):
    """Return the description for an intent flag name.

    Args:
        intent_field: Flag name such as ``'INTENT_VAL_5'``.

    Returns:
        The matching description, or ``''`` when the name is not one of
        ``INTENT_VAL_0`` .. ``INTENT_VAL_6``.
    """
    try:
        return _INTENT_DESCRIPTIONS.get(intent_field, '')
    except TypeError:
        # An unhashable argument can never equal a flag name.
        return ''
175
+    "\n",
176
+    "\n",
177
+    "# In[358]:\n",
178
+    "\n",
179
+    "\n",
180
+    "# modified\n",
def filter_source(source):
    """Describe every set ``SOURCE_VAL_*`` entry of *source*.

    Args:
        source: Mapping-like object; iterating it yields key names and
            ``source[key]`` yields the flag value.

    Returns:
        A list of ``'RISK_V2.<key>=<description>'`` strings, one for each key
        that contains ``SOURCE_VAL_`` and has a truthy value, in iteration
        order.
    """
    return [
        'RISK_V2.' + name + '=' + get_source_desc(name)
        for name in source
        if 'SOURCE_VAL_' in name and source[name]
    ]
188
+    "\n",
189
+    "\n",
190
+    "# In[359]:\n",
191
+    "\n",
192
+    "\n",
def get_source_desc(source_field):
    """Return the description for a source flag name.

    Only ``SOURCE_VAL_1`` and ``SOURCE_VAL_3`` have descriptions; every other
    value yields ``''``.
    """
    known_sources = (
        ('SOURCE_VAL_1', '북한IP'),
        ('SOURCE_VAL_3', 'ECSC Black IP'),
    )
    for name, description in known_sources:
        if source_field == name:
            return description
    return ''
200
+    "\n",
201
+    "\n"
202
+   ]
203
+  },
204
+  {
205
+   "cell_type": "code",
206
+   "execution_count": 2,
207
+   "metadata": {},
208
+   "outputs": [
209
+    {
210
+     "data": {
211
+      "text/plain": [
212
+       "Index(['INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',\n",
213
+       "       'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',\n",
214
+       "       'ACCD_FIND_MTD_CODE', 'DRULE_NM', 'ASSETS_VAL', 'INTENT_VAL',\n",
215
+       "       'SOURCE_VAL'],\n",
216
+       "      dtype='object')"
217
+      ]
218
+     },
219
+     "execution_count": 2,
220
+     "metadata": {},
221
+     "output_type": "execute_result"
222
+    }
223
+   ],
224
+   "source": [
filter_assets_value(risk_df)

# A later cell replaces NaN values, so a row with no descriptions is stored
# as an empty string rather than as the text '[]'.
# Each accumulator becomes a new text column: the per-row list is cast to its
# string form and the surrounding square brackets are stripped.
for column_name, row_values in (
    ('ASSETS_VAL', asset_val),    # new assets column
    ('INTENT_VAL', intent_val),   # new intent column
    ('SOURCE_VAL', source_val),   # new source column
):
    NTM_df[column_name] = row_values
    NTM_df[column_name] = NTM_df[column_name].astype(str)
    NTM_df[column_name] = NTM_df[column_name].str.replace('[', '', regex=False)
    NTM_df[column_name] = NTM_df[column_name].str.replace(']', '', regex=False)

# RISK_V2 has been expanded into the three columns above; drop the raw column.
NTM_df.drop(columns=['RISK_V2'], inplace=True)
NTM_df.columns
249
+   ]
250
+  },
251
+  {
252
+   "cell_type": "code",
253
+   "execution_count": 3,
254
+   "metadata": {},
255
+   "outputs": [],
256
+   "source": [
# Each row of the data frame will later be turned into a list so that
# itertools.combinations can build every item combination.
# When TW_ATT_IP and TW_DMG_IP hold the same value it would be impossible to
# tell which field a value came from, so each value is tagged with its
# column name ("<column>=<value>").
for column_name in ('TW_ATT_IP', 'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT'):
    NTM_df[column_name] = column_name + '=' + NTM_df[column_name].astype(str)
263
+   ]
264
+  },
265
+  {
266
+   "cell_type": "code",
267
+   "execution_count": 4,
268
+   "metadata": {},
269
+   "outputs": [
270
+    {
271
+     "data": {
272
+      "text/plain": [
273
+       "INST_NM                 0\n",
274
+       "DRULE_ATT_TYPE_CODE1    0\n",
275
+       "TW_ATT_IP               0\n",
276
+       "TW_ATT_PORT             0\n",
277
+       "TW_DMG_IP               0\n",
278
+       "TW_DMG_PORT             0\n",
279
+       "ACCD_DMG_PROTO_NM       0\n",
280
+       "TW_ATT_CT_NM            0\n",
281
+       "ACCD_FIND_MTD_CODE      0\n",
282
+       "DRULE_NM                0\n",
283
+       "ASSETS_VAL              0\n",
284
+       "INTENT_VAL              0\n",
285
+       "SOURCE_VAL              0\n",
286
+       "dtype: int64"
287
+      ]
288
+     },
289
+     "execution_count": 4,
290
+     "metadata": {},
291
+     "output_type": "execute_result"
292
+    }
293
+   ],
294
+   "source": [
##################### Work can proceed from here. #####################
##################### Apply the algorithm to every item combination of the items below (12. equipment ACCD_FIND_MTD_CODE excluded) #####################

# It should be 13 columns in total

# 1. Institution: INST_NM
# 2. Attack: DRULE_ATT_TYPE_CODE1
# 3. Asset: ASSETS_VAL
# 4. Threat attack IP: TW_ATT_IP
# 5. Threat attack port: TW_ATT_PORT
# 6. Threat damage IP: TW_DMG_IP
# 7. Threat damage port: TW_DMG_PORT
# 8. Threat damage protocol: ACCD_DMG_PROTO_NM
# 9. Attack country: TW_ATT_CT_NM
# 10. Intent (7 kinds): INTENT_VAL
# 11. IP/URL weight: SOURCE_VAL
# 12. Equipment: ACCD_FIND_MTD_CODE
# 13. Detection rule name: DRULE_NM

# Replacement used for NaN in each column: '' for the name/code columns,
# 0 for the IP, port and *_VAL columns. Applied in the listed order.
nan_replacements = {
    'ACCD_DMG_PROTO_NM': '',
    'INST_NM': '',
    'DRULE_ATT_TYPE_CODE1': '',
    'TW_ATT_IP': 0,
    'TW_ATT_PORT': 0,
    'TW_DMG_IP': 0,
    'TW_DMG_PORT': 0,
    'TW_ATT_CT_NM': '',
    'ASSETS_VAL': 0,
    'INTENT_VAL': 0,
    'SOURCE_VAL': 0,
    'DRULE_NM': '',
}
for column_name, replacement in nan_replacements.items():
    NTM_df[column_name] = NTM_df[column_name].replace(np.nan, replacement)

# Check the NaN count per column again (shown as the cell result).
NTM_df.isna().sum()
336
+   ]
337
+  },
338
+  {
339
+   "cell_type": "code",
340
+   "execution_count": 5,
341
+   "metadata": {},
342
+   "outputs": [],
343
+   "source": [
# The columns of NTM_df are later listed and passed to
# itertools.combinations to extract every possible scenario.

# Remove the ACCD_FIND_MTD_CODE column so it is not part of any combination.
NTM_df.drop('ACCD_FIND_MTD_CODE', axis=1, inplace=True)
348
+   ]
349
+  },
350
+  {
351
+   "cell_type": "code",
352
+   "execution_count": 6,
353
+   "metadata": {},
354
+   "outputs": [],
355
+   "source": [
356
+    "from prefixspan import PrefixSpan\n",
357
+    "import itertools\n",
def get_combination(arr, n):
    """Mine frequent patterns over every n-column combination of *arr*.

    Every combination of *n* column names that includes
    ``'DRULE_ATT_TYPE_CODE1'`` is taken; for each one, every row contributes
    the list of its values in those columns. All lists are then handed to
    ``get_prefixspan`` in a single call.

    Rows with a blank (``''``) value in any selected column are skipped for
    that combination. Previously such a row was appended as a truncated list;
    it could never match an n-item pattern, but when it was the first list it
    made ``get_prefixspan`` derive the wrong pattern length.

    Args:
        arr: DataFrame whose columns are the candidate items.
        n: Number of columns per combination.

    Returns:
        Whatever ``get_prefixspan`` returns for the collected sequences.
    """
    combos = [
        combo
        for combo in itertools.combinations(arr.columns.tolist(), n)
        if 'DRULE_ATT_TYPE_CODE1' in combo
    ]
    com_list = []
    # Outer loop over combinations, inner loop over rows: the original author
    # found the opposite order (all combinations per row) far too slow.
    for combo in combos:
        # Select the columns once and stream the rows as plain tuples instead
        # of one arr.iloc[i] lookup per row per combination.
        selected = arr[list(combo)]
        for values in selected.itertuples(index=False, name=None):
            if any(value == '' for value in values):
                continue
            com_list.append(list(values))
    return get_prefixspan(com_list)
378
+    "\n",
def get_prefixspan(load_list):
    """Run PrefixSpan over *load_list* and return the patterns as a DataFrame.

    Args:
        load_list: List of sequences (lists of items).

    Returns:
        The DataFrame produced by ``get_effect`` from a frame with columns
        ``Frequency`` and ``Cause``, sorted by ``Frequency`` descending.
        Empty input, or input that yields no pattern, gives an empty frame
        instead of raising.
    """
    result_columns = ['Frequency', 'Cause']
    if not load_list:
        return get_effect(pd.DataFrame(columns=result_columns))

    # Pattern length to keep. Taking the longest sequence instead of the
    # first one keeps this correct when the first sequence is truncated.
    n = max(len(sequence) for sequence in load_list)
    miner = PrefixSpan(load_list)
    # PrefixSpan also reports patterns with fewer than n items even though
    # the sequences are n-item combinations; the filter keeps only patterns
    # that contain all n item values.
    patterns = miner.frequent(1, filter=lambda patt, matches: len(patt) >= n)
    # Naming the columns up front keeps them present when nothing was found.
    save_df = pd.DataFrame(patterns, columns=result_columns)
    save_df = save_df.sort_values(by=['Frequency'], ascending=False, ignore_index=True)
    return get_effect(save_df)
390
+    "\n",
def get_effect(edit_df):
    """Add an ``Effect`` column naming the attack type found in each ``Cause``.

    For every row, the items of ``Cause`` are compared with the known attack
    type labels; the last matching item becomes ``Effect``. Rows without a
    match get NaN.

    Args:
        edit_df: DataFrame with a ``Cause`` column (an iterable of items per
            row) and a ``Frequency`` column. It is not modified, and its
            index may be anything.

    Returns:
        A new DataFrame with the columns ``Cause``, ``Effect``, ``Frequency``
        and the same index as *edit_df*.
    """
    drules = ('Attack', 'DDOS', 'HACK', 'MAIL', 'Malwr', 'WEB')

    def last_rule(cause):
        # The original loop overwrote Effect on every hit, so the last
        # matching item wins.
        effect = np.nan
        for item in cause:
            if item in drules:
                effect = item
        return effect

    # Build the whole column in one pass; writing cell by cell with .loc on a
    # sliced frame was the slowest step of the notebook.
    effects = [last_rule(cause) for cause in edit_df['Cause']]
    result = edit_df[['Cause', 'Frequency']].copy()
    result['Effect'] = effects
    return result[['Cause', 'Effect', 'Frequency']]
404
+   ]
405
+  },
406
+  {
407
+   "cell_type": "code",
408
+   "execution_count": 7,
409
+   "metadata": {},
410
+   "outputs": [
411
+    {
412
+     "data": {
413
+      "text/html": [
414
+       "<div>\n",
415
+       "<style scoped>\n",
416
+       "    .dataframe tbody tr th:only-of-type {\n",
417
+       "        vertical-align: middle;\n",
418
+       "    }\n",
419
+       "\n",
420
+       "    .dataframe tbody tr th {\n",
421
+       "        vertical-align: top;\n",
422
+       "    }\n",
423
+       "\n",
424
+       "    .dataframe thead th {\n",
425
+       "        text-align: right;\n",
426
+       "    }\n",
427
+       "</style>\n",
428
+       "<table border=\"1\" class=\"dataframe\">\n",
429
+       "  <thead>\n",
430
+       "    <tr style=\"text-align: right;\">\n",
431
+       "      <th></th>\n",
432
+       "      <th>Cause</th>\n",
433
+       "      <th>Effect</th>\n",
434
+       "      <th>Frequency</th>\n",
435
+       "    </tr>\n",
436
+       "  </thead>\n",
437
+       "  <tbody>\n",
438
+       "    <tr>\n",
439
+       "      <th>0</th>\n",
440
+       "      <td>[Attack, 'RISK_V2.INTENT_VAL_5=단순침입']</td>\n",
441
+       "      <td>Attack</td>\n",
442
+       "      <td>7709</td>\n",
443
+       "    </tr>\n",
444
+       "    <tr>\n",
445
+       "      <th>1</th>\n",
446
+       "      <td>[Attack, 'RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
447
+       "      <td>Attack</td>\n",
448
+       "      <td>3175</td>\n",
449
+       "    </tr>\n",
450
+       "    <tr>\n",
451
+       "      <th>2</th>\n",
452
+       "      <td>[Attack, Attack-Scan-29-01-PHPUnit(CVE17-9841)...</td>\n",
453
+       "      <td>Attack</td>\n",
454
+       "      <td>2770</td>\n",
455
+       "    </tr>\n",
456
+       "    <tr>\n",
457
+       "      <th>3</th>\n",
458
+       "      <td>[Attack, 중국]</td>\n",
459
+       "      <td>Attack</td>\n",
460
+       "      <td>2689</td>\n",
461
+       "    </tr>\n",
462
+       "    <tr>\n",
463
+       "      <th>4</th>\n",
464
+       "      <td>[Attack, 'RISK_V2.SOURCE_VAL_3=ECSC Black IP']</td>\n",
465
+       "      <td>Attack</td>\n",
466
+       "      <td>1904</td>\n",
467
+       "    </tr>\n",
468
+       "    <tr>\n",
469
+       "      <th>...</th>\n",
470
+       "      <td>...</td>\n",
471
+       "      <td>...</td>\n",
472
+       "      <td>...</td>\n",
473
+       "    </tr>\n",
474
+       "    <tr>\n",
475
+       "      <th>41145</th>\n",
476
+       "      <td>[Attack, TW_ATT_PORT=5389]</td>\n",
477
+       "      <td>Attack</td>\n",
478
+       "      <td>1</td>\n",
479
+       "    </tr>\n",
480
+       "    <tr>\n",
481
+       "      <th>41146</th>\n",
482
+       "      <td>[Attack, TW_ATT_PORT=38677]</td>\n",
483
+       "      <td>Attack</td>\n",
484
+       "      <td>1</td>\n",
485
+       "    </tr>\n",
486
+       "    <tr>\n",
487
+       "      <th>41147</th>\n",
488
+       "      <td>[Attack, TW_ATT_PORT=8287]</td>\n",
489
+       "      <td>Attack</td>\n",
490
+       "      <td>1</td>\n",
491
+       "    </tr>\n",
492
+       "    <tr>\n",
493
+       "      <th>41148</th>\n",
494
+       "      <td>[Attack, TW_ATT_PORT=2404]</td>\n",
495
+       "      <td>Attack</td>\n",
496
+       "      <td>1</td>\n",
497
+       "    </tr>\n",
498
+       "    <tr>\n",
499
+       "      <th>41149</th>\n",
500
+       "      <td>[Seoul Christian University, Malwr]</td>\n",
501
+       "      <td>Malwr</td>\n",
502
+       "      <td>1</td>\n",
503
+       "    </tr>\n",
504
+       "  </tbody>\n",
505
+       "</table>\n",
506
+       "<p>41150 rows × 3 columns</p>\n",
507
+       "</div>"
508
+      ],
509
+      "text/plain": [
510
+       "                                                   Cause  Effect  Frequency\n",
511
+       "0                  [Attack, 'RISK_V2.INTENT_VAL_5=단순침입']  Attack       7709\n",
512
+       "1         [Attack, 'RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']  Attack       3175\n",
513
+       "2      [Attack, Attack-Scan-29-01-PHPUnit(CVE17-9841)...  Attack       2770\n",
514
+       "3                                           [Attack, 중국]  Attack       2689\n",
515
+       "4         [Attack, 'RISK_V2.SOURCE_VAL_3=ECSC Black IP']  Attack       1904\n",
516
+       "...                                                  ...     ...        ...\n",
517
+       "41145                         [Attack, TW_ATT_PORT=5389]  Attack          1\n",
518
+       "41146                        [Attack, TW_ATT_PORT=38677]  Attack          1\n",
519
+       "41147                         [Attack, TW_ATT_PORT=8287]  Attack          1\n",
520
+       "41148                         [Attack, TW_ATT_PORT=2404]  Attack          1\n",
521
+       "41149                [Seoul Christian University, Malwr]   Malwr          1\n",
522
+       "\n",
523
+       "[41150 rows x 3 columns]"
524
+      ]
525
+     },
526
+     "execution_count": 7,
527
+     "metadata": {},
528
+     "output_type": "execute_result"
529
+    }
530
+   ],
531
+   "source": [
# 1. Combinations of two items
item = 2
prefix_of_two = get_combination(arr=NTM_df, n=item)
prefix_of_two
536
+   ]
537
+  },
538
+  {
539
+   "cell_type": "code",
540
+   "execution_count": 8,
541
+   "metadata": {},
542
+   "outputs": [
543
+    {
544
+     "ename": "KeyboardInterrupt",
545
+     "evalue": "",
546
+     "output_type": "error",
547
+     "traceback": [
548
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
549
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
550
+      "\u001b[1;32m<ipython-input-8-fdb1732ee6a2>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m# 2. 세 아이템의 조합\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mprefix_of_three\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_combination\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mNTM_df\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
551
+      "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_combination\u001b[1;34m(arr, n)\u001b[0m\n\u001b[0;32m     19\u001b[0m                     \u001b[0mtemp_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp_df\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     20\u001b[0m             \u001b[0mcom_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 21\u001b[1;33m     \u001b[0mprefix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_prefixspan\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcom_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     22\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mprefix\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     23\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
552
+      "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_prefixspan\u001b[1;34m(load_list)\u001b[0m\n\u001b[0;32m     31\u001b[0m     \u001b[0msave_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m{\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'Frequency'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'Cause'\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     32\u001b[0m     \u001b[0msave_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msave_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Frequency'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mignore_index\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m     \u001b[0msave_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_effect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msave_df\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     34\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0msave_df\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     35\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
553
+      "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_effect\u001b[1;34m(edit_df)\u001b[0m\n\u001b[0;32m     45\u001b[0m             \u001b[1;32mfor\u001b[0m \u001b[0mdrule\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdrules\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     46\u001b[0m                 \u001b[1;32mif\u001b[0m \u001b[0mitem\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mdrule\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 47\u001b[1;33m                     \u001b[0medit_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Effect'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mitem\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     48\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0medit_df\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
554
+      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m    690\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    691\u001b[0m         \u001b[0miloc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"iloc\"\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 692\u001b[1;33m         \u001b[0miloc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_with_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    693\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    694\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_validate_key\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
555
+      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer\u001b[1;34m(self, indexer, value, name)\u001b[0m\n\u001b[0;32m   1633\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mtake_split_path\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1634\u001b[0m             \u001b[1;31m# We have to operate column-wise\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1635\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_with_indexer_split_path\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1636\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1637\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_single_block\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
556
+      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer_split_path\u001b[1;34m(self, indexer, value, name)\u001b[0m\n\u001b[0;32m   1718\u001b[0m             \u001b[1;31m# scalar value\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1719\u001b[0m             \u001b[1;32mfor\u001b[0m \u001b[0mloc\u001b[0m \u001b[1;32min\u001b[0m \u001b[0milocs\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1720\u001b[1;33m                 \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_single_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpi\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1721\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1722\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_setitem_with_indexer_2d_value\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
557
+      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_single_column\u001b[1;34m(self, loc, value, plane_indexer)\u001b[0m\n\u001b[0;32m   1815\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1816\u001b[0m         \u001b[1;31m# reset the sliced object if unique\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1817\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_iset_item\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mser\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1818\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1819\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_setitem_single_block\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
558
+      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_iset_item\u001b[1;34m(self, loc, value)\u001b[0m\n\u001b[0;32m   3220\u001b[0m         \u001b[1;31m# technically _sanitize_column expects a label, not a position,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3221\u001b[0m         \u001b[1;31m#  but the behavior is the same as long as we pass broadcast=False\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3222\u001b[1;33m         \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_sanitize_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbroadcast\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3223\u001b[0m         \u001b[0mNDFrame\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_iset_item\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3224\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
559
+      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_sanitize_column\u001b[1;34m(self, key, value, broadcast)\u001b[0m\n\u001b[0;32m   3874\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3875\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSeries\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3876\u001b[1;33m             \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mreindexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3877\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3878\u001b[0m         \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
560
+      "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mreindexer\u001b[1;34m(value)\u001b[0m\n\u001b[0;32m   3855\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3856\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mequals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3857\u001b[1;33m                 \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3858\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3859\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
561
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
562
+     ]
563
+    }
564
+   ],
565
+   "source": [
566
+    "# 2. 세 아이템의 조합\n",
567
+    "prefix_of_three = get_combination(NTM_df, 3)"
568
+   ]
569
+  },
570
+  {
571
+   "cell_type": "code",
572
+   "execution_count": null,
573
+   "metadata": {},
574
+   "outputs": [],
575
+   "source": [
576
+    "# 3. 네 아이템의 조합\n",
577
+    "prefix_of_four =  get_combination(NTM_df, 4)"
578
+   ]
579
+  },
580
+  {
581
+   "cell_type": "code",
582
+   "execution_count": null,
583
+   "metadata": {},
584
+   "outputs": [],
585
+   "source": [
586
+    "# 4. 다섯 아이템의 조합\n",
587
+    "prefix_of_five = get_combination(NTM_df, 5)"
588
+   ]
589
+  },
590
+  {
591
+   "cell_type": "code",
592
+   "execution_count": null,
593
+   "metadata": {},
594
+   "outputs": [],
595
+   "source": [
596
+    "# 5. 여섯 아이템의 조합\n",
597
+    "prefix_of_six  = get_combination(NTM_df, 6)\n",
598
+    "##################### NTM section End #####################"
599
+   ]
600
+  },
601
+  {
602
+   "cell_type": "code",
603
+   "execution_count": null,
604
+   "metadata": {},
605
+   "outputs": [],
606
+   "source": [
607
+    "##################### MTM section #####################\n",
608
+    "# Same goes for the MTM section\n",
609
+    "\n",
610
+    "# In[375]:\n",
611
+    "\n",
612
+    "\n",
613
+    "MTM_df=df[df['ACCD_FIND_MTD_CODE']==2].copy()\n",
614
+    "len(MTM_df)\n",
615
+    "\n",
616
+    "\n",
617
+    "# In[376]:\n",
618
+    "\n",
619
+    "\n",
620
+    "# Pick out it in order to get the asset, risk, intent, black IP out\n",
621
+    "RISK_V2_MTM=MTM_df['RISK_V2']\n",
622
+    "\n",
623
+    "RISK_V2_FILTERED_MTM=RISK_V2_MTM.dropna()\n",
624
+    "print(RISK_V2_MTM.size)\n",
625
+    "print(RISK_V2_FILTERED_MTM.size)\n",
626
+    "\n",
627
+    "risk_df_MTM = pd.DataFrame()\n",
628
+    "for newVal_MTM in RISK_V2_FILTERED_MTM:\n",
629
+    "    newVal_MTM = newVal_MTM.replace(\"'\", \"\\\"\")\n",
630
+    "    newVal_MTM_str = json.loads(newVal_MTM)\n",
631
+    "    newVal_df_MTM = json_normalize(newVal_MTM_str) \n",
632
+    "    risk_df_MTM = pd.concat([risk_df_MTM,newVal_df_MTM],ignore_index=True) \n",
633
+    "    \n",
634
+    "risk_df_col_MTM = risk_df_MTM.columns.values.tolist()\n",
635
+    "\n",
636
+    "# In[377]:\n",
637
+    "\n",
638
+    "\n",
639
+    "asset_val_MTM = []\n",
640
+    "intent_val_MTM=[]\n",
641
+    "source_val_MTM=[]\n",
642
+    "\n",
643
+    "def filter_assets_value_MTM(risk):\n",
644
+    "    for i in range(len(risk)):\n",
645
+    "        risks=[]\n",
646
+    "        intents=[]\n",
647
+    "        sources=[]\n",
648
+    "        try:\n",
649
+    "            for key in risk_df_col_MTM:\n",
650
+    "                if 'ASSETS_VAL_' in key and risk.iloc[i][key]:\n",
651
+    "                    risk_key_desc = 'RISK_V2.' + key + '=' + get_asset_desc(key)\n",
652
+    "                    risks.append(risk_key_desc)\n",
653
+    "                if 'INTENT_VAL_' in key and risk.iloc[i][key]:\n",
654
+    "                    intent_key_desc = 'RISK_V2.' + key + '=' + get_intent_desc(key)\n",
655
+    "                    intents.append(intent_key_desc)\n",
656
+    "                if 'SOURCE_VAL_' in key and risk.iloc[i][key]:\n",
657
+    "                    source_key_desc='RISK_V2.' + key + '=' + get_source_desc(key)\n",
658
+    "                    sources.append(source_key_desc)\n",
659
+    "        except:\n",
660
+    "            print(risk)\n",
661
+    "            print(type(risk))\n",
662
+    "        finally:\n",
663
+    "            asset_val_MTM.append(risks)\n",
664
+    "            intent_val_MTM.append(intents)\n",
665
+    "            source_val_MTM.append(sources)\n",
666
+    "\n",
667
+    "# In[378]:\n",
668
+    "\n",
669
+    "# modified\n",
670
+    "def get_asset_desc_MTM(asset_field):\n",
671
+    "    if asset_field == 'ASSETS_VAL_1':\n",
672
+    "        return '공인-전체IP대역(유선)'\n",
673
+    "    elif asset_field == 'ASSETS_VAL_2':\n",
674
+    "        return '공인-전체IP대역(무선)'\n",
675
+    "    elif asset_field == 'ASSETS_VAL_3':\n",
676
+    "        return '공인-WEB서버'\n",
677
+    "    elif asset_field == 'ASSETS_VAL_4':\n",
678
+    "        return '공인-내부응용서버'\n",
679
+    "    elif asset_field == 'ASSETS_VAL_5':\n",
680
+    "        return '공인-DB서버'\n",
681
+    "    elif asset_field == 'ASSETS_VAL_6':\n",
682
+    "        return '공인-패치서버'\n",
683
+    "    elif asset_field == 'ASSETS_VAL_7':\n",
684
+    "        return '공인-네트워크'\n",
685
+    "    elif asset_field == 'ASSETS_VAL_8':\n",
686
+    "        return '공인-보안'\n",
687
+    "    elif asset_field == 'ASSETS_VAL_9':\n",
688
+    "        return '공인-업무용PC'\n",
689
+    "    elif asset_field == 'ASSETS_VAL_10':\n",
690
+    "        return '공인-비업무용PC'\n",
691
+    "    elif asset_field == 'ASSETS_VAL_11':\n",
692
+    "        return '공인-기타'\n",
693
+    "    elif asset_field == 'ASSETS_VAL_12':\n",
694
+    "        return '사설-전체IP대역(유선)'\n",
695
+    "    elif asset_field == 'ASSETS_VAL_13':\n",
696
+    "        return '사설-전체IP대역(무선)'\n",
697
+    "    elif asset_field == 'ASSETS_VAL_14':\n",
698
+    "        return '사설-WEB서버'\n",
699
+    "    elif asset_field == 'ASSETS_VAL_15':\n",
700
+    "        return '사설-내부응용서버'\n",
701
+    "    elif asset_field == 'ASSETS_VAL_16':\n",
702
+    "        return '사설-DB서버'\n",
703
+    "    elif asset_field == 'ASSETS_VAL_17':\n",
704
+    "        return '사설-패치서버'\n",
705
+    "    elif asset_field == 'ASSETS_VAL_18':\n",
706
+    "        return '사설-네트워크'\n",
707
+    "    elif asset_field == 'ASSETS_VAL_19':\n",
708
+    "        return '사설-보안'\n",
709
+    "    elif asset_field == 'ASSETS_VAL_20':\n",
710
+    "        return '사설-업무용PC'\n",
711
+    "    elif asset_field == 'ASSETS_VAL_21':\n",
712
+    "        return '사설-비업무용PC'\n",
713
+    "    elif asset_field == 'ASSETS_VAL_22':\n",
714
+    "        return '사설-기타'\n",
715
+    "    else:\n",
716
+    "        return ''\n",
717
+    "\n",
718
+    "\n",
719
+    "# In[381]:\n",
720
+    "\n",
721
+    "\n",
722
+    "# modified\n",
723
+    "def filter_intent_MTM(intent):\n",
724
+    "    intents=[]\n",
725
+    "    for intent_key in intent:\n",
726
+    "        if 'INTENT_VAL_' in intent_key and intent[intent_key]:\n",
727
+    "            intent_key_desc = 'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)\n",
728
+    "            intents.append(intent_key_desc)\n",
729
+    "    return intents\n",
730
+    "\n",
731
+    "\n",
732
+    "# In[382]:\n",
733
+    "\n",
734
+    "\n",
735
+    "def get_intent_desc_MTM(intent_field):\n",
736
+    "    if intent_field == 'INTENT_VAL_1':\n",
737
+    "        return '파괴'\n",
738
+    "    elif intent_field == 'INTENT_VAL_2':\n",
739
+    "        return '유출'\n",
740
+    "    elif intent_field == 'INTENT_VAL_3':\n",
741
+    "        return '지연'\n",
742
+    "    elif intent_field == 'INTENT_VAL_4':\n",
743
+    "        return '잠복'\n",
744
+    "    elif intent_field == 'INTENT_VAL_5':\n",
745
+    "        return '단순침입'\n",
746
+    "    elif intent_field == 'INTENT_VAL_6':\n",
747
+    "        return 'MD5'\n",
748
+    "    elif intent_field == 'INTENT_VAL_0':\n",
749
+    "        return 'Default'\n",
750
+    "    else:\n",
751
+    "        return ''\n",
752
+    "\n",
753
+    "\n",
754
+    "\n",
755
+    "# In[384]:\n",
756
+    "\n",
757
+    "\n",
758
+    "# modified\n",
759
+    "def filter_source_MTM(source):\n",
760
+    "    sources=[]\n",
761
+    "    for source_key in source:\n",
762
+    "        if 'SOURCE_VAL_' in source_key and source[source_key]:\n",
763
+    "            source_key_desc='RISK_V2.' + source_key + '=' + get_source_desc(source_key)\n",
764
+    "            sources.append(source_key_desc)\n",
765
+    "    return sources\n",
766
+    "\n",
767
+    "\n",
768
+    "# In[385]:\n",
769
+    "\n",
770
+    "\n",
771
+    "def get_source_desc_MTM(source_field):\n",
772
+    "    if source_field=='SOURCE_VAL_1':\n",
773
+    "        return '북한IP'\n",
774
+    "    if source_field=='SOURCE_VAL_3':\n",
775
+    "        return 'ECSC Black IP'\n",
776
+    "    else:\n",
777
+    "        return ''\n",
778
+    "\n",
779
+    "\n",
780
+    "# In[386]:\n",
781
+    "\n",
782
+    "filter_assets_value_MTM(risk_df_MTM)\n",
783
+    "#뒤에 isna()를 통해 na값을 0으로 바꿔주는 작업을 하므로, 값이 비어있는 경우 [] 대신 비워두기\n",
784
+    "# New assets column\n",
785
+    "MTM_df['ASSETS_VAL']= asset_val_MTM\n",
786
+    "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].astype(str)\n",
787
+    "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].str.replace('[','', regex=False)\n",
788
+    "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].str.replace(']','', regex=False)\n",
789
+    "MTM_df[:1]\n",
790
+    "# New column of intent value\n",
791
+    "MTM_df['INTENT_VAL']=intent_val_MTM\n",
792
+    "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].astype(str)\n",
793
+    "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].str.replace('[','',regex=False)\n",
794
+    "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].str.replace(']','',regex=False)\n",
795
+    "MTM_df[:1]\n",
796
+    "# New column of SOURCE_VAL value\n",
797
+    "MTM_df['SOURCE_VAL']=source_val_MTM\n",
798
+    "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].astype(str)\n",
799
+    "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].str.replace('[','',regex=False)\n",
800
+    "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].str.replace(']','',regex=False)\n",
801
+    "MTM_df[:5]\n",
802
+    "\n",
803
+    "# In[361]:\n",
804
+    "MTM_df.drop(columns=['RISK_V2'], inplace=True)\n",
805
+    "MTM_df.columns\n",
806
+    "\n",
807
+    "\n",
808
+    "# In[388]:\n",
809
+    "\n",
810
+    "\n",
811
+    "MTM_df.isna().sum()\n",
812
+    "\n",
813
+    "\n",
814
+    "# In[389]:\n",
815
+    "\n",
816
+    "\n",
817
+    "# Change the Nan to zero\n",
818
+    "MTM_df['ACCD_DMG_PROTO_NM']=MTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
819
+    "MTM_df['INST_NM']=MTM_df['INST_NM'].replace(np.nan,'')\n",
820
+    "MTM_df['DRULE_ATT_TYPE_CODE1']=MTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
821
+    "MTM_df['TW_ATT_IP']=MTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
822
+    "MTM_df['TW_ATT_PORT']=MTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
823
+    "MTM_df['TW_DMG_IP']=MTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
824
+    "MTM_df['TW_DMG_PORT']=MTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
825
+    "MTM_df['TW_ATT_CT_NM']=MTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
826
+    "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
827
+    "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].replace(np.nan,0)\n",
828
+    "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
829
+    "MTM_df['DRULE_NM']=MTM_df['DRULE_NM'].replace(np.nan,'')\n",
830
+    "\n",
831
+    "\n",
832
+    "# In[390]:\n",
833
+    "\n",
834
+    "\n",
835
+    "# Check NaN out again\n",
836
+    "MTM_df.isna().sum()\n",
837
+    "\n",
838
+    "\n",
839
+    "# In[391]:\n",
840
+    "\n",
841
+    "# ACCD_FIND_MTD_CODE col 지우기\n",
842
+    "MTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)\n",
843
+    "\n",
844
+    "# arr를 매개변수로 받아 n개의 아이템의 조합 반환\n",
845
+    "def get_combination_MTM(arr, n):\n",
846
+    "    combination_n = list(itertools.combinations(arr.columns.tolist(),n))\n",
847
+    "    combination_n = [com for com in combination_n if 'DRULE_ATT_TYPE_CODE1' in com]\n",
848
+    "    com_list=[]\n",
849
+    "    for m in range(len(combination_n)):\n",
850
+    "        for i in range(len(arr)):\n",
851
+    "            temp_list=[]\n",
852
+    "            temp_df = arr.iloc[i]\n",
853
+    "            for col in combination_n[m]:\n",
854
+    "                # 공백 처리\n",
855
+    "                if(temp_df[col]==''):\n",
856
+    "                    break\n",
857
+    "                else:\n",
858
+    "                    temp_list.append(temp_df[col])\n",
859
+    "            com_list.append(temp_list)\n",
860
+    "    prefix = get_prefixspan_MTM(com_list)\n",
861
+    "    return prefix\n",
862
+    "\n",
863
+    "def get_prefixspan_MTM(load_list):\n",
864
+    "    n = len(load_list[0])\n",
865
+    "    save_list = PrefixSpan(load_list)\n",
866
+    "    #n개 아이템 조합으로 이루어졌는데 n보다 작은 갯수의 아이템으로 이루어진 prefixspan 결과 값 나옴 \n",
867
+    "    # 방지하기 위해 prefixspan의 결과값에는 'n개의 아이템의 값'이 다 들어가도록 filter 설정\n",
868
+    "    save_list = save_list.frequent(1,filter = lambda patt, matches:len(patt)>=n)\n",
869
+    "    save_df = pd.DataFrame(save_list)\n",
870
+    "    save_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)\n",
871
+    "    save_df = save_df.sort_values(by=['Frequency'],ascending=False,ignore_index=True)\n",
872
+    "    save_df = get_effect_MTM(save_df)\n",
873
+    "    return save_df\n",
874
+    "\n",
875
+    "def get_effect_MTM(edit_df):\n",
876
+    "    #Make the new column for filling the Effect\n",
877
+    "    edit_df['Effect']=np.nan\n",
878
+    "     #Change the order of columns\n",
879
+    "    edit_df=edit_df[['Cause','Effect','Frequency']]\n",
880
+    "    for i in range(len(edit_df)):\n",
881
+    "        drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
882
+    "        temp_df = edit_df.loc[i]\n",
883
+    "        for item in temp_df['Cause']:\n",
884
+    "            for drule in drules:\n",
885
+    "                if item == drule:\n",
886
+    "                    edit_df.loc[i,'Effect'] = item\n",
887
+    "    return edit_df\n",
888
+    "\n",
889
+    "\n",
890
+    "\n",
891
+    "# 1. 두 아이템의 조합\n",
892
+    "prefix_of_two_MTM = get_combination_MTM(MTM_df,2)\n",
893
+    "\n",
894
+    "# 2. 세 아이템의 조합\n",
895
+    "prefix_of_three_MTM = get_combination_MTM(MTM_df, 3)\n",
896
+    "\n",
897
+    "# 3. 네 아이템의 조합\n",
898
+    "prefix_of_four_MTM = get_combination_MTM(MTM_df, 4)\n",
899
+    "\n",
900
+    "# 4. 다섯 아이템의 조합\n",
901
+    "prefix_of_five_MTM = get_combination_MTM(MTM_df, 5)\n",
902
+    "\n",
903
+    "\n",
904
+    "# 5. 여섯 아이템의 조합\n",
905
+    "prefix_of_six_MTM = get_combination_MTM(MTM_df, 6)\n",
906
+    "\n",
907
+    "##################### MTM section End #####################"
908
+   ]
909
+  }
910
+ ],
911
+ "metadata": {
912
+  "anaconda-cloud": {},
913
+  "kernelspec": {
914
+   "display_name": "Python 3",
915
+   "language": "python",
916
+   "name": "python3"
917
+  },
918
+  "language_info": {
919
+   "codemirror_mode": {
920
+    "name": "ipython",
921
+    "version": 3
922
+   },
923
+   "file_extension": ".py",
924
+   "mimetype": "text/x-python",
925
+   "name": "python",
926
+   "nbconvert_exporter": "python",
927
+   "pygments_lexer": "ipython3",
928
+   "version": "3.8.8"
929
+  }
930
+ },
931
+ "nbformat": 4,
932
+ "nbformat_minor": 4
933
+}

Loading…
取消
儲存