|
|
@@ -1,933 +0,0 @@
|
|
1
|
|
-{
|
|
2
|
|
- "cells": [
|
|
3
|
|
- {
|
|
4
|
|
- "cell_type": "markdown",
|
|
5
|
|
- "metadata": {},
|
|
6
|
|
- "source": [
|
|
7
|
|
- "<p>NTM(유해트래픽 탐지장비)</p>\n",
|
|
8
|
|
- "<p>MTM(악성파일 탐지장비)</p>"
|
|
9
|
|
- ]
|
|
10
|
|
- },
|
|
11
|
|
- {
|
|
12
|
|
- "cell_type": "code",
|
|
13
|
|
- "execution_count": 1,
|
|
14
|
|
- "metadata": {},
|
|
15
|
|
- "outputs": [
|
|
16
|
|
- {
|
|
17
|
|
- "name": "stdout",
|
|
18
|
|
- "output_type": "stream",
|
|
19
|
|
- "text": [
|
|
20
|
|
- "10000\n",
|
|
21
|
|
- "10000\n"
|
|
22
|
|
- ]
|
|
23
|
|
- }
|
|
24
|
|
- ],
|
|
25
|
|
- "source": [
|
|
26
|
|
- "#!/usr/bin/env python\n",
|
|
27
|
|
- "# coding: utf-8\n",
|
|
28
|
|
- "\n",
|
|
29
|
|
- "import pandas as pd\n",
|
|
30
|
|
- "import numpy as np\n",
|
|
31
|
|
- "from mlxtend.preprocessing import TransactionEncoder\n",
|
|
32
|
|
- "from mlxtend.frequent_patterns import association_rules, fpgrowth\n",
|
|
33
|
|
- "from prefixspan import PrefixSpan\n",
|
|
34
|
|
- "\n",
|
|
35
|
|
- "# load ts_data_accident-2020_sample.csv\n",
|
|
36
|
|
- "# Set low_memory=False so pandas does not emit a DtypeWarning for mixed-type columns\n",
|
|
37
|
|
- "df = pd.read_csv('ts_data_accident-2020_sample.csv', low_memory=False)\n",
|
|
38
|
|
- "df=df[['RISK_V2','INST_NM','DRULE_ATT_TYPE_CODE1','TW_ATT_IP','TW_ATT_PORT','TW_DMG_IP','TW_DMG_PORT','ACCD_DMG_PROTO_NM','TW_ATT_CT_NM','ACCD_FIND_MTD_CODE','DRULE_NM']].dropna()\n",
|
|
39
|
|
- "len(df) #len(df) : 10000, load successful\n",
|
|
40
|
|
- "\n",
|
|
41
|
|
- "##################### NTM section #####################\n",
|
|
42
|
|
- "NTM_df=df[df['ACCD_FIND_MTD_CODE']==1] #* edit'1' to 1\n",
|
|
43
|
|
- "len(NTM_df)\n",
|
|
44
|
|
- "#* NTM_df.head()\n",
|
|
45
|
|
- "\n",
|
|
46
|
|
- "# Extract the RISK_V2 column; the asset, intent, and source (black IP) values are derived from it below\n",
|
|
47
|
|
- "RISK_V2=NTM_df['RISK_V2']\n",
|
|
48
|
|
- "\n",
|
|
49
|
|
- "RISK_V2_FILTERED=RISK_V2.dropna()\n",
|
|
50
|
|
- "print(RISK_V2.size)\n",
|
|
51
|
|
- "print(RISK_V2_FILTERED.size)\n",
|
|
52
|
|
- "\n",
|
|
53
|
|
- "#* 추가 : 기존 filter_assets_value 사용시 값을 인식하지 못하는 문제 발생 -> RISK_V2를 별도의 df로 수정\n",
|
|
54
|
|
- "import json\n",
|
|
55
|
|
- "from pandas import json_normalize\n",
|
|
56
|
|
- "risk_df = pd.DataFrame()\n",
|
|
57
|
|
- "for newVal in RISK_V2_FILTERED:\n",
|
|
58
|
|
- " newVal = newVal.replace(\"'\", \"\\\"\")\n",
|
|
59
|
|
- " newVal_str = json.loads(newVal)\n",
|
|
60
|
|
- " newVal_df = json_normalize(newVal_str) \n",
|
|
61
|
|
- " risk_df = pd.concat([risk_df,newVal_df],ignore_index=True) \n",
|
|
62
|
|
- " \n",
|
|
63
|
|
- "risk_df_col = risk_df.columns.values.tolist()\n",
|
|
64
|
|
- "\n",
|
|
65
|
|
- "# In[352]:\n",
|
|
66
|
|
- "asset_val = []\n",
|
|
67
|
|
- "intent_val=[]\n",
|
|
68
|
|
- "source_val=[]\n",
|
|
69
|
|
- "def filter_assets_value(risk):\n",
|
|
70
|
|
- " for i in range(len(risk)):\n",
|
|
71
|
|
- " risks=[]\n",
|
|
72
|
|
- " intents=[]\n",
|
|
73
|
|
- " sources=[]\n",
|
|
74
|
|
- " try:\n",
|
|
75
|
|
- " for key in risk_df_col:\n",
|
|
76
|
|
- " if 'ASSETS_VAL_' in key and risk.iloc[i][key]:\n",
|
|
77
|
|
- " risk_key_desc = 'RISK_V2.' + key + '=' + get_asset_desc(key)\n",
|
|
78
|
|
- " risks.append(risk_key_desc)\n",
|
|
79
|
|
- " if 'INTENT_VAL_' in key and risk.iloc[i][key]:\n",
|
|
80
|
|
- " intent_key_desc = 'RISK_V2.' + key + '=' + get_intent_desc(key)\n",
|
|
81
|
|
- " intents.append(intent_key_desc)\n",
|
|
82
|
|
- " if 'SOURCE_VAL_' in key and risk.iloc[i][key]:\n",
|
|
83
|
|
- " source_key_desc='RISK_V2.' + key + '=' + get_source_desc(key)\n",
|
|
84
|
|
- " sources.append(source_key_desc)\n",
|
|
85
|
|
- " except:\n",
|
|
86
|
|
- " print(risk)\n",
|
|
87
|
|
- " print(type(risk))\n",
|
|
88
|
|
- " finally:\n",
|
|
89
|
|
- " asset_val.append(risks)\n",
|
|
90
|
|
- " intent_val.append(intents)\n",
|
|
91
|
|
- " source_val.append(sources)\n",
|
|
92
|
|
- " \n",
|
|
93
|
|
- " \n",
|
|
94
|
|
- "# modified\n",
|
|
95
|
|
- "def get_asset_desc(asset_field):\n",
|
|
96
|
|
- " if asset_field == 'ASSETS_VAL_1':\n",
|
|
97
|
|
- " return '공인-전체IP대역(유선)'\n",
|
|
98
|
|
- " elif asset_field == 'ASSETS_VAL_2':\n",
|
|
99
|
|
- " return '공인-전체IP대역(무선)'\n",
|
|
100
|
|
- " elif asset_field == 'ASSETS_VAL_3':\n",
|
|
101
|
|
- " return '공인-WEB서버'\n",
|
|
102
|
|
- " elif asset_field == 'ASSETS_VAL_4':\n",
|
|
103
|
|
- " return '공인-내부응용서버'\n",
|
|
104
|
|
- " elif asset_field == 'ASSETS_VAL_5':\n",
|
|
105
|
|
- " return '공인-DB서버'\n",
|
|
106
|
|
- " elif asset_field == 'ASSETS_VAL_6':\n",
|
|
107
|
|
- " return '공인-패치서버'\n",
|
|
108
|
|
- " elif asset_field == 'ASSETS_VAL_7':\n",
|
|
109
|
|
- " return '공인-네트워크'\n",
|
|
110
|
|
- " elif asset_field == 'ASSETS_VAL_8':\n",
|
|
111
|
|
- " return '공인-보안'\n",
|
|
112
|
|
- " elif asset_field == 'ASSETS_VAL_9':\n",
|
|
113
|
|
- " return '공인-업무용PC'\n",
|
|
114
|
|
- " elif asset_field == 'ASSETS_VAL_10':\n",
|
|
115
|
|
- " return '공인-비업무용PC'\n",
|
|
116
|
|
- " elif asset_field == 'ASSETS_VAL_11':\n",
|
|
117
|
|
- " return '공인-기타'\n",
|
|
118
|
|
- " elif asset_field == 'ASSETS_VAL_12':\n",
|
|
119
|
|
- " return '사설-전체IP대역(유선)'\n",
|
|
120
|
|
- " elif asset_field == 'ASSETS_VAL_13':\n",
|
|
121
|
|
- " return '사설-전체IP대역(무선)'\n",
|
|
122
|
|
- " elif asset_field == 'ASSETS_VAL_14':\n",
|
|
123
|
|
- " return '사설-WEB서버'\n",
|
|
124
|
|
- " elif asset_field == 'ASSETS_VAL_15':\n",
|
|
125
|
|
- " return '사설-내부응용서버'\n",
|
|
126
|
|
- " elif asset_field == 'ASSETS_VAL_16':\n",
|
|
127
|
|
- " return '사설-DB서버'\n",
|
|
128
|
|
- " elif asset_field == 'ASSETS_VAL_17':\n",
|
|
129
|
|
- " return '사설-패치서버'\n",
|
|
130
|
|
- " elif asset_field == 'ASSETS_VAL_18':\n",
|
|
131
|
|
- " return '사설-네트워크'\n",
|
|
132
|
|
- " elif asset_field == 'ASSETS_VAL_19':\n",
|
|
133
|
|
- " return '사설-보안'\n",
|
|
134
|
|
- " elif asset_field == 'ASSETS_VAL_20':\n",
|
|
135
|
|
- " return '사설-업무용PC'\n",
|
|
136
|
|
- " elif asset_field == 'ASSETS_VAL_21':\n",
|
|
137
|
|
- " return '사설-비업무용PC'\n",
|
|
138
|
|
- " elif asset_field == 'ASSETS_VAL_22':\n",
|
|
139
|
|
- " return '사설-기타'\n",
|
|
140
|
|
- " else:\n",
|
|
141
|
|
- " return ''\n",
|
|
142
|
|
- "\n",
|
|
143
|
|
- "\n",
|
|
144
|
|
- "\n",
|
|
145
|
|
- "# modified\n",
|
|
146
|
|
- "def filter_intent(intent):\n",
|
|
147
|
|
- " intents=[]\n",
|
|
148
|
|
- " for intent_key in intent:\n",
|
|
149
|
|
- " if 'INTENT_VAL_' in intent_key and intent[intent_key]:\n",
|
|
150
|
|
- " intent_key_desc = 'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)\n",
|
|
151
|
|
- " intents.append(intent_key_desc)\n",
|
|
152
|
|
- " return intents\n",
|
|
153
|
|
- "\n",
|
|
154
|
|
- "\n",
|
|
155
|
|
- "# In[356]:\n",
|
|
156
|
|
- "\n",
|
|
157
|
|
- "\n",
|
|
158
|
|
- "def get_intent_desc(intent_field):\n",
|
|
159
|
|
- " if intent_field == 'INTENT_VAL_1':\n",
|
|
160
|
|
- " return '파괴'\n",
|
|
161
|
|
- " elif intent_field == 'INTENT_VAL_2':\n",
|
|
162
|
|
- " return '유출'\n",
|
|
163
|
|
- " elif intent_field == 'INTENT_VAL_3':\n",
|
|
164
|
|
- " return '지연'\n",
|
|
165
|
|
- " elif intent_field == 'INTENT_VAL_4':\n",
|
|
166
|
|
- " return '잠복'\n",
|
|
167
|
|
- " elif intent_field == 'INTENT_VAL_5':\n",
|
|
168
|
|
- " return '단순침입'\n",
|
|
169
|
|
- " elif intent_field == 'INTENT_VAL_6':\n",
|
|
170
|
|
- " return 'MD5'\n",
|
|
171
|
|
- " elif intent_field == 'INTENT_VAL_0':\n",
|
|
172
|
|
- " return 'Default'\n",
|
|
173
|
|
- " else:\n",
|
|
174
|
|
- " return ''\n",
|
|
175
|
|
- "\n",
|
|
176
|
|
- "\n",
|
|
177
|
|
- "# In[358]:\n",
|
|
178
|
|
- "\n",
|
|
179
|
|
- "\n",
|
|
180
|
|
- "# modified\n",
|
|
181
|
|
- "def filter_source(source):\n",
|
|
182
|
|
- " sources=[]\n",
|
|
183
|
|
- " for source_key in source:\n",
|
|
184
|
|
- " if 'SOURCE_VAL_' in source_key and source[source_key]:\n",
|
|
185
|
|
- " source_key_desc='RISK_V2.' + source_key + '=' + get_source_desc(source_key)\n",
|
|
186
|
|
- " sources.append(source_key_desc)\n",
|
|
187
|
|
- " return sources\n",
|
|
188
|
|
- "\n",
|
|
189
|
|
- "\n",
|
|
190
|
|
- "# In[359]:\n",
|
|
191
|
|
- "\n",
|
|
192
|
|
- "\n",
|
|
193
|
|
- "def get_source_desc(source_field):\n",
|
|
194
|
|
- " if source_field=='SOURCE_VAL_1':\n",
|
|
195
|
|
- " return '북한IP'\n",
|
|
196
|
|
- " if source_field=='SOURCE_VAL_3':\n",
|
|
197
|
|
- " return 'ECSC Black IP'\n",
|
|
198
|
|
- " else:\n",
|
|
199
|
|
- " return ''\n",
|
|
200
|
|
- "\n",
|
|
201
|
|
- "\n"
|
|
202
|
|
- ]
|
|
203
|
|
- },
|
|
204
|
|
- {
|
|
205
|
|
- "cell_type": "code",
|
|
206
|
|
- "execution_count": 2,
|
|
207
|
|
- "metadata": {},
|
|
208
|
|
- "outputs": [
|
|
209
|
|
- {
|
|
210
|
|
- "data": {
|
|
211
|
|
- "text/plain": [
|
|
212
|
|
- "Index(['INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',\n",
|
|
213
|
|
- " 'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',\n",
|
|
214
|
|
- " 'ACCD_FIND_MTD_CODE', 'DRULE_NM', 'ASSETS_VAL', 'INTENT_VAL',\n",
|
|
215
|
|
- " 'SOURCE_VAL'],\n",
|
|
216
|
|
- " dtype='object')"
|
|
217
|
|
- ]
|
|
218
|
|
- },
|
|
219
|
|
- "execution_count": 2,
|
|
220
|
|
- "metadata": {},
|
|
221
|
|
- "output_type": "execute_result"
|
|
222
|
|
- }
|
|
223
|
|
- ],
|
|
224
|
|
- "source": [
|
|
225
|
|
- "filter_assets_value(risk_df)\n",
|
|
226
|
|
- "#뒤에 isna()를 통해 na값을 0으로 바꿔주는 작업을 하므로, 값이 비어있는 경우 [] 대신 비워두기\n",
|
|
227
|
|
- "# New assets column\n",
|
|
228
|
|
- "NTM_df['ASSETS_VAL']= asset_val\n",
|
|
229
|
|
- "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].astype(str)\n",
|
|
230
|
|
- "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].str.replace('[','', regex=False)\n",
|
|
231
|
|
- "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].str.replace(']','', regex=False)\n",
|
|
232
|
|
- "NTM_df[:1]\n",
|
|
233
|
|
- "# New column of intent value\n",
|
|
234
|
|
- "NTM_df['INTENT_VAL']=intent_val\n",
|
|
235
|
|
- "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].astype(str)\n",
|
|
236
|
|
- "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].str.replace('[','',regex=False)\n",
|
|
237
|
|
- "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].str.replace(']','',regex=False)\n",
|
|
238
|
|
- "NTM_df[:1]\n",
|
|
239
|
|
- "# New column of SOURCE_VAL value\n",
|
|
240
|
|
- "NTM_df['SOURCE_VAL']=source_val\n",
|
|
241
|
|
- "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].astype(str)\n",
|
|
242
|
|
- "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].str.replace('[','',regex=False)\n",
|
|
243
|
|
- "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].str.replace(']','',regex=False)\n",
|
|
244
|
|
- "NTM_df[:5]\n",
|
|
245
|
|
- "\n",
|
|
246
|
|
- "# In[361]:\n",
|
|
247
|
|
- "NTM_df.drop(columns=['RISK_V2'], inplace=True)\n",
|
|
248
|
|
- "NTM_df.columns"
|
|
249
|
|
- ]
|
|
250
|
|
- },
|
|
251
|
|
- {
|
|
252
|
|
- "cell_type": "code",
|
|
253
|
|
- "execution_count": 3,
|
|
254
|
|
- "metadata": {},
|
|
255
|
|
- "outputs": [],
|
|
256
|
|
- "source": [
|
|
257
|
|
- "#data frame의 i번째 row를 list로 저장하여 itertools.combinations로 모든 조합 만들 예정\n",
|
|
258
|
|
- "#TW_ATT_IP와 TW_DMG_IP의 값이 같은 경우 어떤 값과의 관계인지 알 수 없으므로 데이터 가공\n",
|
|
259
|
|
- "NTM_df['TW_ATT_IP']=\"TW_ATT_IP=\"+NTM_df['TW_ATT_IP'].astype(str)\n",
|
|
260
|
|
- "NTM_df['TW_ATT_PORT']=\"TW_ATT_PORT=\"+NTM_df['TW_ATT_PORT'].astype(str)\n",
|
|
261
|
|
- "NTM_df['TW_DMG_IP']=\"TW_DMG_IP=\"+NTM_df['TW_DMG_IP'].astype(str)\n",
|
|
262
|
|
- "NTM_df['TW_DMG_PORT']=\"TW_DMG_PORT=\"+NTM_df['TW_DMG_PORT'].astype(str)"
|
|
263
|
|
- ]
|
|
264
|
|
- },
|
|
265
|
|
- {
|
|
266
|
|
- "cell_type": "code",
|
|
267
|
|
- "execution_count": 4,
|
|
268
|
|
- "metadata": {},
|
|
269
|
|
- "outputs": [
|
|
270
|
|
- {
|
|
271
|
|
- "data": {
|
|
272
|
|
- "text/plain": [
|
|
273
|
|
- "INST_NM 0\n",
|
|
274
|
|
- "DRULE_ATT_TYPE_CODE1 0\n",
|
|
275
|
|
- "TW_ATT_IP 0\n",
|
|
276
|
|
- "TW_ATT_PORT 0\n",
|
|
277
|
|
- "TW_DMG_IP 0\n",
|
|
278
|
|
- "TW_DMG_PORT 0\n",
|
|
279
|
|
- "ACCD_DMG_PROTO_NM 0\n",
|
|
280
|
|
- "TW_ATT_CT_NM 0\n",
|
|
281
|
|
- "ACCD_FIND_MTD_CODE 0\n",
|
|
282
|
|
- "DRULE_NM 0\n",
|
|
283
|
|
- "ASSETS_VAL 0\n",
|
|
284
|
|
- "INTENT_VAL 0\n",
|
|
285
|
|
- "SOURCE_VAL 0\n",
|
|
286
|
|
- "dtype: int64"
|
|
287
|
|
- ]
|
|
288
|
|
- },
|
|
289
|
|
- "execution_count": 4,
|
|
290
|
|
- "metadata": {},
|
|
291
|
|
- "output_type": "execute_result"
|
|
292
|
|
- }
|
|
293
|
|
- ],
|
|
294
|
|
- "source": [
|
|
295
|
|
- "##################### 여기서부터 진행하시면 됩니다. #####################\n",
|
|
296
|
|
- "##################### 아래 12개 아이템(12. 장비 ACCD_FIND_MTD_CODE 제외)에 대해서 모든 아이템 조합에 알고리즘 적용하기#####################\n",
|
|
297
|
|
- "\n",
|
|
298
|
|
- "# It should be 13 columns in total\n",
|
|
299
|
|
- "\n",
|
|
300
|
|
- "# 1. 기관 INST_NM\n",
|
|
301
|
|
- "# 2. 공격 DRULE_ATT_TYPE_CODE1\n",
|
|
302
|
|
- "# 3. 자산 ASSETS_VAL\n",
|
|
303
|
|
- "# 4. 위협공격ip TW_ATT_IP\n",
|
|
304
|
|
- "# 5. 위협공격port TW_ATT_PORT\n",
|
|
305
|
|
- "# 6. 위협피해ip TW_DMG_IP\n",
|
|
306
|
|
- "# 7. 위협피해port TW_DMG_PORT\n",
|
|
307
|
|
- "# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM\n",
|
|
308
|
|
- "# 9. 공격국가 TW_ATT_CT_NM\n",
|
|
309
|
|
- "# 10. 의도(7개) INTENT_VAL\n",
|
|
310
|
|
- "# 11. IP/URL 가중치 SOURCE_VAL\n",
|
|
311
|
|
- "# 12. 장비 ACCD_FIND_MTD_CODE\n",
|
|
312
|
|
- "# 13. 탐지규칙명 DRULE_NM\n",
|
|
313
|
|
- "\n",
|
|
314
|
|
- "\n",
|
|
315
|
|
- "# In[363]:\n",
|
|
316
|
|
- "NTM_df.isna().sum()\n",
|
|
317
|
|
- "\n",
|
|
318
|
|
- "\n",
|
|
319
|
|
- "# Replace NaN with '' or 0, depending on the column\n",
|
|
320
|
|
- "NTM_df['ACCD_DMG_PROTO_NM']=NTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
|
|
321
|
|
- "NTM_df['INST_NM']=NTM_df['INST_NM'].replace(np.nan,'')\n",
|
|
322
|
|
- "NTM_df['DRULE_ATT_TYPE_CODE1']=NTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
|
|
323
|
|
- "NTM_df['TW_ATT_IP']=NTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
|
|
324
|
|
- "NTM_df['TW_ATT_PORT']=NTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
|
|
325
|
|
- "NTM_df['TW_DMG_IP']=NTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
|
|
326
|
|
- "NTM_df['TW_DMG_PORT']=NTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
|
|
327
|
|
- "NTM_df['TW_ATT_CT_NM']=NTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
|
|
328
|
|
- "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
|
|
329
|
|
- "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].replace(np.nan,0)\n",
|
|
330
|
|
- "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
|
|
331
|
|
- "NTM_df['DRULE_NM']=NTM_df['DRULE_NM'].replace(np.nan,'')\n",
|
|
332
|
|
- "\n",
|
|
333
|
|
- "\n",
|
|
334
|
|
- "# Re-check the NaN counts after the replacement\n"
|
|
335
|
|
- "NTM_df.isna().sum()\n"
|
|
336
|
|
- ]
|
|
337
|
|
- },
|
|
338
|
|
- {
|
|
339
|
|
- "cell_type": "code",
|
|
340
|
|
- "execution_count": 5,
|
|
341
|
|
- "metadata": {},
|
|
342
|
|
- "outputs": [],
|
|
343
|
|
- "source": [
|
|
344
|
|
- "# NTM_df의 col을 list로 저장. itertools.combinations로 가능한 시나리오 모두 추출\n",
|
|
345
|
|
- "\n",
|
|
346
|
|
- "# ACCD_FIND_MTD_CODE col 지우기\n",
|
|
347
|
|
- "NTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], inplace=True)"
|
|
348
|
|
- ]
|
|
349
|
|
- },
|
|
350
|
|
- {
|
|
351
|
|
- "cell_type": "code",
|
|
352
|
|
- "execution_count": 6,
|
|
353
|
|
- "metadata": {},
|
|
354
|
|
- "outputs": [],
|
|
355
|
|
- "source": [
|
|
356
|
|
- "from prefixspan import PrefixSpan\n",
|
|
357
|
|
- "import itertools\n",
|
|
358
|
|
- "# arr를 매개변수로 받아 n개의 아이템의 조합 반환\n",
|
|
359
|
|
- "def get_combination(arr, n):\n",
|
|
360
|
|
- " combination_n = list(itertools.combinations(arr.columns.tolist(),n))\n",
|
|
361
|
|
- " combination_n = [com for com in combination_n if 'DRULE_ATT_TYPE_CODE1' in com]\n",
|
|
362
|
|
- " com_list=[]\n",
|
|
363
|
|
- " # row i 의 (1,2),(1,3)... 이런식으로 하니까 시간 너무 오래걸림\n",
|
|
364
|
|
- " # (1,2) 조합에 대한 row i, row i+1, row i+2... 이렇게 바꿈\n",
|
|
365
|
|
- " for m in range(len(combination_n)):\n",
|
|
366
|
|
- " for i in range(len(arr)):\n",
|
|
367
|
|
- " temp_list=[]\n",
|
|
368
|
|
- " temp_df = arr.iloc[i]\n",
|
|
369
|
|
- " for col in combination_n[m]:\n",
|
|
370
|
|
- " # 공백 처리\n",
|
|
371
|
|
- " if(temp_df[col]==''):\n",
|
|
372
|
|
- " break\n",
|
|
373
|
|
- " else:\n",
|
|
374
|
|
- " temp_list.append(temp_df[col])\n",
|
|
375
|
|
- " com_list.append(temp_list)\n",
|
|
376
|
|
- " prefix = get_prefixspan(com_list)\n",
|
|
377
|
|
- " return prefix\n",
|
|
378
|
|
- "\n",
|
|
379
|
|
- "def get_prefixspan(load_list):\n",
|
|
380
|
|
- " n = len(load_list[0])\n",
|
|
381
|
|
- " save_list = PrefixSpan(load_list)\n",
|
|
382
|
|
- " #n개 아이템 조합으로 이루어졌는데 n보다 작은 갯수의 아이템으로 이루어진 prefixspan 결과 값 나옴 \n",
|
|
383
|
|
- " # 방지하기 위해 prefixspan의 결과값에는 'n개의 아이템의 값'이 다 들어가도록 filter 설정\n",
|
|
384
|
|
- " save_list = save_list.frequent(1,filter = lambda patt, matches:len(patt)>=n)\n",
|
|
385
|
|
- " save_df = pd.DataFrame(save_list)\n",
|
|
386
|
|
- " save_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)\n",
|
|
387
|
|
- " save_df = save_df.sort_values(by=['Frequency'],ascending=False,ignore_index=True)\n",
|
|
388
|
|
- " save_df = get_effect(save_df)\n",
|
|
389
|
|
- " return save_df\n",
|
|
390
|
|
- "\n",
|
|
391
|
|
- "def get_effect(edit_df):\n",
|
|
392
|
|
- " #Make the new column for filling the Effect\n",
|
|
393
|
|
- " edit_df['Effect']=np.nan\n",
|
|
394
|
|
- " #Change the order of columns\n",
|
|
395
|
|
- " edit_df=edit_df[['Cause','Effect','Frequency']]\n",
|
|
396
|
|
- " for i in range(len(edit_df)):\n",
|
|
397
|
|
- " drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
|
|
398
|
|
- " temp_df = edit_df.loc[i]\n",
|
|
399
|
|
- " for item in temp_df['Cause']:\n",
|
|
400
|
|
- " for drule in drules:\n",
|
|
401
|
|
- " if item == drule:\n",
|
|
402
|
|
- " edit_df.loc[i,'Effect'] = item\n",
|
|
403
|
|
- " return edit_df\n"
|
|
404
|
|
- ]
|
|
405
|
|
- },
|
|
406
|
|
- {
|
|
407
|
|
- "cell_type": "code",
|
|
408
|
|
- "execution_count": 7,
|
|
409
|
|
- "metadata": {},
|
|
410
|
|
- "outputs": [
|
|
411
|
|
- {
|
|
412
|
|
- "data": {
|
|
413
|
|
- "text/html": [
|
|
414
|
|
- "<div>\n",
|
|
415
|
|
- "<style scoped>\n",
|
|
416
|
|
- " .dataframe tbody tr th:only-of-type {\n",
|
|
417
|
|
- " vertical-align: middle;\n",
|
|
418
|
|
- " }\n",
|
|
419
|
|
- "\n",
|
|
420
|
|
- " .dataframe tbody tr th {\n",
|
|
421
|
|
- " vertical-align: top;\n",
|
|
422
|
|
- " }\n",
|
|
423
|
|
- "\n",
|
|
424
|
|
- " .dataframe thead th {\n",
|
|
425
|
|
- " text-align: right;\n",
|
|
426
|
|
- " }\n",
|
|
427
|
|
- "</style>\n",
|
|
428
|
|
- "<table border=\"1\" class=\"dataframe\">\n",
|
|
429
|
|
- " <thead>\n",
|
|
430
|
|
- " <tr style=\"text-align: right;\">\n",
|
|
431
|
|
- " <th></th>\n",
|
|
432
|
|
- " <th>Cause</th>\n",
|
|
433
|
|
- " <th>Effect</th>\n",
|
|
434
|
|
- " <th>Frequency</th>\n",
|
|
435
|
|
- " </tr>\n",
|
|
436
|
|
- " </thead>\n",
|
|
437
|
|
- " <tbody>\n",
|
|
438
|
|
- " <tr>\n",
|
|
439
|
|
- " <th>0</th>\n",
|
|
440
|
|
- " <td>[Attack, 'RISK_V2.INTENT_VAL_5=단순침입']</td>\n",
|
|
441
|
|
- " <td>Attack</td>\n",
|
|
442
|
|
- " <td>7709</td>\n",
|
|
443
|
|
- " </tr>\n",
|
|
444
|
|
- " <tr>\n",
|
|
445
|
|
- " <th>1</th>\n",
|
|
446
|
|
- " <td>[Attack, 'RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
|
|
447
|
|
- " <td>Attack</td>\n",
|
|
448
|
|
- " <td>3175</td>\n",
|
|
449
|
|
- " </tr>\n",
|
|
450
|
|
- " <tr>\n",
|
|
451
|
|
- " <th>2</th>\n",
|
|
452
|
|
- " <td>[Attack, Attack-Scan-29-01-PHPUnit(CVE17-9841)...</td>\n",
|
|
453
|
|
- " <td>Attack</td>\n",
|
|
454
|
|
- " <td>2770</td>\n",
|
|
455
|
|
- " </tr>\n",
|
|
456
|
|
- " <tr>\n",
|
|
457
|
|
- " <th>3</th>\n",
|
|
458
|
|
- " <td>[Attack, 중국]</td>\n",
|
|
459
|
|
- " <td>Attack</td>\n",
|
|
460
|
|
- " <td>2689</td>\n",
|
|
461
|
|
- " </tr>\n",
|
|
462
|
|
- " <tr>\n",
|
|
463
|
|
- " <th>4</th>\n",
|
|
464
|
|
- " <td>[Attack, 'RISK_V2.SOURCE_VAL_3=ECSC Black IP']</td>\n",
|
|
465
|
|
- " <td>Attack</td>\n",
|
|
466
|
|
- " <td>1904</td>\n",
|
|
467
|
|
- " </tr>\n",
|
|
468
|
|
- " <tr>\n",
|
|
469
|
|
- " <th>...</th>\n",
|
|
470
|
|
- " <td>...</td>\n",
|
|
471
|
|
- " <td>...</td>\n",
|
|
472
|
|
- " <td>...</td>\n",
|
|
473
|
|
- " </tr>\n",
|
|
474
|
|
- " <tr>\n",
|
|
475
|
|
- " <th>41145</th>\n",
|
|
476
|
|
- " <td>[Attack, TW_ATT_PORT=5389]</td>\n",
|
|
477
|
|
- " <td>Attack</td>\n",
|
|
478
|
|
- " <td>1</td>\n",
|
|
479
|
|
- " </tr>\n",
|
|
480
|
|
- " <tr>\n",
|
|
481
|
|
- " <th>41146</th>\n",
|
|
482
|
|
- " <td>[Attack, TW_ATT_PORT=38677]</td>\n",
|
|
483
|
|
- " <td>Attack</td>\n",
|
|
484
|
|
- " <td>1</td>\n",
|
|
485
|
|
- " </tr>\n",
|
|
486
|
|
- " <tr>\n",
|
|
487
|
|
- " <th>41147</th>\n",
|
|
488
|
|
- " <td>[Attack, TW_ATT_PORT=8287]</td>\n",
|
|
489
|
|
- " <td>Attack</td>\n",
|
|
490
|
|
- " <td>1</td>\n",
|
|
491
|
|
- " </tr>\n",
|
|
492
|
|
- " <tr>\n",
|
|
493
|
|
- " <th>41148</th>\n",
|
|
494
|
|
- " <td>[Attack, TW_ATT_PORT=2404]</td>\n",
|
|
495
|
|
- " <td>Attack</td>\n",
|
|
496
|
|
- " <td>1</td>\n",
|
|
497
|
|
- " </tr>\n",
|
|
498
|
|
- " <tr>\n",
|
|
499
|
|
- " <th>41149</th>\n",
|
|
500
|
|
- " <td>[Seoul Christian University, Malwr]</td>\n",
|
|
501
|
|
- " <td>Malwr</td>\n",
|
|
502
|
|
- " <td>1</td>\n",
|
|
503
|
|
- " </tr>\n",
|
|
504
|
|
- " </tbody>\n",
|
|
505
|
|
- "</table>\n",
|
|
506
|
|
- "<p>41150 rows × 3 columns</p>\n",
|
|
507
|
|
- "</div>"
|
|
508
|
|
- ],
|
|
509
|
|
- "text/plain": [
|
|
510
|
|
- " Cause Effect Frequency\n",
|
|
511
|
|
- "0 [Attack, 'RISK_V2.INTENT_VAL_5=단순침입'] Attack 7709\n",
|
|
512
|
|
- "1 [Attack, 'RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] Attack 3175\n",
|
|
513
|
|
- "2 [Attack, Attack-Scan-29-01-PHPUnit(CVE17-9841)... Attack 2770\n",
|
|
514
|
|
- "3 [Attack, 중국] Attack 2689\n",
|
|
515
|
|
- "4 [Attack, 'RISK_V2.SOURCE_VAL_3=ECSC Black IP'] Attack 1904\n",
|
|
516
|
|
- "... ... ... ...\n",
|
|
517
|
|
- "41145 [Attack, TW_ATT_PORT=5389] Attack 1\n",
|
|
518
|
|
- "41146 [Attack, TW_ATT_PORT=38677] Attack 1\n",
|
|
519
|
|
- "41147 [Attack, TW_ATT_PORT=8287] Attack 1\n",
|
|
520
|
|
- "41148 [Attack, TW_ATT_PORT=2404] Attack 1\n",
|
|
521
|
|
- "41149 [Seoul Christian University, Malwr] Malwr 1\n",
|
|
522
|
|
- "\n",
|
|
523
|
|
- "[41150 rows x 3 columns]"
|
|
524
|
|
- ]
|
|
525
|
|
- },
|
|
526
|
|
- "execution_count": 7,
|
|
527
|
|
- "metadata": {},
|
|
528
|
|
- "output_type": "execute_result"
|
|
529
|
|
- }
|
|
530
|
|
- ],
|
|
531
|
|
- "source": [
|
|
532
|
|
- "# 1. 두 아이템의 조합\n",
|
|
533
|
|
- "item = 2\n",
|
|
534
|
|
- "prefix_of_two = get_combination(NTM_df, item)\n",
|
|
535
|
|
- "prefix_of_two"
|
|
536
|
|
- ]
|
|
537
|
|
- },
|
|
538
|
|
- {
|
|
539
|
|
- "cell_type": "code",
|
|
540
|
|
- "execution_count": 8,
|
|
541
|
|
- "metadata": {},
|
|
542
|
|
- "outputs": [
|
|
543
|
|
- {
|
|
544
|
|
- "ename": "KeyboardInterrupt",
|
|
545
|
|
- "evalue": "",
|
|
546
|
|
- "output_type": "error",
|
|
547
|
|
- "traceback": [
|
|
548
|
|
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
549
|
|
- "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
|
550
|
|
- "\u001b[1;32m<ipython-input-8-fdb1732ee6a2>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# 2. 세 아이템의 조합\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mprefix_of_three\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_combination\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mNTM_df\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
|
|
551
|
|
- "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_combination\u001b[1;34m(arr, n)\u001b[0m\n\u001b[0;32m 19\u001b[0m \u001b[0mtemp_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp_df\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[0mcom_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtemp_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 21\u001b[1;33m \u001b[0mprefix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_prefixspan\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcom_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 22\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mprefix\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
552
|
|
- "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_prefixspan\u001b[1;34m(load_list)\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[0msave_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m{\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'Frequency'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;34m'Cause'\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[0msave_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msave_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Frequency'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mignore_index\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m \u001b[0msave_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_effect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msave_df\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 34\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0msave_df\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
553
|
|
- "\u001b[1;32m<ipython-input-6-7cca23a52bd5>\u001b[0m in \u001b[0;36mget_effect\u001b[1;34m(edit_df)\u001b[0m\n\u001b[0;32m 45\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mdrule\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdrules\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 46\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mitem\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mdrule\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 47\u001b[1;33m \u001b[0medit_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Effect'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mitem\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 48\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0medit_df\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
554
|
|
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 690\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 691\u001b[0m \u001b[0miloc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"iloc\"\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 692\u001b[1;33m \u001b[0miloc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_with_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 693\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 694\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_validate_key\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
555
|
|
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer\u001b[1;34m(self, indexer, value, name)\u001b[0m\n\u001b[0;32m 1633\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mtake_split_path\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1634\u001b[0m \u001b[1;31m# We have to operate column-wise\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1635\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_with_indexer_split_path\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1636\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1637\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_single_block\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
556
|
|
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer_split_path\u001b[1;34m(self, indexer, value, name)\u001b[0m\n\u001b[0;32m 1718\u001b[0m \u001b[1;31m# scalar value\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1719\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mloc\u001b[0m \u001b[1;32min\u001b[0m \u001b[0milocs\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1720\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setitem_single_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpi\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1721\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1722\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_setitem_with_indexer_2d_value\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
557
|
|
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_setitem_single_column\u001b[1;34m(self, loc, value, plane_indexer)\u001b[0m\n\u001b[0;32m 1815\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1816\u001b[0m \u001b[1;31m# reset the sliced object if unique\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1817\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_iset_item\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mser\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1818\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1819\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_setitem_single_block\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
558
|
|
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_iset_item\u001b[1;34m(self, loc, value)\u001b[0m\n\u001b[0;32m 3220\u001b[0m \u001b[1;31m# technically _sanitize_column expects a label, not a position,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3221\u001b[0m \u001b[1;31m# but the behavior is the same as long as we pass broadcast=False\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3222\u001b[1;33m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_sanitize_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbroadcast\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3223\u001b[0m \u001b[0mNDFrame\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_iset_item\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3224\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
559
|
|
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_sanitize_column\u001b[1;34m(self, key, value, broadcast)\u001b[0m\n\u001b[0;32m 3874\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3875\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSeries\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3876\u001b[1;33m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mreindexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3877\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3878\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
560
|
|
- "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mreindexer\u001b[1;34m(value)\u001b[0m\n\u001b[0;32m 3855\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3856\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mequals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3857\u001b[1;33m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3858\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3859\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
561
|
|
- "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
|
|
562
|
|
- ]
|
|
563
|
|
- }
|
|
564
|
|
- ],
|
|
565
|
|
- "source": [
|
|
566
|
|
- "# 2. Combinations of three items (the saved output of this cell ends in a KeyboardInterrupt, i.e. the recorded run was stopped before it finished)\n",
|
|
567
|
|
- "prefix_of_three = get_combination(NTM_df, 3)"
|
|
568
|
|
- ]
|
|
569
|
|
- },
|
|
570
|
|
- {
|
|
571
|
|
- "cell_type": "code",
|
|
572
|
|
- "execution_count": null,
|
|
573
|
|
- "metadata": {},
|
|
574
|
|
- "outputs": [],
|
|
575
|
|
- "source": [
|
|
576
|
|
- "# 3. Combinations of four items\n",
|
|
577
|
|
- "prefix_of_four = get_combination(NTM_df, 4)"
|
|
578
|
|
- ]
|
|
579
|
|
- },
|
|
580
|
|
- {
|
|
581
|
|
- "cell_type": "code",
|
|
582
|
|
- "execution_count": null,
|
|
583
|
|
- "metadata": {},
|
|
584
|
|
- "outputs": [],
|
|
585
|
|
- "source": [
|
|
586
|
|
- "# 4. Combinations of five items\n",
|
|
587
|
|
- "prefix_of_five = get_combination(NTM_df, 5)"
|
|
588
|
|
- ]
|
|
589
|
|
- },
|
|
590
|
|
- {
|
|
591
|
|
- "cell_type": "code",
|
|
592
|
|
- "execution_count": null,
|
|
593
|
|
- "metadata": {},
|
|
594
|
|
- "outputs": [],
|
|
595
|
|
- "source": [
|
|
596
|
|
- "# 5. Combinations of six items\n",
|
|
597
|
|
- "prefix_of_six = get_combination(NTM_df, 6)\n",
|
|
598
|
|
- "##################### NTM section End #####################"
|
|
599
|
|
- ]
|
|
600
|
|
- },
|
|
601
|
|
- {
|
|
602
|
|
- "cell_type": "code",
|
|
603
|
|
- "execution_count": null,
|
|
604
|
|
- "metadata": {},
|
|
605
|
|
- "outputs": [],
|
|
606
|
|
- "source": [
|
|
607
|
|
- "##################### MTM section #####################\n",
|
|
608
|
|
- "# Same goes for the MTM section\n",
|
|
609
|
|
- "\n",
|
|
610
|
|
- "# In[375]:\n",
|
|
611
|
|
- "\n",
|
|
612
|
|
- "\n",
|
|
613
|
|
- "MTM_df=df[df['ACCD_FIND_MTD_CODE']==2].copy()  # independent copy: columns are added to / dropped from MTM_df below\n",
|
|
614
|
|
- "len(MTM_df)\n",
|
|
615
|
|
- "\n",
|
|
616
|
|
- "\n",
|
|
617
|
|
- "# In[376]:\n",
|
|
618
|
|
- "\n",
|
|
619
|
|
- "\n",
|
|
620
|
|
- "# Pull out RISK_V2 so the asset, intent and source (black IP) flags can be parsed from it\n",
|
|
621
|
|
- "RISK_V2_MTM=MTM_df['RISK_V2']\n",
|
|
622
|
|
- "\n",
|
|
623
|
|
- "RISK_V2_FILTERED_MTM=RISK_V2_MTM.dropna()\n",
|
|
624
|
|
- "print(RISK_V2_MTM.size)\n",
|
|
625
|
|
- "print(RISK_V2_FILTERED_MTM.size)\n",
|
|
626
|
|
- "\n",
|
|
627
|
|
- "risk_frames_MTM = []  # one single-row frame per RISK_V2 value\n",
|
|
628
|
|
- "for newVal_MTM in RISK_V2_FILTERED_MTM:\n",
|
|
629
|
|
- "    newVal_MTM = newVal_MTM.replace(\"'\", \"\\\"\")  # swap single quotes for double quotes so json.loads accepts the text\n",
|
|
630
|
|
- "    newVal_MTM_str = json.loads(newVal_MTM)\n",
|
|
631
|
|
- "    risk_frames_MTM.append(json_normalize(newVal_MTM_str))\n",
|
|
632
|
|
- "# Concatenate once after the loop; growing the frame with pd.concat on every iteration re-copies all earlier rows each time\n",
|
|
633
|
|
- "risk_df_MTM = pd.concat(risk_frames_MTM, ignore_index=True) if risk_frames_MTM else pd.DataFrame()\n",
|
|
634
|
|
- "risk_df_col_MTM = risk_df_MTM.columns.values.tolist()\n",
|
|
635
|
|
- "\n",
|
|
636
|
|
- "# In[377]:\n",
|
|
637
|
|
- "\n",
|
|
638
|
|
- "\n",
|
|
639
|
|
- "# Per-row results filled in by filter_assets_value_MTM (one inner list per row of the frame passed to it)\n",
|
|
640
|
|
- "asset_val_MTM = []\n",
|
|
641
|
|
- "intent_val_MTM=[]\n",
|
|
642
|
|
- "source_val_MTM=[]\n",
|
|
643
|
|
- "\n",
|
|
644
|
|
- "def filter_assets_value_MTM(risk):\n",
|
|
645
|
|
- "    \"\"\"Collect the asset, intent and source labels set on every row of `risk`.\n",
|
|
646
|
|
- "\n",
|
|
647
|
|
- "    For each row, every truthy ASSETS_VAL_*, INTENT_VAL_* and SOURCE_VAL_* column is rendered as\n",
|
|
648
|
|
- "    'RISK_V2.<column>=<description>'. The three per-row lists are appended to the module-level\n",
|
|
649
|
|
- "    asset_val_MTM, intent_val_MTM and source_val_MTM lists; nothing is returned.\n",
|
|
650
|
|
- "    \"\"\"\n",
|
|
651
|
|
- "    for i in range(len(risk)):\n",
|
|
652
|
|
- "        risks=[]\n",
|
|
653
|
|
- "        intents=[]\n",
|
|
654
|
|
- "        sources=[]\n",
|
|
655
|
|
- "        try:\n",
|
|
656
|
|
- "            row = risk.iloc[i]  # fetch the row once instead of once per column test\n",
|
|
657
|
|
- "            # Walk the MTM frame's own column list (the NTM list, risk_df_col, belongs to a different frame)\n",
|
|
658
|
|
- "            # NOTE(review): NaN is truthy, so a missing flag would also be reported here - confirm flags are never NaN\n",
|
|
659
|
|
- "            for key in risk_df_col_MTM:\n",
|
|
660
|
|
- "                if 'ASSETS_VAL_' in key and row[key]:\n",
|
|
661
|
|
- "                    risks.append('RISK_V2.' + key + '=' + get_asset_desc_MTM(key))\n",
|
|
662
|
|
- "                if 'INTENT_VAL_' in key and row[key]:\n",
|
|
663
|
|
- "                    intents.append('RISK_V2.' + key + '=' + get_intent_desc_MTM(key))\n",
|
|
664
|
|
- "                if 'SOURCE_VAL_' in key and row[key]:\n",
|
|
665
|
|
- "                    sources.append('RISK_V2.' + key + '=' + get_source_desc_MTM(key))\n",
|
|
666
|
|
- "        except Exception:\n",
|
|
667
|
|
- "            # Best effort: report the offending input and keep whatever was collected for this row.\n",
|
|
668
|
|
- "            # A bare `except:` would also swallow KeyboardInterrupt and make this loop impossible to stop.\n",
|
|
669
|
|
- "            print(risk)\n",
|
|
670
|
|
- "            print(type(risk))\n",
|
|
671
|
|
- "        finally:\n",
|
|
672
|
|
- "            asset_val_MTM.append(risks)\n",
|
|
673
|
|
- "            intent_val_MTM.append(intents)\n",
|
|
674
|
|
- "            source_val_MTM.append(sources)\n",
|
|
666
|
|
- "\n",
|
|
667
|
|
- "# In[378]:\n",
|
|
668
|
|
- "\n",
|
|
669
|
|
- "# modified\n",
|
|
670
|
|
- "def get_asset_desc_MTM(asset_field):\n",
|
|
671
|
|
- "    \"\"\"Return the description of an 'ASSETS_VAL_<k>' field name, or '' when the name is not known.\"\"\"\n",
|
|
672
|
|
- "    # Position k (1-based) holds the description of 'ASSETS_VAL_<k>'\n",
|
|
673
|
|
- "    descriptions = (\n",
|
|
674
|
|
- "        '공인-전체IP대역(유선)',\n",
|
|
675
|
|
- "        '공인-전체IP대역(무선)',\n",
|
|
676
|
|
- "        '공인-WEB서버',\n",
|
|
677
|
|
- "        '공인-내부응용서버',\n",
|
|
678
|
|
- "        '공인-DB서버',\n",
|
|
679
|
|
- "        '공인-패치서버',\n",
|
|
680
|
|
- "        '공인-네트워크',\n",
|
|
681
|
|
- "        '공인-보안',\n",
|
|
682
|
|
- "        '공인-업무용PC',\n",
|
|
683
|
|
- "        '공인-비업무용PC',\n",
|
|
684
|
|
- "        '공인-기타',\n",
|
|
685
|
|
- "        '사설-전체IP대역(유선)',\n",
|
|
686
|
|
- "        '사설-전체IP대역(무선)',\n",
|
|
687
|
|
- "        '사설-WEB서버',\n",
|
|
688
|
|
- "        '사설-내부응용서버',\n",
|
|
689
|
|
- "        '사설-DB서버',\n",
|
|
690
|
|
- "        '사설-패치서버',\n",
|
|
691
|
|
- "        '사설-네트워크',\n",
|
|
692
|
|
- "        '사설-보안',\n",
|
|
693
|
|
- "        '사설-업무용PC',\n",
|
|
694
|
|
- "        '사설-비업무용PC',\n",
|
|
695
|
|
- "        '사설-기타',\n",
|
|
696
|
|
- "    )\n",
|
|
697
|
|
- "    for position, description in enumerate(descriptions, start=1):\n",
|
|
698
|
|
- "        if asset_field == 'ASSETS_VAL_' + str(position):\n",
|
|
699
|
|
- "            return description\n",
|
|
700
|
|
- "    return ''\n",
|
|
717
|
|
- "\n",
|
|
718
|
|
- "\n",
|
|
719
|
|
- "# In[381]:\n",
|
|
720
|
|
- "\n",
|
|
721
|
|
- "\n",
|
|
722
|
|
- "# modified\n",
|
|
723
|
|
- "def filter_intent_MTM(intent):\n",
|
|
724
|
|
- "    \"\"\"Return 'RISK_V2.<key>=<description>' for every truthy INTENT_VAL_* entry of `intent`.\n",
|
|
725
|
|
- "\n",
|
|
726
|
|
- "    `intent` is iterated for its keys and then indexed by them (presumably a dict - verify against the caller).\n",
|
|
727
|
|
- "    Descriptions come from get_intent_desc_MTM, the lookup defined for the MTM section.\n",
|
|
728
|
|
- "    \"\"\"\n",
|
|
729
|
|
- "    return [\n",
|
|
730
|
|
- "        'RISK_V2.' + intent_key + '=' + get_intent_desc_MTM(intent_key)\n",
|
|
731
|
|
- "        for intent_key in intent\n",
|
|
732
|
|
- "        if 'INTENT_VAL_' in intent_key and intent[intent_key]\n",
|
|
733
|
|
- "    ]\n",
|
|
730
|
|
- "\n",
|
|
731
|
|
- "\n",
|
|
732
|
|
- "# In[382]:\n",
|
|
733
|
|
- "\n",
|
|
734
|
|
- "\n",
|
|
735
|
|
- "def get_intent_desc_MTM(intent_field):\n",
|
|
736
|
|
- "    \"\"\"Return the description of an 'INTENT_VAL_<k>' field name, or '' when the name is not known.\"\"\"\n",
|
|
737
|
|
- "    descriptions = (\n",
|
|
738
|
|
- "        ('INTENT_VAL_1', '파괴'),\n",
|
|
739
|
|
- "        ('INTENT_VAL_2', '유출'),\n",
|
|
740
|
|
- "        ('INTENT_VAL_3', '지연'),\n",
|
|
741
|
|
- "        ('INTENT_VAL_4', '잠복'),\n",
|
|
742
|
|
- "        ('INTENT_VAL_5', '단순침입'),\n",
|
|
743
|
|
- "        ('INTENT_VAL_6', 'MD5'),\n",
|
|
744
|
|
- "        ('INTENT_VAL_0', 'Default'),\n",
|
|
745
|
|
- "    )\n",
|
|
746
|
|
- "    for field_name, description in descriptions:\n",
|
|
747
|
|
- "        if intent_field == field_name:\n",
|
|
748
|
|
- "            return description\n",
|
|
749
|
|
- "    return ''\n",
|
|
752
|
|
- "\n",
|
|
753
|
|
- "\n",
|
|
754
|
|
- "\n",
|
|
755
|
|
- "# In[384]:\n",
|
|
756
|
|
- "\n",
|
|
757
|
|
- "\n",
|
|
758
|
|
- "# modified\n",
|
|
759
|
|
- "def filter_source_MTM(source):\n",
|
|
760
|
|
- "    \"\"\"Return 'RISK_V2.<key>=<description>' for every truthy SOURCE_VAL_* entry of `source`.\n",
|
|
761
|
|
- "\n",
|
|
762
|
|
- "    `source` is iterated for its keys and then indexed by them (presumably a dict - verify against the caller).\n",
|
|
763
|
|
- "    Descriptions come from get_source_desc_MTM, the lookup defined for the MTM section.\n",
|
|
764
|
|
- "    \"\"\"\n",
|
|
765
|
|
- "    return [\n",
|
|
766
|
|
- "        'RISK_V2.' + source_key + '=' + get_source_desc_MTM(source_key)\n",
|
|
767
|
|
- "        for source_key in source\n",
|
|
768
|
|
- "        if 'SOURCE_VAL_' in source_key and source[source_key]\n",
|
|
769
|
|
- "    ]\n",
|
|
766
|
|
- "\n",
|
|
767
|
|
- "\n",
|
|
768
|
|
- "# In[385]:\n",
|
|
769
|
|
- "\n",
|
|
770
|
|
- "\n",
|
|
771
|
|
- "def get_source_desc_MTM(source_field):\n",
|
|
772
|
|
- "    \"\"\"Return the description of a 'SOURCE_VAL_<k>' field name, or '' when the name is not known.\"\"\"\n",
|
|
773
|
|
- "    descriptions = (('SOURCE_VAL_1', '북한IP'), ('SOURCE_VAL_3', 'ECSC Black IP'))\n",
|
|
774
|
|
- "    for field_name, description in descriptions:\n",
|
|
775
|
|
- "        if source_field==field_name:\n",
|
|
776
|
|
- "            return description\n",
|
|
777
|
|
- "    return ''\n",
|
|
778
|
|
- "\n",
|
|
779
|
|
- "\n",
|
|
780
|
|
- "# In[386]:\n",
|
|
781
|
|
- "\n",
|
|
782
|
|
- "filter_assets_value_MTM(risk_df_MTM)  # the MTM variant is the one that fills asset_val_MTM / intent_val_MTM / source_val_MTM\n",
|
|
783
|
|
- "# NaN values are replaced further down (see the isna() checks), so an empty list is stored as an empty string instead of '[]'\n",
|
|
784
|
|
- "# New assets column\n",
|
|
785
|
|
- "MTM_df['ASSETS_VAL']= asset_val_MTM\n",
|
|
786
|
|
- "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].astype(str)\n",
|
|
787
|
|
- "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].str.replace('[','', regex=False)\n",
|
|
788
|
|
- "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].str.replace(']','', regex=False)\n",
|
|
789
|
|
- "MTM_df[:1]\n",
|
|
790
|
|
- "# New column of intent value\n",
|
|
791
|
|
- "MTM_df['INTENT_VAL']=intent_val_MTM\n",
|
|
792
|
|
- "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].astype(str)\n",
|
|
793
|
|
- "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].str.replace('[','',regex=False)\n",
|
|
794
|
|
- "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].str.replace(']','',regex=False)\n",
|
|
795
|
|
- "MTM_df[:1]\n",
|
|
796
|
|
- "# New column of SOURCE_VAL value (derived from MTM_df itself; reading NTM_df here would discard source_val_MTM)\n",
|
|
797
|
|
- "MTM_df['SOURCE_VAL']=source_val_MTM\n",
|
|
798
|
|
- "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].astype(str)\n",
|
|
799
|
|
- "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].str.replace('[','',regex=False)\n",
|
|
800
|
|
- "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].str.replace(']','',regex=False)\n",
|
|
801
|
|
- "MTM_df[:5]\n",
|
|
802
|
|
- "\n",
|
|
803
|
|
- "# In[361]:\n",
|
|
804
|
|
- "MTM_df = MTM_df.drop(columns=['RISK_V2'], errors='ignore')  # errors='ignore': re-running after the column is gone must not raise\n",
|
|
805
|
|
- "MTM_df.columns\n",
|
|
806
|
|
- "\n",
|
|
807
|
|
- "\n",
|
|
808
|
|
- "# In[388]:\n",
|
|
809
|
|
- "\n",
|
|
810
|
|
- "\n",
|
|
811
|
|
- "MTM_df.isna().sum()\n",
|
|
812
|
|
- "\n",
|
|
813
|
|
- "\n",
|
|
814
|
|
- "# In[389]:\n",
|
|
815
|
|
- "\n",
|
|
816
|
|
- "\n",
|
|
817
|
|
- "# Fill missing values in a single call: each column gets '' or 0 exactly as mapped below\n",
|
|
818
|
|
- "MTM_df = MTM_df.fillna({\n",
|
|
819
|
|
- "    'ACCD_DMG_PROTO_NM': '',\n",
|
|
820
|
|
- "    'INST_NM': '',\n",
|
|
821
|
|
- "    'DRULE_ATT_TYPE_CODE1': '',\n",
|
|
822
|
|
- "    'TW_ATT_IP': 0,\n",
|
|
823
|
|
- "    'TW_ATT_PORT': 0,\n",
|
|
824
|
|
- "    'TW_DMG_IP': 0,\n",
|
|
825
|
|
- "    'TW_DMG_PORT': 0,\n",
|
|
826
|
|
- "    'TW_ATT_CT_NM': '',\n",
|
|
827
|
|
- "    'ASSETS_VAL': 0,\n",
|
|
828
|
|
- "    'INTENT_VAL': 0,\n",
|
|
829
|
|
- "    'SOURCE_VAL': 0,\n",
|
|
830
|
|
- "    'DRULE_NM': ''})\n",
|
|
831
|
|
- "\n",
|
|
832
|
|
- "# In[390]:\n",
|
|
833
|
|
- "\n",
|
|
834
|
|
- "\n",
|
|
835
|
|
- "# Check NaN out again\n",
|
|
836
|
|
- "MTM_df.isna().sum()\n",
|
|
837
|
|
- "\n",
|
|
838
|
|
- "\n",
|
|
839
|
|
- "# In[391]:\n",
|
|
840
|
|
- "\n",
|
|
841
|
|
- "# Drop the ACCD_FIND_MTD_CODE column (MTM_df was selected on ACCD_FIND_MTD_CODE == 2, so it is constant here)\n",
|
|
842
|
|
- "MTM_df = MTM_df.drop(columns=['ACCD_FIND_MTD_CODE'], errors='ignore')\n",
|
|
843
|
|
- "\n",
|
|
844
|
|
- "# Takes arr and returns the PrefixSpan result over every n-column combination of it\n",
|
|
845
|
|
- "def get_combination_MTM(arr, n):\n",
|
|
846
|
|
- "    \"\"\"Mine frequent value patterns over every n-column combination of `arr`.\n",
|
|
847
|
|
- "\n",
|
|
848
|
|
- "    Only combinations that contain the 'DRULE_ATT_TYPE_CODE1' column are used. For each such\n",
|
|
849
|
|
- "    combination every row contributes one sequence made of its values in those columns; rows with\n",
|
|
850
|
|
- "    an empty-string value in any selected column are skipped. All sequences are passed together\n",
|
|
851
|
|
- "    to get_prefixspan_MTM and its result is returned. When no sequence is left, an empty frame\n",
|
|
852
|
|
- "    with the columns Cause / Effect / Frequency is returned instead.\n",
|
|
853
|
|
- "    \"\"\"\n",
|
|
854
|
|
- "    combination_n = [\n",
|
|
855
|
|
- "        com for com in itertools.combinations(arr.columns.tolist(),n)\n",
|
|
856
|
|
- "        if 'DRULE_ATT_TYPE_CODE1' in com\n",
|
|
857
|
|
- "    ]\n",
|
|
858
|
|
- "    com_list=[]\n",
|
|
859
|
|
- "    for com in combination_n:\n",
|
|
860
|
|
- "        # Plain tuples per row are far cheaper than building a Series with arr.iloc[i] for every row\n",
|
|
861
|
|
- "        for values in arr[list(com)].itertuples(index=False, name=None):\n",
|
|
862
|
|
- "            # Blank handling: drop the whole row. A truncated sequence can never yield an n-item pattern,\n",
|
|
863
|
|
- "            # and if it came first it used to set the wrong pattern length in get_prefixspan_MTM.\n",
|
|
864
|
|
- "            if any(value == '' for value in values):\n",
|
|
865
|
|
- "                continue\n",
|
|
866
|
|
- "            com_list.append(list(values))\n",
|
|
867
|
|
- "    if not com_list:\n",
|
|
868
|
|
- "        # Nothing to mine (no rows, no matching combination, or only blank rows)\n",
|
|
869
|
|
- "        return pd.DataFrame(columns=['Cause','Effect','Frequency'])\n",
|
|
870
|
|
- "    prefix = get_prefixspan_MTM(com_list)\n",
|
|
871
|
|
- "    return prefix\n",
|
|
862
|
|
- "\n",
|
|
863
|
|
- "def get_prefixspan_MTM(load_list):\n",
|
|
864
|
|
- "    \"\"\"Run PrefixSpan over `load_list` and return its full-length patterns as a frame.\n",
|
|
865
|
|
- "\n",
|
|
866
|
|
- "    load_list is a list of item sequences. Patterns shorter than the longest input sequence are\n",
|
|
867
|
|
- "    filtered out. The result is sorted by descending 'Frequency' and passed through get_effect_MTM.\n",
|
|
868
|
|
- "    \"\"\"\n",
|
|
869
|
|
- "    # Pattern length to keep = longest sequence. len(load_list[0]) was wrong whenever the first sequence\n",
|
|
870
|
|
- "    # had been cut short, and raised IndexError for an empty list.\n",
|
|
871
|
|
- "    n = max((len(sequence) for sequence in load_list), default=0)\n",
|
|
872
|
|
- "    save_list = PrefixSpan(load_list)\n",
|
|
873
|
|
- "    # PrefixSpan also reports patterns with fewer than n items even though every input has n items;\n",
|
|
874
|
|
- "    # the filter keeps only patterns that contain all n item values.\n",
|
|
875
|
|
- "    save_list = save_list.frequent(1,filter = lambda patt, matches:len(patt)>=n)\n",
|
|
876
|
|
- "    # Name the columns up front so that an empty result still has 'Frequency' to sort by\n",
|
|
877
|
|
- "    save_df = pd.DataFrame(save_list, columns=['Frequency','Cause'])\n",
|
|
878
|
|
- "    save_df = save_df.sort_values(by=['Frequency'],ascending=False,ignore_index=True)\n",
|
|
879
|
|
- "    save_df = get_effect_MTM(save_df)\n",
|
|
880
|
|
- "    return save_df\n",
|
|
874
|
|
- "\n",
|
|
875
|
|
- "def get_effect_MTM(edit_df):\n",
|
|
876
|
|
- "    \"\"\"Return `edit_df` with an added 'Effect' column, ordered as Cause / Effect / Frequency.\n",
|
|
877
|
|
- "\n",
|
|
878
|
|
- "    edit_df needs a 'Cause' column (one iterable pattern per row) and a 'Frequency' column.\n",
|
|
879
|
|
- "    'Effect' is the last item of 'Cause' that equals one of the labels in `drules`, or NaN when\n",
|
|
880
|
|
- "    the pattern contains none of them. A new frame is returned; the input frame is left unchanged.\n",
|
|
881
|
|
- "    \"\"\"\n",
|
|
882
|
|
- "    drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
|
|
883
|
|
- "\n",
|
|
884
|
|
- "    def _last_drule(cause):\n",
|
|
885
|
|
- "        # Same scan order as before: a later matching item overrides an earlier one\n",
|
|
886
|
|
- "        effect = np.nan\n",
|
|
887
|
|
- "        for item in cause:\n",
|
|
888
|
|
- "            if item in drules:\n",
|
|
889
|
|
- "                effect = item\n",
|
|
890
|
|
- "        return effect\n",
|
|
891
|
|
- "\n",
|
|
892
|
|
- "    # Build the whole column in one pass. Assigning cell by cell with .loc was the slow step on large\n",
|
|
893
|
|
- "    # results and wrote into a column-sliced frame.\n",
|
|
894
|
|
- "    result_df = edit_df[['Cause','Frequency']].copy()\n",
|
|
895
|
|
- "    result_df.insert(1, 'Effect', [_last_drule(cause) for cause in result_df['Cause']])\n",
|
|
896
|
|
- "    return result_df\n",
|
|
888
|
|
- "\n",
|
|
889
|
|
- "\n",
|
|
890
|
|
- "\n",
|
|
891
|
|
- "# 1. Combinations of two items (uses the MTM helpers defined above)\n",
|
|
892
|
|
- "prefix_of_two_MTM = get_combination_MTM(MTM_df,2)\n",
|
|
893
|
|
- "\n",
|
|
894
|
|
- "# 2. Combinations of three items\n",
|
|
895
|
|
- "prefix_of_three_MTM = get_combination_MTM(MTM_df, 3)\n",
|
|
896
|
|
- "\n",
|
|
897
|
|
- "# 3. Combinations of four items\n",
|
|
898
|
|
- "prefix_of_four_MTM = get_combination_MTM(MTM_df, 4)\n",
|
|
899
|
|
- "\n",
|
|
900
|
|
- "# 4. Combinations of five items\n",
|
|
901
|
|
- "prefix_of_five_MTM = get_combination_MTM(MTM_df, 5)\n",
|
|
902
|
|
- "\n",
|
|
903
|
|
- "\n",
|
|
904
|
|
- "# 5. Combinations of six items\n",
|
|
905
|
|
- "prefix_of_six_MTM = get_combination_MTM(MTM_df, 6)\n",
|
|
906
|
|
- "\n",
|
|
907
|
|
- "##################### MTM section End #####################"
|
|
908
|
|
- ]
|
|
909
|
|
- }
|
|
910
|
|
- ],
|
|
911
|
|
- "metadata": {
|
|
912
|
|
- "anaconda-cloud": {},
|
|
913
|
|
- "kernelspec": {
|
|
914
|
|
- "display_name": "Python 3",
|
|
915
|
|
- "language": "python",
|
|
916
|
|
- "name": "python3"
|
|
917
|
|
- },
|
|
918
|
|
- "language_info": {
|
|
919
|
|
- "codemirror_mode": {
|
|
920
|
|
- "name": "ipython",
|
|
921
|
|
- "version": 3
|
|
922
|
|
- },
|
|
923
|
|
- "file_extension": ".py",
|
|
924
|
|
- "mimetype": "text/x-python",
|
|
925
|
|
- "name": "python",
|
|
926
|
|
- "nbconvert_exporter": "python",
|
|
927
|
|
- "pygments_lexer": "ipython3",
|
|
928
|
|
- "version": "3.8.8"
|
|
929
|
|
- }
|
|
930
|
|
- },
|
|
931
|
|
- "nbformat": 4,
|
|
932
|
|
- "nbformat_minor": 4
|
|
933
|
|
-}
|