|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+#!/usr/bin/env python
|
|
|
2
|
+# coding: utf-8
|
|
|
3
|
+
|
|
|
4
|
+# In[1]:
|
|
|
5
|
+
|
|
|
6
|
+
|
|
|
7
|
+import os
|
|
|
8
|
+import array
|
|
|
9
|
+import math
|
|
|
10
|
+import pickle
|
|
|
11
|
+# import joblib
|
|
|
12
|
+import sys
|
|
|
13
|
+import argparse
|
|
|
14
|
+import pandas as pd
|
|
|
15
|
+import numpy as np
|
|
|
16
|
+import matplotlib.pyplot as plt
|
|
|
17
|
+from datetime import datetime
|
|
|
18
|
+from pprint import pprint
|
|
|
19
|
+import ssl
|
|
|
20
|
+from elasticsearch.connection import create_ssl_context
|
|
|
21
|
+from elasticsearch import Elasticsearch
|
|
|
22
|
+from elasticsearch import helpers
|
|
|
23
|
+import urllib3
|
|
|
24
|
+
|
|
|
25
|
+
|
|
|
26
|
+# In[3]:
|
|
|
27
|
+
|
|
|
28
|
+
|
|
|
29
|
+import pandas as pd
|
|
|
30
|
+import numpy as np
|
|
|
31
|
+from mlxtend.preprocessing import TransactionEncoder
|
|
|
32
|
+from mlxtend.frequent_patterns import association_rules, fpgrowth
|
|
|
33
|
+from prefixspan import PrefixSpan
|
|
|
34
|
+
|
|
|
35
|
+
|
|
|
36
|
+# In[4]:
|
|
|
37
|
+
|
|
|
38
|
+
|
|
|
39
|
# TLS context with hostname checking and certificate verification disabled.
# NOTE(review): this accepts any server certificate - confirm that is
# acceptable for the network this runs on.
ssl_context = create_ssl_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE


# In[12]:


# Connection settings. Each value can be overridden through the environment;
# the defaults are the values that used to be hard-coded in the call below,
# so behaviour is unchanged when no variable is set.
# NOTE(review): a real password is still present as a default - rotate it and
# drop the default once every caller supplies ES_PASSWORD.
ES_HOST = os.environ.get('ES_HOST', '223.194.92.152')
ES_PORT = int(os.environ.get('ES_PORT', '9200'))
ES_USER = os.environ.get('ES_USER', 'elasticsearch')
ES_PASSWORD = os.environ.get('ES_PASSWORD', 'hadoop2019@!@#$')

# NOTE(review): scheme is "http" while an ssl_context is also passed;
# presumably the context is unused over plain HTTP - confirm.
es = Elasticsearch(
    hosts=[{'host': ES_HOST, 'port': ES_PORT}],
    scheme="http",
    verify_certs=False,
    timeout=300,
    ssl_context=ssl_context,
    http_auth=(ES_USER, ES_PASSWORD),
)

# Silence urllib3's InsecureRequestWarning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
49
|
+
|
|
|
50
|
+
|
|
|
51
|
+# In[135]:
|
|
|
52
|
+
|
|
|
53
|
+
|
|
|
54
|
+######## 2020, 1 year ########
|
|
|
55
|
+
|
|
|
56
|
# Range query on TW_COLLECT_DT, limited to 100 hits.
body = {
    "size": 100,
    "query": {
        "range": {
            "TW_COLLECT_DT": {
                "gte": "2020-01-01T00:00:00.625+09:00",
                # NOTE(review): the upper bound sits just after midnight on
                # 31 Dec, so the rest of that day is outside the range -
                # confirm this is intended.
                "lte": "2020-12-31T00:00:00.625+09:00",
            },
        },
    },
}

res = es.search(index='ts_data_accident-2020', body=body)
hits = res['hits']
data = hits['hits']
total = hits['total']

print(total)

# One row per hit, built from each hit's '_source' document.
accident = [hit['_source'] for hit in data]
df = pd.DataFrame(accident)

print(df.head())
|
|
|
84
|
+
|
|
|
85
|
+
|
|
|
86
|
+# In[136]:
|
|
|
87
|
+
|
|
|
88
|
+
|
|
|
89
|
# Columns kept for the rest of the analysis; everything else is dropped.
_selected_columns = [
    'RISK_V2',
    'INST_NM',
    'DRULE_ATT_TYPE_CODE1',
    'TW_ATT_IP',
    'TW_ATT_PORT',
    'TW_DMG_IP',
    'TW_DMG_PORT',
    'ACCD_DMG_PROTO_NM',
    'TW_ATT_CT_NM',
    'ACCD_FIND_MTD_CODE',
]
df = df[_selected_columns]
df.head()


# In[248]:


# import ast

# Extract the RISK_V2 column so that the asset, risk, intent and black-IP
# values can be pulled out of it.
RISK_V2 = df['RISK_V2']
|
|
|
100
|
+# risk_values=RISK_V2.values
|
|
|
101
|
+# print(risk_values)
|
|
|
102
|
+
|
|
|
103
|
+
|
|
|
104
|
+# print(type(risk_value[0]))
|
|
|
105
|
+
|
|
|
106
|
+
|
|
|
107
|
+# risk_v2_zero=RISK_V2[0]
|
|
|
108
|
+# print(RISK_V2.values[:2])
|
|
|
109
|
+# dict_risk_v2=ast.literal_eval(RISK_V2[0])
|
|
|
110
|
+# print(dict[0])
|
|
|
111
|
+
|
|
|
112
|
+
|
|
|
113
|
+# In[229]:
|
|
|
114
|
+
|
|
|
115
|
+
|
|
|
116
|
def filter_assets_value(risk):
    """Return the ``ASSETS_VAL_*`` keys of *risk* whose values are truthy.

    Args:
        risk: A mapping of field names to values. Any other input (``None``,
            ``NaN``, a string, a list, ...) is treated as "no asset
            information".

    Returns:
        A list of the keys that contain ``'ASSETS_VAL_'`` and map to a truthy
        value, in the mapping's iteration order. An empty list is returned
        for non-mapping input.
    """
    from collections.abc import Mapping

    if not isinstance(risk, Mapping):
        # Keep the diagnostic output for unexpected input, but detect it
        # explicitly rather than with a bare ``except`` plus ``return`` inside
        # ``finally``, which also swallowed KeyboardInterrupt and SystemExit.
        print(risk)
        print(type(risk))
        return []

    # Non-string keys cannot contain the marker; skip them instead of letting
    # the substring test raise and cut the scan short.
    return [
        key
        for key, value in risk.items()
        if isinstance(key, str) and 'ASSETS_VAL_' in key and value
    ]
|
|
|
127
|
+
|
|
|
128
|
+
|
|
|
129
|
+
|
|
|
130
|
+
|
|
|
131
|
+# In[106]:
|
|
|
132
|
+
|
|
|
133
|
+
|
|
|
134
|
+# # modified
|
|
|
135
|
+# def filter_assets_value(risk):
|
|
|
136
|
+# risks=[]
|
|
|
137
|
+# for risk_key in risk:
|
|
|
138
|
+# if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
|
|
|
139
|
+# risk_key_desc = 'RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key)
|
|
|
140
|
+# risks.append(risk_key_desc)
|
|
|
141
|
+# return risks
|
|
|
142
|
+
|
|
|
143
|
+
|
|
|
144
|
+# In[115]:
|
|
|
145
|
+
|
|
|
146
|
+
|
|
|
147
|
+# modified
|
|
|
148
|
# Labels for the ASSETS_VAL_<n> field names. The labels are runtime strings
# and stay in Korean: the "공인-" prefix (fields 1-11) means "public" and the
# "사설-" prefix (fields 12-22) means "private". Both groups list the same
# eleven asset kinds in the same order: entire IP range (wired), entire IP
# range (wireless), WEB server, internal application server, DB server, patch
# server, network, security, business PC, non-business PC, other.
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the label for an ``ASSETS_VAL_<n>`` field name.

    Args:
        asset_field: Field name such as ``'ASSETS_VAL_3'``.

    Returns:
        The matching label, or ``''`` when *asset_field* is not one of the 22
        known field names.
    """
    try:
        return _ASSET_DESCRIPTIONS.get(asset_field, '')
    except TypeError:
        # Unhashable input (list, dict, ...) cannot be a field name; answer
        # '' just as a chain of equality comparisons would.
        return ''
|
|
|
195
|
+
|
|
|
196
|
+
|
|
|
197
|
+# In[250]:
|
|
|
198
|
+
|
|
|
199
|
+
|
|
|
200
|
+# New assets column
|
|
|
201
|
# One list of truthy ASSETS_VAL_* keys per RISK_V2 entry.
x = [filter_assets_value(risk) for risk in RISK_V2]
# print(list(filter(lambda n:n!='None',df['ASSETS_VAL'])))
len(x)
|
|
|
204
|
+
|
|
|
205
|
+
|
|
|
206
|
+# In[ ]:
|
|
|
207
|
+
|
|
|
208
|
+
|
|
|
209
|
+
|
|
|
210
|
+
|