Browse Source

파일 업로드 'keris.ipynb'

master
YoonJoohyun 4 years ago
parent
commit
c9232a383d
1 changed files with 210 additions and 0 deletions
  1. 210
    0
      keris.ipynb/PrefixSpan_20210925.py

+ 210
- 0
keris.ipynb/PrefixSpan_20210925.py View File

@@ -0,0 +1,210 @@
1
+#!/usr/bin/env python
2
+# coding: utf-8
3
+
4
+# In[1]:
5
+
6
+
7
+import os
8
+import array
9
+import math
10
+import pickle
11
+# import joblib
12
+import sys
13
+import argparse
14
+import pandas as pd
15
+import numpy as np
16
+import matplotlib.pyplot as plt
17
+from datetime import datetime
18
+from pprint import pprint
19
+import ssl
20
+from elasticsearch.connection import create_ssl_context
21
+from elasticsearch import Elasticsearch
22
+from elasticsearch import helpers
23
+import urllib3
24
+
25
+
26
+# In[3]:
27
+
28
+
29
+import pandas as pd
30
+import numpy as np
31
+from mlxtend.preprocessing import TransactionEncoder
32
+from mlxtend.frequent_patterns import association_rules, fpgrowth
33
+from prefixspan import PrefixSpan
34
+
35
+
36
+# In[4]:
37
+
38
+
39
# Elasticsearch connection settings. Every value can be overridden through an
# environment variable; the fallbacks are the values this script used before,
# so running it without any of the variables set behaves as it always did.
# NOTE(security): the fallback password is committed in plain text. Rotate it
# and supply ES_PASSWORD from the environment instead of relying on the default.
ES_HOST = os.environ.get('ES_HOST', '223.194.92.152')
ES_PORT = int(os.environ.get('ES_PORT', '9200'))
ES_USER = os.environ.get('ES_USER', 'elasticsearch')
ES_PASSWORD = os.environ.get('ES_PASSWORD', 'hadoop2019@!@#$')

# SSL context with hostname checking and certificate verification switched off.
ssl_context = create_ssl_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE


# In[12]:


# Client used by the queries below; the 300 s timeout is unchanged.
es = Elasticsearch(
    hosts=[{'host': ES_HOST, 'port': ES_PORT}],
    scheme="http",
    verify_certs=False,
    timeout=300,
    ssl_context=ssl_context,
    http_auth=(ES_USER, ES_PASSWORD),
)
# Silence urllib3's InsecureRequestWarning, since verification is disabled above.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
49
+
50
+
51
+# In[135]:
52
+
53
+
54
+######## 2020, 1 year ########
55
+
56
# Bounds of the TW_COLLECT_DT range filter.
# NOTE(review): the upper bound is 2020-12-31T00:00:00.625+09:00, so documents
# collected later on 31 December are not matched -- confirm this is intended.
collect_dt_range = {
    "gte": "2020-01-01T00:00:00.625+09:00",
    "lte": "2020-12-31T00:00:00.625+09:00",
}

# Search request: at most 100 hits are asked for.
body = {
    "size": 100,
    "query": {"range": {"TW_COLLECT_DT": collect_dt_range}},
}

res = es.search(index='ts_data_accident-2020', body=body)
hits = res['hits']
data = hits['hits']
total = hits['total']

print(total)

# Keep only the stored document (_source) of every returned hit.
accident = [hit['_source'] for hit in data]

df = pd.DataFrame(accident)

print(df.head())
84
+
85
+
86
+# In[136]:
87
+
88
+
89
# Columns kept for the rest of the analysis, in the order they appear in df.
selected_columns = [
    'RISK_V2',
    'INST_NM',
    'DRULE_ATT_TYPE_CODE1',
    'TW_ATT_IP',
    'TW_ATT_PORT',
    'TW_DMG_IP',
    'TW_DMG_PORT',
    'ACCD_DMG_PROTO_NM',
    'TW_ATT_CT_NM',
    'ACCD_FIND_MTD_CODE',
]
df = df[selected_columns]
df.head()


# In[248]:


# import ast

# Pull RISK_V2 out on its own so that the asset, risk, intent and black-IP
# values can be extracted from it.
RISK_V2 = df['RISK_V2']
100
+# risk_values=RISK_V2.values
101
+# print(risk_values)
102
+
103
+
104
+# print(type(risk_value[0]))
105
+
106
+
107
+# risk_v2_zero=RISK_V2[0]
108
+# print(RISK_V2.values[:2])
109
+# dict_risk_v2=ast.literal_eval(RISK_V2[0])
110
+# print(dict[0])
111
+
112
+
113
+# In[229]:
114
+
115
+
116
def filter_assets_value(risk):
  """Return the 'ASSETS_VAL_*' keys of *risk* whose values are truthy.

  Args:
    risk: One RISK_V2 entry. A dict is expected; anything else (for example
      None or a NaN placeholder for a missing value) is reported on stdout
      and treated as having no asset keys.

  Returns:
    A list of the keys that contain the substring 'ASSETS_VAL_' and map to a
    truthy value, in the dict's iteration order. Empty when nothing matches
    or when *risk* is not a dict.
  """
  if not isinstance(risk, dict):
    # Same diagnostics the function has always printed for unusable input,
    # but without a bare ``except`` / ``return``-in-``finally`` that would
    # also swallow KeyboardInterrupt and genuine programming errors.
    print(risk)
    print(type(risk))
    return []

  # Non-string keys cannot contain the marker; skip them instead of letting
  # the substring test raise and cut the scan short.
  return [
    key
    for key, value in risk.items()
    if isinstance(key, str) and 'ASSETS_VAL_' in key and value
  ]
127
+  
128
+  
129
+
130
+
131
+# In[106]:
132
+
133
+
134
+# # modified
135
+# def filter_assets_value(risk):
136
+#   risks=[]
137
+#   for risk_key in risk:
138
+#     if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
139
+#      risk_key_desc = 'RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key)
140
+#      risks.append(risk_key_desc)
141
+#   return risks
142
+
143
+
144
+# In[115]:
145
+
146
+
147
+# modified
148
# Description label for each ASSETS_VAL_* field name. The labels are Korean
# runtime strings and are returned verbatim; fields 1-11 carry the prefix
# '공인' ("public") and fields 12-22 the prefix '사설' ("private").
_ASSET_DESCRIPTIONS = {
  'ASSETS_VAL_1': '공인-전체IP대역(유선)',
  'ASSETS_VAL_2': '공인-전체IP대역(무선)',
  'ASSETS_VAL_3': '공인-WEB서버',
  'ASSETS_VAL_4': '공인-내부응용서버',
  'ASSETS_VAL_5': '공인-DB서버',
  'ASSETS_VAL_6': '공인-패치서버',
  'ASSETS_VAL_7': '공인-네트워크',
  'ASSETS_VAL_8': '공인-보안',
  'ASSETS_VAL_9': '공인-업무용PC',
  'ASSETS_VAL_10': '공인-비업무용PC',
  'ASSETS_VAL_11': '공인-기타',
  'ASSETS_VAL_12': '사설-전체IP대역(유선)',
  'ASSETS_VAL_13': '사설-전체IP대역(무선)',
  'ASSETS_VAL_14': '사설-WEB서버',
  'ASSETS_VAL_15': '사설-내부응용서버',
  'ASSETS_VAL_16': '사설-DB서버',
  'ASSETS_VAL_17': '사설-패치서버',
  'ASSETS_VAL_18': '사설-네트워크',
  'ASSETS_VAL_19': '사설-보안',
  'ASSETS_VAL_20': '사설-업무용PC',
  'ASSETS_VAL_21': '사설-비업무용PC',
  'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
  """Return the description label for an ASSETS_VAL_* field name.

  Args:
    asset_field: Field name such as 'ASSETS_VAL_3'.

  Returns:
    The matching label, or '' when *asset_field* is not one of
    'ASSETS_VAL_1' .. 'ASSETS_VAL_22'.
  """
  # Non-string input can never equal a field name; answer '' up front so an
  # unhashable argument does not raise during the table lookup.
  if not isinstance(asset_field, str):
    return ''
  return _ASSET_DESCRIPTIONS.get(asset_field, '')
195
+
196
+
197
+# In[250]:
198
+
199
+
200
+# New assets column
201
# One list of truthy ASSETS_VAL_* keys per RISK_V2 entry, in row order.
x = [filter_assets_value(risk) for risk in RISK_V2]
len(x)
204
+
205
+
206
+# In[ ]:
207
+
208
+
209
+
210
+

Loading…
Cancel
Save