| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210 |
- #!/usr/bin/env python
- # coding: utf-8
-
- # In[1]:
-
-
- import os
- import array
- import math
- import pickle
- # import joblib
- import sys
- import argparse
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- from datetime import datetime
- from pprint import pprint
- import ssl
- from elasticsearch.connection import create_ssl_context
- from elasticsearch import Elasticsearch
- from elasticsearch import helpers
- import urllib3
-
-
- # In[3]:
-
-
- import pandas as pd
- import numpy as np
- from mlxtend.preprocessing import TransactionEncoder
- from mlxtend.frequent_patterns import association_rules, fpgrowth
- from prefixspan import PrefixSpan
-
-
- # In[4]:
-
-
# TLS context with hostname checking and certificate verification both
# switched off.
ssl_context = create_ssl_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE

# Connection settings can be overridden through the environment variables
# ES_HOST / ES_PORT / ES_USER / ES_PASSWORD; when a variable is unset the
# previous literal value is used, so default behaviour is unchanged.
# NOTE(review): the fallback password is still stored in source. Rotate it and
# remove the default once the environment variables are provisioned.
es = Elasticsearch(
    hosts=[{
        'host': os.environ.get('ES_HOST', '223.194.92.152'),
        'port': int(os.environ.get('ES_PORT', '9200')),
    }],
    scheme="http",
    verify_certs=False,
    timeout=300,
    ssl_context=ssl_context,
    http_auth=(
        os.environ.get('ES_USER', 'elasticsearch'),
        os.environ.get('ES_PASSWORD', 'hadoop2019@!@#$'),
    ),
)
# Silence urllib3's InsecureRequestWarning, since verification is disabled above.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-
- # In[135]:
-
-
# Range query on TW_COLLECT_DT for 2020 (+09:00 offsets), requesting at most
# 100 hits.
# NOTE(review): the upper bound stops at the start of 2020-12-31, and only 100
# hits are fetched - confirm both are intended for a "one year" extract.
body = {
    "size": 100,
    "query": {
        "range": {
            "TW_COLLECT_DT": {
                "gte": "2020-01-01T00:00:00.625+09:00",
                "lte": "2020-12-31T00:00:00.625+09:00",
            },
        },
    },
}

res = es.search(index='ts_data_accident-2020', body=body)
data = res['hits']['hits']
total = res['hits']['total']

print(total)

# Each hit's '_source' document becomes one row of the DataFrame.
accident = [hit['_source'] for hit in data]
df = pd.DataFrame(accident)

print(df.head())
-
-
- # In[136]:
-
-
# Restrict the DataFrame to this fixed set of columns.
selected_columns = [
    'RISK_V2',
    'INST_NM',
    'DRULE_ATT_TYPE_CODE1',
    'TW_ATT_IP',
    'TW_ATT_PORT',
    'TW_DMG_IP',
    'TW_DMG_PORT',
    'ACCD_DMG_PROTO_NM',
    'TW_ATT_CT_NM',
    'ACCD_FIND_MTD_CODE',
]
df = df[selected_columns]
df.head()
-
-
- # In[248]:
-
-
- # import ast
-
# Pull out the RISK_V2 column so the asset, risk, intent and blacklisted-IP
# information it holds can be extracted from it.
RISK_V2 = df['RISK_V2']
- # risk_values=RISK_V2.values
- # print(risk_values)
-
-
- # print(type(risk_value[0]))
-
-
- # risk_v2_zero=RISK_V2[0]
- # print(RISK_V2.values[:2])
- # dict_risk_v2=ast.literal_eval(RISK_V2[0])
- # print(dict[0])
-
-
- # In[229]:
-
-
def filter_assets_value(risk):
    """Return the asset field names that are set in one RISK_V2 entry.

    Args:
        risk: A mapping whose keys are field names. Any other value (for
            example ``None`` or a float NaN from a missing cell) is treated
            as malformed.

    Returns:
        A list of the keys that contain ``'ASSETS_VAL_'`` and whose value is
        truthy, in iteration order. If processing fails part-way, the keys
        collected up to that point are returned.
    """
    risks = []
    try:
        for risk_key in risk:
            if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
                risks.append(risk_key)
    except Exception:
        # Best effort: report the malformed entry and carry on with whatever
        # was collected. Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        print(risk)
        print(type(risk))
    # Returning here rather than inside a `finally` clause avoids silently
    # discarding exceptions that were deliberately not caught above.
    return risks
-
-
-
-
- # In[106]:
-
-
- # # modified
- # def filter_assets_value(risk):
- # risks=[]
- # for risk_key in risk:
- # if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
- # risk_key_desc = 'RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key)
- # risks.append(risk_key_desc)
- # return risks
-
-
- # In[115]:
-
-
- # modified
# Lookup table from asset field name to its description string.
# Fields 1-11 carry the '공인-' prefix and fields 12-22 the '사설-' prefix.
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the description string for an asset field name.

    Args:
        asset_field: A field name such as ``'ASSETS_VAL_1'`` through
            ``'ASSETS_VAL_22'``.

    Returns:
        The matching description, or ``''`` when the name is not one of the
        22 known fields (including non-string input).
    """
    # Non-string input can never match a key; answering '' here also keeps
    # unhashable values (e.g. lists) from raising inside the dict lookup.
    if not isinstance(asset_field, str):
        return ''
    return _ASSET_DESCRIPTIONS.get(asset_field, '')
-
-
- # In[250]:
-
-
# Build the new assets data: one list of set asset field names per RISK_V2 entry.
x = [filter_assets_value(entry) for entry in RISK_V2]
len(x)
-
-
- # In[ ]:
-
-
-
|