#!/usr/bin/env python
# coding: utf-8
#
# Notebook export: load 2020 accident records from Elasticsearch into a
# pandas DataFrame and pull out the RISK_V2 column for asset analysis.
# The "# In[n]:" markers are the original notebook cell boundaries.

# In[1]:

import os
import array
import math
import pickle
# import joblib
import sys
import argparse
import ssl
from datetime import datetime
from pprint import pprint

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from elasticsearch.connection import create_ssl_context
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import urllib3

# In[3]:

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import association_rules, fpgrowth
from prefixspan import PrefixSpan

# In[4]:

# NOTE(review): hostname checking and certificate verification are switched
# off on this context -- confirm that is acceptable for this cluster.
ssl_context = create_ssl_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE

# In[12]:

# NOTE(review): the host address and the credentials are hard-coded in the
# source; consider moving them to environment variables or a config file.
es = Elasticsearch(
    hosts=[{'host': '223.194.92.152', 'port': 9200}],
    scheme="http",
    verify_certs=False,
    timeout=300,
    ssl_context=ssl_context,
    http_auth=("elasticsearch", "hadoop2019@!@#$"),
)
# Silence the warning urllib3 emits for unverified HTTPS requests.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# In[135]:

######## 2020, 1 year ########
# Range query on TW_COLLECT_DT.
# NOTE(review): "size" is 100, so at most 100 hits come back even though the
# range spans the year; the "lte" bound also stops at 00:00 on 2020-12-31.
body = {
    "size": 100,
    "query": {
        "range": {
            "TW_COLLECT_DT": {
                "gte": "2020-01-01T00:00:00.625+09:00",
                "lte": "2020-12-31T00:00:00.625+09:00",  ################
            }
        }
    }
}
res = es.search(index='ts_data_accident-2020', body=body)
data = res['hits']['hits']
total = res['hits']['total']
print(total)

# Collect the raw document (_source) of every hit.
accident = []
for da in data:
    att_type = da['_source']
    # att_type["POL_NM"]=att_type["SCEN_INFOS"][0]["POL_NM"]
    accident.append(att_type)

# df = pd.DataFrame(accident,dtype=str)
df = pd.DataFrame(accident)
print(df.head())

# In[136]:

# Keep only the columns used by the later analysis.
df = df[[
    'RISK_V2',
    'INST_NM',
    'DRULE_ATT_TYPE_CODE1',
    'TW_ATT_IP',
    'TW_ATT_PORT',
    'TW_DMG_IP',
    'TW_DMG_PORT',
    'ACCD_DMG_PROTO_NM',
    'TW_ATT_CT_NM',
    'ACCD_FIND_MTD_CODE',
]]
df.head()

# In[248]:

# import ast
# Pull out RISK_V2 so the asset, risk, intent and black-IP values can be
# extracted from it.
RISK_V2 = df['RISK_V2']
# risk_values=RISK_V2.values
# print(risk_values)
# print(type(risk_value[0]))
# risk_v2_zero=RISK_V2[0]
# print(RISK_V2.values[:2])
# dict_risk_v2=ast.literal_eval(RISK_V2[0])
# print(dict[0])

# In[229]:


def filter_assets_value(risk):
    """Return the ``ASSETS_VAL_*`` keys of *risk* whose values are truthy.

    Args:
        risk: Mapping-like RISK_V2 record. Every key containing the
            substring ``'ASSETS_VAL_'`` is looked up in *risk*, and the key
            is kept when its value is truthy.

    Returns:
        list: The matching keys in iteration order. If *risk* cannot be
        processed (for example it is not iterable, or a lookup fails), the
        offending value and its type are printed and the keys collected so
        far are returned.
    """
    risks = []
    try:
        for risk_key in risk:
            if 'ASSETS_VAL_' in risk_key and risk[risk_key]:
                risks.append(risk_key)
    except Exception:
        # Best effort: report the malformed record and keep going.  Catching
        # Exception (not a bare except with a return inside finally) lets
        # KeyboardInterrupt and SystemExit propagate instead of being
        # silently discarded.
        print(risk)
        print(type(risk))
    return risks


# In[106]:

# A disabled earlier variant of filter_assets_value appended
# 'RISK_V2.<key>=<description>' strings, using get_asset_desc(<key>) for the
# description, instead of the bare keys.

# In[115]:

# Description for each ASSETS_VAL_n field.  Fields 1-11 carry the prefix
# '공인' (public) and fields 12-22 the prefix '사설' (private); both halves use
# the same suffixes in the same order.
_ASSET_DESCRIPTIONS = {
    'ASSETS_VAL_1': '공인-전체IP대역(유선)',
    'ASSETS_VAL_2': '공인-전체IP대역(무선)',
    'ASSETS_VAL_3': '공인-WEB서버',
    'ASSETS_VAL_4': '공인-내부응용서버',
    'ASSETS_VAL_5': '공인-DB서버',
    'ASSETS_VAL_6': '공인-패치서버',
    'ASSETS_VAL_7': '공인-네트워크',
    'ASSETS_VAL_8': '공인-보안',
    'ASSETS_VAL_9': '공인-업무용PC',
    'ASSETS_VAL_10': '공인-비업무용PC',
    'ASSETS_VAL_11': '공인-기타',
    'ASSETS_VAL_12': '사설-전체IP대역(유선)',
    'ASSETS_VAL_13': '사설-전체IP대역(무선)',
    'ASSETS_VAL_14': '사설-WEB서버',
    'ASSETS_VAL_15': '사설-내부응용서버',
    'ASSETS_VAL_16': '사설-DB서버',
    'ASSETS_VAL_17': '사설-패치서버',
    'ASSETS_VAL_18': '사설-네트워크',
    'ASSETS_VAL_19': '사설-보안',
    'ASSETS_VAL_20': '사설-업무용PC',
    'ASSETS_VAL_21': '사설-비업무용PC',
    'ASSETS_VAL_22': '사설-기타',
}


def get_asset_desc(asset_field):
    """Return the description string for an ``ASSETS_VAL_n`` field name.

    Args:
        asset_field: Field name such as ``'ASSETS_VAL_3'``.

    Returns:
        str: The description for *asset_field*, or ``''`` when the name is
        not one of ``ASSETS_VAL_1`` .. ``ASSETS_VAL_22``.
    """
    # Non-string input can never match a field name; answering '' here also
    # avoids a TypeError from hashing unhashable values in the lookup.
    if not isinstance(asset_field, str):
        return ''
    return _ASSET_DESCRIPTIONS.get(asset_field, '')


# In[250]:

# New assets column: one list of active ASSETS_VAL_* keys per RISK_V2 record.
x = list(map(filter_assets_value, RISK_V2))
# print(list(filter(lambda n:n!='None',df['ASSETS_VAL'])))
len(x)

# In[ ]: