| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
2773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533 |
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "source": [
- "import os\n",
- "import array\n",
- "import math\n",
- "import pickle\n",
- "# import joblib\n",
- "import sys\n",
- "import argparse\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from datetime import datetime\n",
- "from pprint import pprint\n",
- "import ssl\n",
- "from elasticsearch.connection import create_ssl_context\n",
- "from elasticsearch import Elasticsearch\n",
- "from elasticsearch import helpers\n",
- "import urllib3"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "from mlxtend.preprocessing import TransactionEncoder\n",
- "from mlxtend.frequent_patterns import association_rules, fpgrowth\n",
- "from prefixspan import PrefixSpan"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "source": [
- "ssl_context = create_ssl_context()\n",
- "ssl_context.check_hostname = False\n",
- "ssl_context.verify_mode = ssl.CERT_NONE"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "source": [
- "es = Elasticsearch(hosts=[{'host': '223.194.92.152', 'port': 9200}], scheme=\"http\",verify_certs=False, timeout=300, ssl_context=ssl_context, http_auth=(\"elasticsearch\", \"hadoop2019@!@#$\"))\n",
- "urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/Users/joohyunyoon/.pyenv/versions/anaconda3-2021.05/lib/python3.8/site-packages/elasticsearch/connection/http_urllib3.py:158: UserWarning: When using `ssl_context`, all other SSL related kwargs are ignored\n",
- " warnings.warn(\n"
- ]
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 347,
- "source": [
- "######## Year 2020 (one year of data) ########\n",
- "######## There is no MTM data for 2018 or 2019 ########\n",
- "\n",
- "body = {\n",
- " \"size\" : 10000,\n",
- " \"query\": {\n",
- " \"range\":{\n",
- " \"TW_COLLECT_DT\":{\n",
- " \"gte\":\"2020-01-01T00:00:00.625+09:00\",\n",
- " \"lte\":\"2020-12-31T00:00:00.625+09:00\" ################\n",
- " }\n",
- " }\n",
- " }\n",
- "}\n",
- " \n",
- "res = es.search(index = 'ts_data_accident-2020', body=body)\n",
- "data = res['hits']['hits']\n",
- "total = res['hits']['total']\n",
- "\n",
- "print(total)\n",
- "\n",
- "accident = []\n",
- "for da in data:\n",
- " att_type = da['_source']\n",
- " # att_type[\"POL_NM\"]=att_type[\"SCEN_INFOS\"][0][\"POL_NM\"]\n",
- " accident.append(att_type)\n",
- "\n",
- "# df = pd.DataFrame(accident,dtype=str)\n",
- "df = pd.DataFrame(accident)\n",
- "\n",
- "print(df.head())"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-347-d3fe348d5858>:16: DeprecationWarning: The 'body' parameter is deprecated for the 'search' API and will be removed in 8.0.0. Instead use API parameters directly. See https://github.com/elastic/elasticsearch-py/issues/1698 for more information\n",
- " res = es.search(index = 'ts_data_accident-2020', body=body)\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "26531\n",
- " TW_ATT_IP_SEARCH_DATA \\\n",
- "0 {'ATT_DETAIL_ORG_NM': '계원예술대학교', 'INST_HIGH_CO... \n",
- "1 {'ATT_DETAIL_ORG_NM': '국민대학교', 'INST_HIGH_CODE... \n",
- "2 {'ATT_DETAIL_ORG_NM': '서울여자대학교', 'INST_HIGH_CO... \n",
- "3 {'ATT_DETAIL_ORG_NM': '서울대학교', 'INST_HIGH_CODE... \n",
- "4 None \n",
- "\n",
- " TW_ATT_GEOLOCATION ACCD_CHARGER_ID \\\n",
- "0 37.5112, 126.97409999999999 kmryu787 \n",
- "1 37.751, -97.822 hk8120 \n",
- "2 34.7725, 113.7266 sjsong \n",
- "3 50.9919, -4.2474 hurache \n",
- "4 41.0, 64.0 deuniv \n",
- "\n",
- " ACCD_DMG_ATTACK_NM ACCD_DCL_INST_NM \\\n",
- "0 Malwr-Resource-29-01-coinminer.18033003ECSC# 교육부사이버안전센터 \n",
- "1 Malwr-Sysinfo-29-01-json.16101205ECSC# 교육부사이버안전센터 \n",
- "2 Attack-WebScan-29-01-download(passwd).19092602... 교육부사이버안전센터 \n",
- "3 Attack-Scan-29-01-botnet(satori).19061302ECSC# 교육부사이버안전센터 \n",
- "4 Malwr-Resource-29-01-photo.scr.16053101ECSC# 교육부사이버안전센터 \n",
- "\n",
- " ACCD_FIND_MTD_CODE RISK_V2 \\\n",
- "0 1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... \n",
- "1 1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... \n",
- "2 1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... \n",
- "3 1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... \n",
- "4 1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... \n",
- "\n",
- " TW_DMG_CT_NM ACCD_CLOSE_PROCESS_CODE TW_DMG_GEOLOCATION ... \\\n",
- "0 독일 1 51.2993, 9.491 ... \n",
- "1 대한민국 1 37.5985, 126.9783 ... \n",
- "2 대한민국 1 37.5112, 126.97409999999999 ... \n",
- "3 대한민국 1 37.5985, 126.9783 ... \n",
- "4 대한민국 1 37.5112, 126.97409999999999 ... \n",
- "\n",
- " ACCD_EMAIL DRULE_NM \\\n",
- "0 ryu@kaywon.ac.kr Malwr-Resource-29-01-coinminer.18033003ECSC# \n",
- "1 hk8120@kookmin.ac.kr Malwr-Sysinfo-29-01-json.16101205ECSC# \n",
- "2 sjsong@swu.ac.kr Attack-WebScan-29-01-download(passwd).19092602... \n",
- "3 snucert@snu.ac.kr Attack-Scan-29-01-botnet(satori).19061302ECSC# \n",
- "4 sklee82@deu.ac.kr Malwr-Resource-29-01-photo.scr.16053101ECSC# \n",
- "\n",
- " GEAR_CODE USER_RISK_V2 \\\n",
- "0 73017000 {'WEEKNESS_VAL_4': '0', 'WEEKNESS_VAL_5': '0',... \n",
- "1 53026000 {'WEEKNESS_VAL_4': '0', 'WEEKNESS_VAL_5': '0',... \n",
- "2 53061000 {'WEEKNESS_VAL_4': '0', 'WEEKNESS_VAL_5': '0',... \n",
- "3 A000001609-N-00005 {'WEEKNESS_VAL_4': '0', 'WEEKNESS_VAL_5': '0',... \n",
- "4 A000001146-N-00002 {'WEEKNESS_VAL_4': '0', 'WEEKNESS_VAL_5': '0',... \n",
- "\n",
- " ACCD_DCL_REPORTER_POSITION TW_ATT_IP ACCD_NCSC_TRANSFER_NO \\\n",
- "0 선임 117.17.214.4 T20-043000077 \n",
- "1 사원 104.18.56.68 T20-050500085 \n",
- "2 사원 103.115.42.158 T20-010400003 \n",
- "3 사원 78.151.86.161 T20-010400014 \n",
- "4 선임 213.230.84.85 T20-050600072 \n",
- "\n",
- " TW_MALIG_FILE_BINARY INST_TYPE_DETAIL_CODE IS_DETAIL \n",
- "0 NaN NaN NaN \n",
- "1 NaN NaN NaN \n",
- "2 NaN NaN NaN \n",
- "3 NaN NaN NaN \n",
- "4 NaN NaN NaN \n",
- "\n",
- "[5 rows x 134 columns]\n"
- ]
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 348,
- "source": [
- "df=df[['RISK_V2','INST_NM','DRULE_ATT_TYPE_CODE1','TW_ATT_IP','TW_ATT_PORT','TW_DMG_IP','TW_DMG_PORT','ACCD_DMG_PROTO_NM','TW_ATT_CT_NM','ACCD_FIND_MTD_CODE','DRULE_NM']].dropna()\n",
- "len(df)\n",
- "df.head()"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>RISK_V2</th>\n",
- " <th>INST_NM</th>\n",
- " <th>DRULE_ATT_TYPE_CODE1</th>\n",
- " <th>TW_ATT_IP</th>\n",
- " <th>TW_ATT_PORT</th>\n",
- " <th>TW_DMG_IP</th>\n",
- " <th>TW_DMG_PORT</th>\n",
- " <th>ACCD_DMG_PROTO_NM</th>\n",
- " <th>TW_ATT_CT_NM</th>\n",
- " <th>ACCD_FIND_MTD_CODE</th>\n",
- " <th>DRULE_NM</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>계원예술대학교</td>\n",
- " <td>Malwr</td>\n",
- " <td>117.17.214.4</td>\n",
- " <td>2846</td>\n",
- " <td>5.9.163.18</td>\n",
- " <td>50001</td>\n",
- " <td>TCP</td>\n",
- " <td>대한민국</td>\n",
- " <td>1</td>\n",
- " <td>Malwr-Resource-29-01-coinminer.18033003ECSC#</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>국민대학교</td>\n",
- " <td>Malwr</td>\n",
- " <td>104.18.56.68</td>\n",
- " <td>80</td>\n",
- " <td>210.123.34.247</td>\n",
- " <td>51318</td>\n",
- " <td>TCP</td>\n",
- " <td>미국</td>\n",
- " <td>1</td>\n",
- " <td>Malwr-Sysinfo-29-01-json.16101205ECSC#</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>서울여자대학교</td>\n",
- " <td>Attack</td>\n",
- " <td>103.115.42.158</td>\n",
- " <td>46728</td>\n",
- " <td>203.246.40.25</td>\n",
- " <td>80</td>\n",
- " <td>TCP</td>\n",
- " <td>중국</td>\n",
- " <td>1</td>\n",
- " <td>Attack-WebScan-29-01-download(passwd).19092602...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>서울대학교</td>\n",
- " <td>Attack</td>\n",
- " <td>78.151.86.161</td>\n",
- " <td>42911</td>\n",
- " <td>147.46.10.138</td>\n",
- " <td>80</td>\n",
- " <td>TCP</td>\n",
- " <td>영국</td>\n",
- " <td>1</td>\n",
- " <td>Attack-Scan-29-01-botnet(satori).19061302ECSC#</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>동의대학교</td>\n",
- " <td>Malwr</td>\n",
- " <td>213.230.84.85</td>\n",
- " <td>3666</td>\n",
- " <td>114.70.187.161</td>\n",
- " <td>49460</td>\n",
- " <td>TCP</td>\n",
- " <td>우즈베키스탄</td>\n",
- " <td>1</td>\n",
- " <td>Malwr-Resource-29-01-photo.scr.16053101ECSC#</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " RISK_V2 INST_NM \\\n",
- "0 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 계원예술대학교 \n",
- "1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 국민대학교 \n",
- "2 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 서울여자대학교 \n",
- "3 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 서울대학교 \n",
- "4 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 동의대학교 \n",
- "\n",
- " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP \\\n",
- "0 Malwr 117.17.214.4 2846 5.9.163.18 \n",
- "1 Malwr 104.18.56.68 80 210.123.34.247 \n",
- "2 Attack 103.115.42.158 46728 203.246.40.25 \n",
- "3 Attack 78.151.86.161 42911 147.46.10.138 \n",
- "4 Malwr 213.230.84.85 3666 114.70.187.161 \n",
- "\n",
- " TW_DMG_PORT ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
- "0 50001 TCP 대한민국 1 \n",
- "1 51318 TCP 미국 1 \n",
- "2 80 TCP 중국 1 \n",
- "3 80 TCP 영국 1 \n",
- "4 49460 TCP 우즈베키스탄 1 \n",
- "\n",
- " DRULE_NM \n",
- "0 Malwr-Resource-29-01-coinminer.18033003ECSC# \n",
- "1 Malwr-Sysinfo-29-01-json.16101205ECSC# \n",
- "2 Attack-WebScan-29-01-download(passwd).19092602... \n",
- "3 Attack-Scan-29-01-botnet(satori).19061302ECSC# \n",
- "4 Malwr-Resource-29-01-photo.scr.16053101ECSC# "
- ]
- },
- "metadata": {},
- "execution_count": 348
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 349,
- "source": [
- "##################### NTM section #####################"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 350,
- "source": [
- "NTM_df=df[df['ACCD_FIND_MTD_CODE']=='1']\n",
- "len(NTM_df)"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "9894"
- ]
- },
- "metadata": {},
- "execution_count": 350
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 351,
- "source": [
- "# Extract the RISK_V2 column so the asset, risk, intent, and black-IP values can be pulled out of it\n",
- "RISK_V2=NTM_df['RISK_V2']\n",
- "\n",
- "RISK_V2_FILTERED=RISK_V2.dropna()\n",
- "print(RISK_V2.size)\n",
- "print(RISK_V2_FILTERED.size)\n",
- "\n",
- "\n",
- "\n",
- "\n"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "9894\n",
- "9894\n"
- ]
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 352,
- "source": [
- "def filter_assets_value(risk):\n",
- " risks=[]\n",
- " try:\n",
- " for risk_key in risk:\n",
- " if 'ASSETS_VAL_' in risk_key and risk[risk_key]:\n",
- " risk_key_desc = 'RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key)\n",
- " risks.append(risk_key_desc)\n",
- " except:\n",
- " print(risk)\n",
- " print(type(risk))\n",
- " finally:\n",
- " return risks\n",
- " \n",
- " "
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 353,
- "source": [
- "# modified\n",
- "def get_asset_desc(asset_field):\n",
- " if asset_field == 'ASSETS_VAL_1':\n",
- " return '공인-전체IP대역(유선)'\n",
- " elif asset_field == 'ASSETS_VAL_2':\n",
- " return '공인-전체IP대역(무선)'\n",
- " elif asset_field == 'ASSETS_VAL_3':\n",
- " return '공인-WEB서버'\n",
- " elif asset_field == 'ASSETS_VAL_4':\n",
- " return '공인-내부응용서버'\n",
- " elif asset_field == 'ASSETS_VAL_5':\n",
- " return '공인-DB서버'\n",
- " elif asset_field == 'ASSETS_VAL_6':\n",
- " return '공인-패치서버'\n",
- " elif asset_field == 'ASSETS_VAL_7':\n",
- " return '공인-네트워크'\n",
- " elif asset_field == 'ASSETS_VAL_8':\n",
- " return '공인-보안'\n",
- " elif asset_field == 'ASSETS_VAL_9':\n",
- " return '공인-업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_10':\n",
- " return '공인-비업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_11':\n",
- " return '공인-기타'\n",
- " elif asset_field == 'ASSETS_VAL_12':\n",
- " return '사설-전체IP대역(유선)'\n",
- " elif asset_field == 'ASSETS_VAL_13':\n",
- " return '사설-전체IP대역(무선)'\n",
- " elif asset_field == 'ASSETS_VAL_14':\n",
- " return '사설-WEB서버'\n",
- " elif asset_field == 'ASSETS_VAL_15':\n",
- " return '사설-내부응용서버'\n",
- " elif asset_field == 'ASSETS_VAL_16':\n",
- " return '사설-DB서버'\n",
- " elif asset_field == 'ASSETS_VAL_17':\n",
- " return '사설-패치서버'\n",
- " elif asset_field == 'ASSETS_VAL_18':\n",
- " return '사설-네트워크'\n",
- " elif asset_field == 'ASSETS_VAL_19':\n",
- " return '사설-보안'\n",
- " elif asset_field == 'ASSETS_VAL_20':\n",
- " return '사설-업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_21':\n",
- " return '사설-비업무용PC'\n",
- " elif asset_field == 'ASSETS_VAL_22':\n",
- " return '사설-기타'\n",
- " else:\n",
- " return ''"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 354,
- "source": [
- "# New assets column\n",
- "NTM_df['ASSETS_VAL']=list(map(filter_assets_value, RISK_V2_FILTERED))\n",
- "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].astype(str)\n",
- "NTM_df[:1]"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-354-a30068447e14>:2: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['ASSETS_VAL']=list(map(filter_assets_value, RISK_V2_FILTERED))\n",
- "<ipython-input-354-a30068447e14>:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].astype(str)\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>RISK_V2</th>\n",
- " <th>INST_NM</th>\n",
- " <th>DRULE_ATT_TYPE_CODE1</th>\n",
- " <th>TW_ATT_IP</th>\n",
- " <th>TW_ATT_PORT</th>\n",
- " <th>TW_DMG_IP</th>\n",
- " <th>TW_DMG_PORT</th>\n",
- " <th>ACCD_DMG_PROTO_NM</th>\n",
- " <th>TW_ATT_CT_NM</th>\n",
- " <th>ACCD_FIND_MTD_CODE</th>\n",
- " <th>DRULE_NM</th>\n",
- " <th>ASSETS_VAL</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>계원예술대학교</td>\n",
- " <td>Malwr</td>\n",
- " <td>117.17.214.4</td>\n",
- " <td>2846</td>\n",
- " <td>5.9.163.18</td>\n",
- " <td>50001</td>\n",
- " <td>TCP</td>\n",
- " <td>대한민국</td>\n",
- " <td>1</td>\n",
- " <td>Malwr-Resource-29-01-coinminer.18033003ECSC#</td>\n",
- " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " RISK_V2 INST_NM \\\n",
- "0 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 계원예술대학교 \n",
- "\n",
- " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP TW_DMG_PORT \\\n",
- "0 Malwr 117.17.214.4 2846 5.9.163.18 50001 \n",
- "\n",
- " ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
- "0 TCP 대한민국 1 \n",
- "\n",
- " DRULE_NM \\\n",
- "0 Malwr-Resource-29-01-coinminer.18033003ECSC# \n",
- "\n",
- " ASSETS_VAL \n",
- "0 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] "
- ]
- },
- "metadata": {},
- "execution_count": 354
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 355,
- "source": [
- "# modified\n",
- "def filter_intent(intent):\n",
- " intents=[]\n",
- " for intent_key in intent:\n",
- " if 'INTENT_VAL_' in intent_key and intent[intent_key]:\n",
- " intent_key_desc = 'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)\n",
- " intents.append(intent_key_desc)\n",
- " return intents"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 356,
- "source": [
- "def get_intent_desc(intent_field):\n",
- " if intent_field == 'INTENT_VAL_1':\n",
- " return '파괴'\n",
- " elif intent_field == 'INTENT_VAL_2':\n",
- " return '유출'\n",
- " elif intent_field == 'INTENT_VAL_3':\n",
- " return '지연'\n",
- " elif intent_field == 'INTENT_VAL_4':\n",
- " return '잠복'\n",
- " elif intent_field == 'INTENT_VAL_5':\n",
- " return '단순침입'\n",
- " elif intent_field == 'INTENT_VAL_6':\n",
- " return 'MD5'\n",
- " elif intent_field == 'INTENT_VAL_0':\n",
- " return 'Default'\n",
- " else:\n",
- " return ''"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 357,
- "source": [
- "# New intent-value column\n",
- "NTM_df['INTENT_VAL']=list(map(filter_intent, RISK_V2_FILTERED))\n",
- "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].astype(str)\n",
- "NTM_df[:1]"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-357-8a9ca54377be>:2: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['INTENT_VAL']=list(map(filter_intent, RISK_V2_FILTERED))\n",
- "<ipython-input-357-8a9ca54377be>:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].astype(str)\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>RISK_V2</th>\n",
- " <th>INST_NM</th>\n",
- " <th>DRULE_ATT_TYPE_CODE1</th>\n",
- " <th>TW_ATT_IP</th>\n",
- " <th>TW_ATT_PORT</th>\n",
- " <th>TW_DMG_IP</th>\n",
- " <th>TW_DMG_PORT</th>\n",
- " <th>ACCD_DMG_PROTO_NM</th>\n",
- " <th>TW_ATT_CT_NM</th>\n",
- " <th>ACCD_FIND_MTD_CODE</th>\n",
- " <th>DRULE_NM</th>\n",
- " <th>ASSETS_VAL</th>\n",
- " <th>INTENT_VAL</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>계원예술대학교</td>\n",
- " <td>Malwr</td>\n",
- " <td>117.17.214.4</td>\n",
- " <td>2846</td>\n",
- " <td>5.9.163.18</td>\n",
- " <td>50001</td>\n",
- " <td>TCP</td>\n",
- " <td>대한민국</td>\n",
- " <td>1</td>\n",
- " <td>Malwr-Resource-29-01-coinminer.18033003ECSC#</td>\n",
- " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
- " <td>['RISK_V2.INTENT_VAL_3=지연']</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " RISK_V2 INST_NM \\\n",
- "0 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 계원예술대학교 \n",
- "\n",
- " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP TW_DMG_PORT \\\n",
- "0 Malwr 117.17.214.4 2846 5.9.163.18 50001 \n",
- "\n",
- " ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
- "0 TCP 대한민국 1 \n",
- "\n",
- " DRULE_NM \\\n",
- "0 Malwr-Resource-29-01-coinminer.18033003ECSC# \n",
- "\n",
- " ASSETS_VAL INTENT_VAL \n",
- "0 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_3=지연'] "
- ]
- },
- "metadata": {},
- "execution_count": 357
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 358,
- "source": [
- "# modified\n",
- "def filter_source(source):\n",
- " sources=[]\n",
- " for source_key in source:\n",
- " if 'SOURCE_VAL_' in source_key and source[source_key]:\n",
- " source_key_desc='RISK_V2.' + source_key + '=' + get_source_desc(source_key)\n",
- " sources.append(source_key_desc)\n",
- " return sources"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 359,
- "source": [
- "def get_source_desc(source_field):\n",
- " if source_field=='SOURCE_VAL_1':\n",
- " return '북한IP'\n",
- " if source_field=='SOURCE_VAL_3':\n",
- " return 'ECSC Black IP'\n",
- " else:\n",
- " return ''"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 360,
- "source": [
- "# Add a new column holding the SOURCE_VAL values\n",
- "NTM_df['SOURCE_VAL']=list(map(filter_source, RISK_V2_FILTERED))\n",
- "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].astype(str)\n",
- "NTM_df[:5]"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-360-3b33b89bc3d3>:2: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['SOURCE_VAL']=list(map(filter_source, RISK_V2_FILTERED))\n",
- "<ipython-input-360-3b33b89bc3d3>:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].astype(str)\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>RISK_V2</th>\n",
- " <th>INST_NM</th>\n",
- " <th>DRULE_ATT_TYPE_CODE1</th>\n",
- " <th>TW_ATT_IP</th>\n",
- " <th>TW_ATT_PORT</th>\n",
- " <th>TW_DMG_IP</th>\n",
- " <th>TW_DMG_PORT</th>\n",
- " <th>ACCD_DMG_PROTO_NM</th>\n",
- " <th>TW_ATT_CT_NM</th>\n",
- " <th>ACCD_FIND_MTD_CODE</th>\n",
- " <th>DRULE_NM</th>\n",
- " <th>ASSETS_VAL</th>\n",
- " <th>INTENT_VAL</th>\n",
- " <th>SOURCE_VAL</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>계원예술대학교</td>\n",
- " <td>Malwr</td>\n",
- " <td>117.17.214.4</td>\n",
- " <td>2846</td>\n",
- " <td>5.9.163.18</td>\n",
- " <td>50001</td>\n",
- " <td>TCP</td>\n",
- " <td>대한민국</td>\n",
- " <td>1</td>\n",
- " <td>Malwr-Resource-29-01-coinminer.18033003ECSC#</td>\n",
- " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
- " <td>['RISK_V2.INTENT_VAL_3=지연']</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>국민대학교</td>\n",
- " <td>Malwr</td>\n",
- " <td>104.18.56.68</td>\n",
- " <td>80</td>\n",
- " <td>210.123.34.247</td>\n",
- " <td>51318</td>\n",
- " <td>TCP</td>\n",
- " <td>미국</td>\n",
- " <td>1</td>\n",
- " <td>Malwr-Sysinfo-29-01-json.16101205ECSC#</td>\n",
- " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
- " <td>['RISK_V2.INTENT_VAL_2=유출']</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>서울여자대학교</td>\n",
- " <td>Attack</td>\n",
- " <td>103.115.42.158</td>\n",
- " <td>46728</td>\n",
- " <td>203.246.40.25</td>\n",
- " <td>80</td>\n",
- " <td>TCP</td>\n",
- " <td>중국</td>\n",
- " <td>1</td>\n",
- " <td>Attack-WebScan-29-01-download(passwd).19092602...</td>\n",
- " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V...</td>\n",
- " <td>['RISK_V2.INTENT_VAL_5=단순침입']</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>서울대학교</td>\n",
- " <td>Attack</td>\n",
- " <td>78.151.86.161</td>\n",
- " <td>42911</td>\n",
- " <td>147.46.10.138</td>\n",
- " <td>80</td>\n",
- " <td>TCP</td>\n",
- " <td>영국</td>\n",
- " <td>1</td>\n",
- " <td>Attack-Scan-29-01-botnet(satori).19061302ECSC#</td>\n",
- " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V...</td>\n",
- " <td>['RISK_V2.INTENT_VAL_5=단순침입']</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>동의대학교</td>\n",
- " <td>Malwr</td>\n",
- " <td>213.230.84.85</td>\n",
- " <td>3666</td>\n",
- " <td>114.70.187.161</td>\n",
- " <td>49460</td>\n",
- " <td>TCP</td>\n",
- " <td>우즈베키스탄</td>\n",
- " <td>1</td>\n",
- " <td>Malwr-Resource-29-01-photo.scr.16053101ECSC#</td>\n",
- " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V...</td>\n",
- " <td>['RISK_V2.INTENT_VAL_3=지연']</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " RISK_V2 INST_NM \\\n",
- "0 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 계원예술대학교 \n",
- "1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 국민대학교 \n",
- "2 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 서울여자대학교 \n",
- "3 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 서울대학교 \n",
- "4 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 동의대학교 \n",
- "\n",
- " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP \\\n",
- "0 Malwr 117.17.214.4 2846 5.9.163.18 \n",
- "1 Malwr 104.18.56.68 80 210.123.34.247 \n",
- "2 Attack 103.115.42.158 46728 203.246.40.25 \n",
- "3 Attack 78.151.86.161 42911 147.46.10.138 \n",
- "4 Malwr 213.230.84.85 3666 114.70.187.161 \n",
- "\n",
- " TW_DMG_PORT ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
- "0 50001 TCP 대한민국 1 \n",
- "1 51318 TCP 미국 1 \n",
- "2 80 TCP 중국 1 \n",
- "3 80 TCP 영국 1 \n",
- "4 49460 TCP 우즈베키스탄 1 \n",
- "\n",
- " DRULE_NM \\\n",
- "0 Malwr-Resource-29-01-coinminer.18033003ECSC# \n",
- "1 Malwr-Sysinfo-29-01-json.16101205ECSC# \n",
- "2 Attack-WebScan-29-01-download(passwd).19092602... \n",
- "3 Attack-Scan-29-01-botnet(satori).19061302ECSC# \n",
- "4 Malwr-Resource-29-01-photo.scr.16053101ECSC# \n",
- "\n",
- " ASSETS_VAL \\\n",
- "0 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] \n",
- "1 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] \n",
- "2 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V... \n",
- "3 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V... \n",
- "4 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V... \n",
- "\n",
- " INTENT_VAL SOURCE_VAL \n",
- "0 ['RISK_V2.INTENT_VAL_3=지연'] [] \n",
- "1 ['RISK_V2.INTENT_VAL_2=유출'] [] \n",
- "2 ['RISK_V2.INTENT_VAL_5=단순침입'] [] \n",
- "3 ['RISK_V2.INTENT_VAL_5=단순침입'] [] \n",
- "4 ['RISK_V2.INTENT_VAL_3=지연'] [] "
- ]
- },
- "metadata": {},
- "execution_count": 360
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 361,
- "source": [
- "NTM_df.drop(columns=['RISK_V2'], inplace=True)\n",
- "NTM_df.columns"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/Users/joohyunyoon/.pyenv/versions/anaconda3-2021.05/lib/python3.8/site-packages/pandas/core/frame.py:4308: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " return super().drop(\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "Index(['INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',\n",
- " 'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',\n",
- " 'ACCD_FIND_MTD_CODE', 'DRULE_NM', 'ASSETS_VAL', 'INTENT_VAL',\n",
- " 'SOURCE_VAL'],\n",
- " dtype='object')"
- ]
- },
- "metadata": {},
- "execution_count": 361
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 362,
- "source": [
- "# It should be 13 columns in total\n",
- "\n",
- "# 1. 기관 INST_NM\n",
- "# 2. 공격 DRULE_ATT_TYPE_CODE1\n",
- "# 3. 자산 ASSETS_VAL\n",
- "# 4. 위협공격ip TW_ATT_IP\n",
- "# 5. 위협공격port TW_ATT_PORT\n",
- "# 6. 위협피해ip TW_DMG_IP\n",
- "# 7. 위협피해port TW_DMG_PORT\n",
- "# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM\n",
- "# 9. 공격국가 TW_ATT_CT_NM\n",
- "# 10. 의도(7개) INTENT_VAL\n",
- "# 11. IP/URL 가중치 SOURCE_VAL\n",
- "# 12. 장비 ACCD_FIND_MTD_CODE\n",
- "# 13. 탐지규칙명 DRULE_NM"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "markdown",
- "source": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 363,
- "source": [
- "NTM_df.isna().sum()"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "INST_NM 0\n",
- "DRULE_ATT_TYPE_CODE1 0\n",
- "TW_ATT_IP 0\n",
- "TW_ATT_PORT 0\n",
- "TW_DMG_IP 0\n",
- "TW_DMG_PORT 0\n",
- "ACCD_DMG_PROTO_NM 0\n",
- "TW_ATT_CT_NM 0\n",
- "ACCD_FIND_MTD_CODE 0\n",
- "DRULE_NM 0\n",
- "ASSETS_VAL 0\n",
- "INTENT_VAL 0\n",
- "SOURCE_VAL 0\n",
- "dtype: int64"
- ]
- },
- "metadata": {},
- "execution_count": 363
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 364,
- "source": [
- "# Replace NaN with '' in some columns and with 0 in the others\n",
- "NTM_df['ACCD_DMG_PROTO_NM']=NTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
- "NTM_df['INST_NM']=NTM_df['INST_NM'].replace(np.nan,'')\n",
- "NTM_df['DRULE_ATT_TYPE_CODE1']=NTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
- "NTM_df['TW_ATT_IP']=NTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
- "NTM_df['TW_ATT_PORT']=NTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
- "NTM_df['TW_DMG_IP']=NTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
- "NTM_df['TW_DMG_PORT']=NTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
- "NTM_df['TW_ATT_CT_NM']=NTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
- "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
- "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].replace(np.nan,0)\n",
- "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
- "NTM_df['DRULE_NM']=NTM_df['DRULE_NM'].replace(np.nan,'')"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-364-a775405fb7f0>:2: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['ACCD_DMG_PROTO_NM']=NTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
- "<ipython-input-364-a775405fb7f0>:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['INST_NM']=NTM_df['INST_NM'].replace(np.nan,'')\n",
- "<ipython-input-364-a775405fb7f0>:4: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['DRULE_ATT_TYPE_CODE1']=NTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
- "<ipython-input-364-a775405fb7f0>:5: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['TW_ATT_IP']=NTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
- "<ipython-input-364-a775405fb7f0>:6: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['TW_ATT_PORT']=NTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
- "<ipython-input-364-a775405fb7f0>:7: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['TW_DMG_IP']=NTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
- "<ipython-input-364-a775405fb7f0>:8: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['TW_DMG_PORT']=NTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
- "<ipython-input-364-a775405fb7f0>:9: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['TW_ATT_CT_NM']=NTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
- "<ipython-input-364-a775405fb7f0>:10: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
- "<ipython-input-364-a775405fb7f0>:11: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].replace(np.nan,0)\n",
- "<ipython-input-364-a775405fb7f0>:12: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
- "<ipython-input-364-a775405fb7f0>:13: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['DRULE_NM']=NTM_df['DRULE_NM'].replace(np.nan,'')\n"
- ]
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 365,
- "source": [
- "# Check for remaining NaN values again\n",
- "NTM_df.isna().sum()"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "INST_NM 0\n",
- "DRULE_ATT_TYPE_CODE1 0\n",
- "TW_ATT_IP 0\n",
- "TW_ATT_PORT 0\n",
- "TW_DMG_IP 0\n",
- "TW_DMG_PORT 0\n",
- "ACCD_DMG_PROTO_NM 0\n",
- "TW_ATT_CT_NM 0\n",
- "ACCD_FIND_MTD_CODE 0\n",
- "DRULE_NM 0\n",
- "ASSETS_VAL 0\n",
- "INTENT_VAL 0\n",
- "SOURCE_VAL 0\n",
- "dtype: int64"
- ]
- },
- "metadata": {},
- "execution_count": 365
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 366,
- "source": [
- "# Merge all columns\n",
- "\n",
- "# Build one space-separated string per row from all of the columns\n",
- "NTM_df['Combined']=NTM_df['INST_NM'].astype(str)+' '+NTM_df['TW_ATT_IP'].astype(str)+' '+NTM_df['TW_ATT_PORT'].astype(str)+' '+NTM_df['TW_DMG_IP'].astype(str)+' '+NTM_df['TW_DMG_PORT'].astype(str) +' '+NTM_df['ACCD_DMG_PROTO_NM'].astype(str)+' '+NTM_df['TW_ATT_CT_NM']+' '+NTM_df['ASSETS_VAL']+' '+NTM_df['INTENT_VAL']+' '+NTM_df['SOURCE_VAL']+' '+NTM_df['DRULE_ATT_TYPE_CODE1']+' '+NTM_df['DRULE_NM']\n",
- "\n",
- "NTM_com=NTM_df['Combined']\n",
- "NTM_com[:10]\n"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-366-d53cc1e7cfac>:4: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " NTM_df['Combined']=NTM_df['INST_NM'].astype(str)+' '+NTM_df['TW_ATT_IP'].astype(str)+' '+NTM_df['TW_ATT_PORT'].astype(str)+' '+NTM_df['TW_DMG_IP'].astype(str)+' '+NTM_df['TW_DMG_PORT'].astype(str) +' '+NTM_df['ACCD_DMG_PROTO_NM'].astype(str)+' '+NTM_df['TW_ATT_CT_NM']+' '+NTM_df['ASSETS_VAL']+' '+NTM_df['INTENT_VAL']+' '+NTM_df['SOURCE_VAL']+' '+NTM_df['DRULE_ATT_TYPE_CODE1']+' '+NTM_df['DRULE_NM']\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "0 계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TCP...\n",
- "1 국민대학교 104.18.56.68 80 210.123.34.247 51318 TCP...\n",
- "2 서울여자대학교 103.115.42.158 46728 203.246.40.25 80 ...\n",
- "3 서울대학교 78.151.86.161 42911 147.46.10.138 80 TCP...\n",
- "4 동의대학교 213.230.84.85 3666 114.70.187.161 49460 ...\n",
- "5 원광대학교 34.70.128.113 63026 123.108.17.41 80 TCP...\n",
- "6 중원대학교 36.92.9.106 44683 121.191.149.203 80 TCP...\n",
- "7 서울대학교 45.55.60.73 55082 147.47.106.234 80 TCP ...\n",
- "9 숙명여자대학교 192.144.207.37 10909 203.252.201.16 80...\n",
- "10 서울특별시교육청 218.153.168.50 11422 202.171.250.27 8...\n",
- "Name: Combined, dtype: object"
- ]
- },
- "metadata": {},
- "execution_count": 366
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 367,
- "source": [
- "# Convert the combined Series to a DataFrame\n",
- "NTM_to_df=pd.DataFrame(NTM_com)\n",
- "NTM_to_df[:5]"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Combined</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TCP...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>국민대학교 104.18.56.68 80 210.123.34.247 51318 TCP...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>서울여자대학교 103.115.42.158 46728 203.246.40.25 80 ...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>서울대학교 78.151.86.161 42911 147.46.10.138 80 TCP...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>동의대학교 213.230.84.85 3666 114.70.187.161 49460 ...</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Combined\n",
- "0 계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TCP...\n",
- "1 국민대학교 104.18.56.68 80 210.123.34.247 51318 TCP...\n",
- "2 서울여자대학교 103.115.42.158 46728 203.246.40.25 80 ...\n",
- "3 서울대학교 78.151.86.161 42911 147.46.10.138 80 TCP...\n",
- "4 동의대학교 213.230.84.85 3666 114.70.187.161 49460 ..."
- ]
- },
- "metadata": {},
- "execution_count": 367
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 368,
- "source": [
- "# Convert to a nested list (one single-element list per row) so it can be passed to PrefixSpan\n",
- "NTM_tolist=NTM_to_df.values.tolist()\n",
- "NTM_tolist[:5]"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "[[\"계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_3=지연'] [] Malwr Malwr-Resource-29-01-coinminer.18033003ECSC#\"],\n",
- " [\"국민대학교 104.18.56.68 80 210.123.34.247 51318 TCP 미국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_2=유출'] [] Malwr Malwr-Sysinfo-29-01-json.16101205ECSC#\"],\n",
- " [\"서울여자대학교 103.115.42.158 46728 203.246.40.25 80 TCP 중국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_4=공인-내부응용서버'] ['RISK_V2.INTENT_VAL_5=단순침입'] [] Attack Attack-WebScan-29-01-download(passwd).19092602ECSC#\"],\n",
- " [\"서울대학교 78.151.86.161 42911 147.46.10.138 80 TCP 영국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_10=공인-비업무용PC'] ['RISK_V2.INTENT_VAL_5=단순침입'] [] Attack Attack-Scan-29-01-botnet(satori).19061302ECSC#\"],\n",
- " [\"동의대학교 213.230.84.85 3666 114.70.187.161 49460 TCP 우즈베키스탄 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_9=공인-업무용PC'] ['RISK_V2.INTENT_VAL_3=지연'] [] Malwr Malwr-Resource-29-01-photo.scr.16053101ECSC#\"]]"
- ]
- },
- "metadata": {},
- "execution_count": 368
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 369,
- "source": [
- "from prefixspan import PrefixSpan"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 370,
- "source": [
- "# Apply prefixspan\n",
- "PrefixSpan_NTM = PrefixSpan(NTM_tolist)\n",
- "\n",
- "###### Interchangeable ######\n",
- "# Get every pattern whose frequency is at least 1\n",
- "prefix_NTM=PrefixSpan_NTM.frequent(1)\n",
- "prefix_NTM[:3]"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "[(1,\n",
- " [\"계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_3=지연'] [] Malwr Malwr-Resource-29-01-coinminer.18033003ECSC#\"]),\n",
- " (1,\n",
- " [\"국민대학교 104.18.56.68 80 210.123.34.247 51318 TCP 미국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_2=유출'] [] Malwr Malwr-Sysinfo-29-01-json.16101205ECSC#\"]),\n",
- " (1,\n",
- " [\"서울여자대학교 103.115.42.158 46728 203.246.40.25 80 TCP 중국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_4=공인-내부응용서버'] ['RISK_V2.INTENT_VAL_5=단순침입'] [] Attack Attack-WebScan-29-01-download(passwd).19092602ECSC#\"])]"
- ]
- },
- "metadata": {},
- "execution_count": 370
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 371,
- "source": [
- "# Put the result into a DataFrame\n",
- "prefix_NTM_df=pd.DataFrame(prefix_NTM)\n",
- "prefix_NTM_df[:5]"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>0</th>\n",
- " <th>1</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>1</td>\n",
- " <td>[계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>1</td>\n",
- " <td>[국민대학교 104.18.56.68 80 210.123.34.247 51318 TC...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>1</td>\n",
- " <td>[서울여자대학교 103.115.42.158 46728 203.246.40.25 80...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>1</td>\n",
- " <td>[서울대학교 78.151.86.161 42911 147.46.10.138 80 TC...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>1</td>\n",
- " <td>[동의대학교 213.230.84.85 3666 114.70.187.161 49460...</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " 0 1\n",
- "0 1 [계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC...\n",
- "1 1 [국민대학교 104.18.56.68 80 210.123.34.247 51318 TC...\n",
- "2 1 [서울여자대학교 103.115.42.158 46728 203.246.40.25 80...\n",
- "3 1 [서울대학교 78.151.86.161 42911 147.46.10.138 80 TC...\n",
- "4 1 [동의대학교 213.230.84.85 3666 114.70.187.161 49460..."
- ]
- },
- "metadata": {},
- "execution_count": 371
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 372,
- "source": [
- "# Rename the columns\n",
- "prefix_NTM_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)\n",
- "\n",
- "# Add a new, initially empty (NaN) column for the Effect\n",
- "prefix_NTM_df['Effect']=np.nan\n",
- "\n",
- "# Reorder the columns\n",
- "prefix_NTM_df=prefix_NTM_df[['Cause','Effect','Frequency']]\n",
- "prefix_NTM_df[:2]"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Cause</th>\n",
- " <th>Effect</th>\n",
- " <th>Frequency</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>[계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC...</td>\n",
- " <td>NaN</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>[국민대학교 104.18.56.68 80 210.123.34.247 51318 TC...</td>\n",
- " <td>NaN</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Cause Effect Frequency\n",
- "0 [계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC... NaN 1\n",
- "1 [국민대학교 104.18.56.68 80 210.123.34.247 51318 TC... NaN 1"
- ]
- },
- "metadata": {},
- "execution_count": 372
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 373,
- "source": [
- "# Define a function that finds the rule name (e.g. 'Attack', 'Malwr') in a Cause entry\n",
- "def generate_cause(cell):\n",
- " drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
- " for drule in drules:\n",
- " if ' '+drule in cell[0]:\n",
- " return drule \n",
- " return ''\n",
- " \n",
- "# Map each Cause to its rule name, which is used as the Effect\n",
- "effect=list(map(generate_cause, prefix_NTM_df.Cause))\n",
- "\n",
- "# Assign the rule name as an effect\n",
- "prefix_NTM_df['Effect']=effect\n",
- "prefix_NTM_df.sort_values(by=['Frequency'],ascending=False)"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Cause</th>\n",
- " <th>Effect</th>\n",
- " <th>Frequency</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>393</th>\n",
- " <td>[중앙대학교 165.194.8.12 49159 219.153.48.112 1188 ...</td>\n",
- " <td>Malwr</td>\n",
- " <td>4</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1831</th>\n",
- " <td>[상지대학교 158.69.38.240 62834 220.149.189.72 80 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6517</th>\n",
- " <td>[중앙대학교 165.194.31.20 49157 219.153.48.112 1188...</td>\n",
- " <td>Malwr</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4198</th>\n",
- " <td>[한세대학교 210.12.116.124 22564 220.68.231.48 80 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>[계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC...</td>\n",
- " <td>Malwr</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3298</th>\n",
- " <td>[경기대학교 50.62.176.64 40577 203.249.22.27 80 TCP...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3299</th>\n",
- " <td>[한세대학교 103.242.119.217 44889 220.68.249.69 80 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3300</th>\n",
- " <td>[동의대학교 185.247.181.7 37614 113.198.245.31 80 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3301</th>\n",
- " <td>[숙명여자대학교 46.119.174.102 51876 203.252.201.77 8...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9887</th>\n",
- " <td>[아주대학교 187.101.10.229 49802 202.30.20.81 80 TC...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>9888 rows × 3 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " Cause Effect Frequency\n",
- "393 [중앙대학교 165.194.8.12 49159 219.153.48.112 1188 ... Malwr 4\n",
- "1831 [상지대학교 158.69.38.240 62834 220.149.189.72 80 T... Attack 2\n",
- "6517 [중앙대학교 165.194.31.20 49157 219.153.48.112 1188... Malwr 2\n",
- "4198 [한세대학교 210.12.116.124 22564 220.68.231.48 80 T... Attack 2\n",
- "0 [계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC... Malwr 1\n",
- "... ... ... ...\n",
- "3298 [경기대학교 50.62.176.64 40577 203.249.22.27 80 TCP... Attack 1\n",
- "3299 [한세대학교 103.242.119.217 44889 220.68.249.69 80 ... Attack 1\n",
- "3300 [동의대학교 185.247.181.7 37614 113.198.245.31 80 T... Attack 1\n",
- "3301 [숙명여자대학교 46.119.174.102 51876 203.252.201.77 8... Attack 1\n",
- "9887 [아주대학교 187.101.10.229 49802 202.30.20.81 80 TC... Attack 1\n",
- "\n",
- "[9888 rows x 3 columns]"
- ]
- },
- "metadata": {},
- "execution_count": 373
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 374,
- "source": [
- "# Select the rows of prefix_NTM_df whose first Cause string mentions a\n",
- "# known attack type, fill missing values in each subset with that type's\n",
- "# label, then stack the subsets and show them ordered by Frequency.\n",
- "# Rows matching none of the types are left out; a row matching several\n",
- "# types appears once per match.\n",
- "\n",
- "def _make_type_filter(label):\n",
- "    # Build a predicate that is True when ' <label>' occurs in ps[0]\n",
- "    token = ' ' + label\n",
- "\n",
- "    def _type_filter(ps):\n",
- "        return token in ps[0]\n",
- "\n",
- "    return _type_filter\n",
- "\n",
- "# Per-type predicates (original names kept for cells that reference them)\n",
- "Attack_filter = _make_type_filter('Attack')\n",
- "Malwr_filter = _make_type_filter('Malwr')\n",
- "DDOS_filter = _make_type_filter('DDOS')\n",
- "HACK_filter = _make_type_filter('HACK')\n",
- "MAIL_filter = _make_type_filter('MAIL')\n",
- "WEB_filter = _make_type_filter('WEB')\n",
- "\n",
- "def _select_type(predicate, label):\n",
- "    # Rows of prefix_NTM_df accepted by predicate, with NaN replaced by label\n",
- "    mask = [predicate(cause) for cause in prefix_NTM_df.Cause]\n",
- "    return prefix_NTM_df[mask].fillna(label)\n",
- "\n",
- "att_filter = _select_type(Attack_filter, 'Attack')\n",
- "mal_filter = _select_type(Malwr_filter, 'Malwr')\n",
- "dd_filter = _select_type(DDOS_filter, 'DDOS')\n",
- "hack_filter = _select_type(HACK_filter, 'HACK')\n",
- "mail_filter = _select_type(MAIL_filter, 'MAIL')\n",
- "web_filter = _select_type(WEB_filter, 'WEB')\n",
- "\n",
- "frames = [att_filter, mal_filter, dd_filter, hack_filter, mail_filter, web_filter]\n",
- "result = pd.concat(frames)\n",
- "result.sort_values(by=['Frequency'], ascending=False)"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Cause</th>\n",
- " <th>Effect</th>\n",
- " <th>Frequency</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>393</th>\n",
- " <td>[중앙대학교 165.194.8.12 49159 219.153.48.112 1188 ...</td>\n",
- " <td>Malwr</td>\n",
- " <td>4</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4198</th>\n",
- " <td>[한세대학교 210.12.116.124 22564 220.68.231.48 80 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6517</th>\n",
- " <td>[중앙대학교 165.194.31.20 49157 219.153.48.112 1188...</td>\n",
- " <td>Malwr</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1831</th>\n",
- " <td>[상지대학교 158.69.38.240 62834 220.149.189.72 80 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8154</th>\n",
- " <td>[고려대학교세종캠퍼스 218.76.223.50 46901 163.152.214.13...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4066</th>\n",
- " <td>[서울대학교 122.51.16.226 60945 147.46.121.22 80 TC...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4067</th>\n",
- " <td>[서울대학교 52.149.163.33 63408 147.47.106.230 80 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4068</th>\n",
- " <td>[충남대학교 108.41.185.191 57224 168.188.40.65 80 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4069</th>\n",
- " <td>[동덕여자대학교 203.30.236.64 62705 210.121.133.59 80...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9882</th>\n",
- " <td>[숙명여자대학교 203.252.200.72 61489 78.47.123.172 45...</td>\n",
- " <td>Malwr</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>9824 rows × 3 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " Cause Effect Frequency\n",
- "393 [중앙대학교 165.194.8.12 49159 219.153.48.112 1188 ... Malwr 4\n",
- "4198 [한세대학교 210.12.116.124 22564 220.68.231.48 80 T... Attack 2\n",
- "6517 [중앙대학교 165.194.31.20 49157 219.153.48.112 1188... Malwr 2\n",
- "1831 [상지대학교 158.69.38.240 62834 220.149.189.72 80 T... Attack 2\n",
- "8154 [고려대학교세종캠퍼스 218.76.223.50 46901 163.152.214.13... Attack 1\n",
- "... ... ... ...\n",
- "4066 [서울대학교 122.51.16.226 60945 147.46.121.22 80 TC... Attack 1\n",
- "4067 [서울대학교 52.149.163.33 63408 147.47.106.230 80 T... Attack 1\n",
- "4068 [충남대학교 108.41.185.191 57224 168.188.40.65 80 T... Attack 1\n",
- "4069 [동덕여자대학교 203.30.236.64 62705 210.121.133.59 80... Attack 1\n",
- "9882 [숙명여자대학교 203.252.200.72 61489 78.47.123.172 45... Malwr 1\n",
- "\n",
- "[9824 rows x 3 columns]"
- ]
- },
- "metadata": {},
- "execution_count": 374
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "source": [
- "##################### NTM section End #####################"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "source": [
- "##################### MTM section #####################"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 375,
- "source": [
- "# Take an independent copy of the rows with ACCD_FIND_MTD_CODE == '2' so\n",
- "# that the column assignments in later cells modify MTM_df itself instead\n",
- "# of a slice of df (which raises SettingWithCopyWarning)\n",
- "MTM_df = df[df['ACCD_FIND_MTD_CODE'] == '2'].copy()\n",
- "len(MTM_df)"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "16"
- ]
- },
- "metadata": {},
- "execution_count": 375
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 376,
- "source": [
- "# Pull out the RISK_V2 column, from which the asset, intent and source\n",
- "# (black IP) columns are derived below, and drop its missing entries\n",
- "RISK_V2_MTM = MTM_df['RISK_V2']\n",
- "RISK_V2_FILTERED_MTM = RISK_V2_MTM.dropna()\n",
- "\n",
- "# Element counts before and after dropping NaN\n",
- "print(RISK_V2_MTM.size)\n",
- "print(RISK_V2_FILTERED_MTM.size)\n"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "16\n",
- "16\n"
- ]
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 377,
- "source": [
- "def filter_assets_value_MTM(risk):\n",
- "    # Collect 'RISK_V2.<key>=<description>' for every key of risk that\n",
- "    # contains 'ASSETS_VAL_' and has a truthy value.\n",
- "    # Best effort: when risk cannot be processed it is printed together\n",
- "    # with its type, and the entries gathered so far are returned.\n",
- "    risks = []\n",
- "    try:\n",
- "        for risk_key in risk:\n",
- "            if 'ASSETS_VAL_' in risk_key and risk[risk_key]:\n",
- "                # Descriptions come from the MTM-specific lookup\n",
- "                risk_key_desc = 'RISK_V2.' + risk_key + '=' + get_asset_desc_MTM(risk_key)\n",
- "                risks.append(risk_key_desc)\n",
- "    except Exception:\n",
- "        # Exception rather than a bare except, so KeyboardInterrupt and\n",
- "        # SystemExit are no longer swallowed\n",
- "        print(risk)\n",
- "        print(type(risk))\n",
- "    return risks"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 378,
- "source": [
- "# modified\n",
- "# (field name, description) pairs for the MTM asset fields\n",
- "_ASSET_DESC_MTM = (\n",
- "    ('ASSETS_VAL_1', '공인-전체IP대역(유선)'),\n",
- "    ('ASSETS_VAL_2', '공인-전체IP대역(무선)'),\n",
- "    ('ASSETS_VAL_3', '공인-WEB서버'),\n",
- "    ('ASSETS_VAL_4', '공인-내부응용서버'),\n",
- "    ('ASSETS_VAL_5', '공인-DB서버'),\n",
- "    ('ASSETS_VAL_6', '공인-패치서버'),\n",
- "    ('ASSETS_VAL_7', '공인-네트워크'),\n",
- "    ('ASSETS_VAL_8', '공인-보안'),\n",
- "    ('ASSETS_VAL_9', '공인-업무용PC'),\n",
- "    ('ASSETS_VAL_10', '공인-비업무용PC'),\n",
- "    ('ASSETS_VAL_11', '공인-기타'),\n",
- "    ('ASSETS_VAL_12', '사설-전체IP대역(유선)'),\n",
- "    ('ASSETS_VAL_13', '사설-전체IP대역(무선)'),\n",
- "    ('ASSETS_VAL_14', '사설-WEB서버'),\n",
- "    ('ASSETS_VAL_15', '사설-내부응용서버'),\n",
- "    ('ASSETS_VAL_16', '사설-DB서버'),\n",
- "    ('ASSETS_VAL_17', '사설-패치서버'),\n",
- "    ('ASSETS_VAL_18', '사설-네트워크'),\n",
- "    ('ASSETS_VAL_19', '사설-보안'),\n",
- "    ('ASSETS_VAL_20', '사설-업무용PC'),\n",
- "    ('ASSETS_VAL_21', '사설-비업무용PC'),\n",
- "    ('ASSETS_VAL_22', '사설-기타'),\n",
- ")\n",
- "\n",
- "def get_asset_desc_MTM(asset_field):\n",
- "    # Return the description paired with asset_field, or '' when the\n",
- "    # field name is not one of ASSETS_VAL_1 .. ASSETS_VAL_22\n",
- "    for field_name, description in _ASSET_DESC_MTM:\n",
- "        if asset_field == field_name:\n",
- "            return description\n",
- "    return ''"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 379,
- "source": [
- "# New assets column\n",
- "# New assets column. Series.map keeps the index of the NaN-filtered\n",
- "# RISK_V2 series, so each result lands on its own row; rows whose RISK_V2\n",
- "# was dropped stay NaN instead of shifting values or raising on length.\n",
- "MTM_df['ASSETS_VAL'] = RISK_V2_FILTERED_MTM.map(filter_assets_value_MTM).astype(str)\n",
- "MTM_df[:1]"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-379-706f111c14af>:2: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['ASSETS_VAL']=list(map(filter_assets_value_MTM, RISK_V2_FILTERED_MTM))\n",
- "<ipython-input-379-706f111c14af>:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].astype(str)\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>RISK_V2</th>\n",
- " <th>INST_NM</th>\n",
- " <th>DRULE_ATT_TYPE_CODE1</th>\n",
- " <th>TW_ATT_IP</th>\n",
- " <th>TW_ATT_PORT</th>\n",
- " <th>TW_DMG_IP</th>\n",
- " <th>TW_DMG_PORT</th>\n",
- " <th>ACCD_DMG_PROTO_NM</th>\n",
- " <th>TW_ATT_CT_NM</th>\n",
- " <th>ACCD_FIND_MTD_CODE</th>\n",
- " <th>DRULE_NM</th>\n",
- " <th>ASSETS_VAL</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>남서울대학교</td>\n",
- " <td>Attack</td>\n",
- " <td>159.69.250.163</td>\n",
- " <td>80</td>\n",
- " <td>192.170.112.14</td>\n",
- " <td>8225</td>\n",
- " <td></td>\n",
- " <td>독일</td>\n",
- " <td>2</td>\n",
- " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " RISK_V2 INST_NM \\\n",
- "8 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 남서울대학교 \n",
- "\n",
- " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP \\\n",
- "8 Attack 159.69.250.163 80 192.170.112.14 \n",
- "\n",
- " TW_DMG_PORT ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
- "8 8225 독일 2 \n",
- "\n",
- " DRULE_NM ASSETS_VAL \n",
- "8 Malwr_Infected_29_11_exe_collect_20042001ECSC [] "
- ]
- },
- "metadata": {},
- "execution_count": 379
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 381,
- "source": [
- "# modified\n",
- "def filter_intent_MTM(intent):\n",
- "    # Collect 'RISK_V2.<key>=<description>' for every key of intent that\n",
- "    # contains 'INTENT_VAL_' and has a truthy value\n",
- "    intents = []\n",
- "    for intent_key in intent:\n",
- "        if 'INTENT_VAL_' in intent_key and intent[intent_key]:\n",
- "            # Descriptions come from the MTM-specific lookup\n",
- "            intent_key_desc = 'RISK_V2.' + intent_key + '=' + get_intent_desc_MTM(intent_key)\n",
- "            intents.append(intent_key_desc)\n",
- "    return intents"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 382,
- "source": [
- "# (field name, description) pairs for the MTM intent fields\n",
- "_INTENT_DESC_MTM = (\n",
- "    ('INTENT_VAL_1', '파괴'),\n",
- "    ('INTENT_VAL_2', '유출'),\n",
- "    ('INTENT_VAL_3', '지연'),\n",
- "    ('INTENT_VAL_4', '잠복'),\n",
- "    ('INTENT_VAL_5', '단순침입'),\n",
- "    ('INTENT_VAL_6', 'MD5'),\n",
- "    ('INTENT_VAL_0', 'Default'),\n",
- ")\n",
- "\n",
- "def get_intent_desc_MTM(intent_field):\n",
- "    # Return the description paired with intent_field, or '' when unknown\n",
- "    for field_name, description in _INTENT_DESC_MTM:\n",
- "        if intent_field == field_name:\n",
- "            return description\n",
- "    return ''"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 383,
- "source": [
- "# New column of intent value. Series.map keeps the index of the\n",
- "# NaN-filtered RISK_V2 series, so each result lands on its own row; rows\n",
- "# whose RISK_V2 was dropped stay NaN instead of shifting or raising.\n",
- "MTM_df['INTENT_VAL'] = RISK_V2_FILTERED_MTM.map(filter_intent_MTM).astype(str)\n",
- "MTM_df[:1]"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-383-e32409088c44>:2: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['INTENT_VAL']=list(map(filter_intent_MTM, RISK_V2_FILTERED_MTM))\n",
- "<ipython-input-383-e32409088c44>:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].astype(str)\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>RISK_V2</th>\n",
- " <th>INST_NM</th>\n",
- " <th>DRULE_ATT_TYPE_CODE1</th>\n",
- " <th>TW_ATT_IP</th>\n",
- " <th>TW_ATT_PORT</th>\n",
- " <th>TW_DMG_IP</th>\n",
- " <th>TW_DMG_PORT</th>\n",
- " <th>ACCD_DMG_PROTO_NM</th>\n",
- " <th>TW_ATT_CT_NM</th>\n",
- " <th>ACCD_FIND_MTD_CODE</th>\n",
- " <th>DRULE_NM</th>\n",
- " <th>ASSETS_VAL</th>\n",
- " <th>INTENT_VAL</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>남서울대학교</td>\n",
- " <td>Attack</td>\n",
- " <td>159.69.250.163</td>\n",
- " <td>80</td>\n",
- " <td>192.170.112.14</td>\n",
- " <td>8225</td>\n",
- " <td></td>\n",
- " <td>독일</td>\n",
- " <td>2</td>\n",
- " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
- " <td>[]</td>\n",
- " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " RISK_V2 INST_NM \\\n",
- "8 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 남서울대학교 \n",
- "\n",
- " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP \\\n",
- "8 Attack 159.69.250.163 80 192.170.112.14 \n",
- "\n",
- " TW_DMG_PORT ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
- "8 8225 독일 2 \n",
- "\n",
- " DRULE_NM ASSETS_VAL \\\n",
- "8 Malwr_Infected_29_11_exe_collect_20042001ECSC [] \n",
- "\n",
- " INTENT_VAL \n",
- "8 ['RISK_V2.INTENT_VAL_0=Default'] "
- ]
- },
- "metadata": {},
- "execution_count": 383
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 384,
- "source": [
- "# modified\n",
- "def filter_source_MTM(source):\n",
- "    # Collect 'RISK_V2.<key>=<description>' for every key of source that\n",
- "    # contains 'SOURCE_VAL_' and has a truthy value\n",
- "    sources = []\n",
- "    for source_key in source:\n",
- "        if 'SOURCE_VAL_' in source_key and source[source_key]:\n",
- "            # Descriptions come from the MTM-specific lookup\n",
- "            source_key_desc = 'RISK_V2.' + source_key + '=' + get_source_desc_MTM(source_key)\n",
- "            sources.append(source_key_desc)\n",
- "    return sources"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 385,
- "source": [
- "# (field name, description) pairs for the MTM source fields\n",
- "_SOURCE_DESC_MTM = (\n",
- "    ('SOURCE_VAL_1', '북한IP'),\n",
- "    ('SOURCE_VAL_3', 'ECSC Black IP'),\n",
- ")\n",
- "\n",
- "def get_source_desc_MTM(source_field):\n",
- "    # Return the description paired with source_field, or '' when unknown\n",
- "    for field_name, description in _SOURCE_DESC_MTM:\n",
- "        if source_field == field_name:\n",
- "            return description\n",
- "    return ''"
- ],
- "outputs": [],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 386,
- "source": [
- "# New column of SOURCE_VAL value. Series.map keeps the index of the\n",
- "# NaN-filtered RISK_V2 series, so each result lands on its own row; rows\n",
- "# whose RISK_V2 was dropped stay NaN instead of shifting or raising.\n",
- "MTM_df['SOURCE_VAL'] = RISK_V2_FILTERED_MTM.map(filter_source_MTM).astype(str)\n",
- "MTM_df[:5]"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-386-f88f537aeb2d>:2: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['SOURCE_VAL']=list(map(filter_source_MTM, RISK_V2_FILTERED_MTM))\n",
- "<ipython-input-386-f88f537aeb2d>:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].astype(str)\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>RISK_V2</th>\n",
- " <th>INST_NM</th>\n",
- " <th>DRULE_ATT_TYPE_CODE1</th>\n",
- " <th>TW_ATT_IP</th>\n",
- " <th>TW_ATT_PORT</th>\n",
- " <th>TW_DMG_IP</th>\n",
- " <th>TW_DMG_PORT</th>\n",
- " <th>ACCD_DMG_PROTO_NM</th>\n",
- " <th>TW_ATT_CT_NM</th>\n",
- " <th>ACCD_FIND_MTD_CODE</th>\n",
- " <th>DRULE_NM</th>\n",
- " <th>ASSETS_VAL</th>\n",
- " <th>INTENT_VAL</th>\n",
- " <th>SOURCE_VAL</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>남서울대학교</td>\n",
- " <td>Attack</td>\n",
- " <td>159.69.250.163</td>\n",
- " <td>80</td>\n",
- " <td>192.170.112.14</td>\n",
- " <td>8225</td>\n",
- " <td></td>\n",
- " <td>독일</td>\n",
- " <td>2</td>\n",
- " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
- " <td>[]</td>\n",
- " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>322</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>전라북도교육청</td>\n",
- " <td>Attack</td>\n",
- " <td>115.95.20.116</td>\n",
- " <td>80</td>\n",
- " <td>211.251.39.65</td>\n",
- " <td>2447</td>\n",
- " <td>TCP</td>\n",
- " <td>대한민국</td>\n",
- " <td>2</td>\n",
- " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
- " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
- " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1064</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>전라북도교육청</td>\n",
- " <td>Attack</td>\n",
- " <td>211.210.30.28</td>\n",
- " <td>80</td>\n",
- " <td>211.251.133.40</td>\n",
- " <td>4716</td>\n",
- " <td>TCP</td>\n",
- " <td>대한민국</td>\n",
- " <td>2</td>\n",
- " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
- " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V...</td>\n",
- " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1419</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>전라북도교육청</td>\n",
- " <td>Attack</td>\n",
- " <td>211.210.30.28</td>\n",
- " <td>80</td>\n",
- " <td>211.251.122.33</td>\n",
- " <td>4523</td>\n",
- " <td>TCP</td>\n",
- " <td>대한민국</td>\n",
- " <td>2</td>\n",
- " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
- " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V...</td>\n",
- " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2973</th>\n",
- " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
- " <td>남서울대학교</td>\n",
- " <td>Attack</td>\n",
- " <td>202.176.5.136</td>\n",
- " <td>8136</td>\n",
- " <td>192.168.107.59</td>\n",
- " <td>49207</td>\n",
- " <td>TCP</td>\n",
- " <td>말레이시아</td>\n",
- " <td>2</td>\n",
- " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
- " <td>[]</td>\n",
- " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
- " <td>[]</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " RISK_V2 INST_NM \\\n",
- "8 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 남서울대학교 \n",
- "322 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 전라북도교육청 \n",
- "1064 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 전라북도교육청 \n",
- "1419 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 전라북도교육청 \n",
- "2973 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 남서울대학교 \n",
- "\n",
- " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP \\\n",
- "8 Attack 159.69.250.163 80 192.170.112.14 \n",
- "322 Attack 115.95.20.116 80 211.251.39.65 \n",
- "1064 Attack 211.210.30.28 80 211.251.133.40 \n",
- "1419 Attack 211.210.30.28 80 211.251.122.33 \n",
- "2973 Attack 202.176.5.136 8136 192.168.107.59 \n",
- "\n",
- " TW_DMG_PORT ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
- "8 8225 독일 2 \n",
- "322 2447 TCP 대한민국 2 \n",
- "1064 4716 TCP 대한민국 2 \n",
- "1419 4523 TCP 대한민국 2 \n",
- "2973 49207 TCP 말레이시아 2 \n",
- "\n",
- " DRULE_NM \\\n",
- "8 Malwr_Infected_29_11_exe_collect_20042001ECSC \n",
- "322 Malwr_Infected_29_11_exe_collect_20042001ECSC \n",
- "1064 Malwr_Infected_29_11_exe_collect_20042001ECSC \n",
- "1419 Malwr_Infected_29_11_exe_collect_20042001ECSC \n",
- "2973 Malwr_Infected_29_11_exe_collect_20042001ECSC \n",
- "\n",
- " ASSETS_VAL \\\n",
- "8 [] \n",
- "322 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] \n",
- "1064 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V... \n",
- "1419 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V... \n",
- "2973 [] \n",
- "\n",
- " INTENT_VAL SOURCE_VAL \n",
- "8 ['RISK_V2.INTENT_VAL_0=Default'] [] \n",
- "322 ['RISK_V2.INTENT_VAL_0=Default'] [] \n",
- "1064 ['RISK_V2.INTENT_VAL_0=Default'] [] \n",
- "1419 ['RISK_V2.INTENT_VAL_0=Default'] [] \n",
- "2973 ['RISK_V2.INTENT_VAL_0=Default'] [] "
- ]
- },
- "metadata": {},
- "execution_count": 386
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 387,
- "source": [
- "# RISK_V2 has been expanded into ASSETS_VAL / INTENT_VAL / SOURCE_VAL, so\n",
- "# remove it. Rebinding to the returned frame, rather than dropping in\n",
- "# place, avoids mutating what pandas may treat as a slice of df.\n",
- "MTM_df = MTM_df.drop(columns=['RISK_V2'])\n",
- "MTM_df.columns"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/Users/joohyunyoon/.pyenv/versions/anaconda3-2021.05/lib/python3.8/site-packages/pandas/core/frame.py:4308: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " return super().drop(\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "Index(['INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',\n",
- " 'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',\n",
- " 'ACCD_FIND_MTD_CODE', 'DRULE_NM', 'ASSETS_VAL', 'INTENT_VAL',\n",
- " 'SOURCE_VAL'],\n",
- " dtype='object')"
- ]
- },
- "metadata": {},
- "execution_count": 387
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 388,
- "source": [
- "# Number of missing values in each column\n",
- "MTM_df.isnull().sum()"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "INST_NM 0\n",
- "DRULE_ATT_TYPE_CODE1 0\n",
- "TW_ATT_IP 0\n",
- "TW_ATT_PORT 0\n",
- "TW_DMG_IP 0\n",
- "TW_DMG_PORT 0\n",
- "ACCD_DMG_PROTO_NM 0\n",
- "TW_ATT_CT_NM 0\n",
- "ACCD_FIND_MTD_CODE 0\n",
- "DRULE_NM 0\n",
- "ASSETS_VAL 0\n",
- "INTENT_VAL 0\n",
- "SOURCE_VAL 0\n",
- "dtype: int64"
- ]
- },
- "metadata": {},
- "execution_count": 388
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 389,
- "source": [
- "# Replace NaN: '' in the text columns, 0 in the remaining columns\n",
- "_blank_fill_cols = ['ACCD_DMG_PROTO_NM', 'INST_NM', 'DRULE_ATT_TYPE_CODE1',\n",
- "                    'TW_ATT_CT_NM', 'DRULE_NM']\n",
- "_zero_fill_cols = ['TW_ATT_IP', 'TW_ATT_PORT', 'TW_DMG_IP', 'TW_DMG_PORT',\n",
- "                   'ASSETS_VAL', 'INTENT_VAL', 'SOURCE_VAL']\n",
- "\n",
- "MTM_df[_blank_fill_cols] = MTM_df[_blank_fill_cols].replace(np.nan, '')\n",
- "MTM_df[_zero_fill_cols] = MTM_df[_zero_fill_cols].replace(np.nan, 0)"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-389-02158490b065>:2: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['ACCD_DMG_PROTO_NM']=MTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
- "<ipython-input-389-02158490b065>:3: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['INST_NM']=MTM_df['INST_NM'].replace(np.nan,'')\n",
- "<ipython-input-389-02158490b065>:4: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['DRULE_ATT_TYPE_CODE1']=MTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
- "<ipython-input-389-02158490b065>:5: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['TW_ATT_IP']=MTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
- "<ipython-input-389-02158490b065>:6: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['TW_ATT_PORT']=MTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
- "<ipython-input-389-02158490b065>:7: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['TW_DMG_IP']=MTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
- "<ipython-input-389-02158490b065>:8: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['TW_DMG_PORT']=MTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
- "<ipython-input-389-02158490b065>:9: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['TW_ATT_CT_NM']=MTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
- "<ipython-input-389-02158490b065>:10: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
- "<ipython-input-389-02158490b065>:11: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].replace(np.nan,0)\n",
- "<ipython-input-389-02158490b065>:12: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
- "<ipython-input-389-02158490b065>:13: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['DRULE_NM']=MTM_df['DRULE_NM'].replace(np.nan,'')\n"
- ]
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 390,
- "source": [
- "# Confirm that no missing values remain in any column\n",
- "MTM_df.isnull().sum()"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "INST_NM 0\n",
- "DRULE_ATT_TYPE_CODE1 0\n",
- "TW_ATT_IP 0\n",
- "TW_ATT_PORT 0\n",
- "TW_DMG_IP 0\n",
- "TW_DMG_PORT 0\n",
- "ACCD_DMG_PROTO_NM 0\n",
- "TW_ATT_CT_NM 0\n",
- "ACCD_FIND_MTD_CODE 0\n",
- "DRULE_NM 0\n",
- "ASSETS_VAL 0\n",
- "INTENT_VAL 0\n",
- "SOURCE_VAL 0\n",
- "dtype: int64"
- ]
- },
- "metadata": {},
- "execution_count": 390
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 391,
- "source": [
- "# # Merge all\n",
- "\n",
- "# # Make one string from all of the elements\n",
- "MTM_df['Combined']=MTM_df['INST_NM'].astype(str)+' '+MTM_df['TW_ATT_IP'].astype(str)+' '+MTM_df['TW_ATT_PORT'].astype(str)+' '+MTM_df['TW_DMG_IP'].astype(str)+' '+MTM_df['TW_DMG_PORT'].astype(str) +' '+MTM_df['ACCD_DMG_PROTO_NM'].astype(str)+' '+MTM_df['TW_ATT_CT_NM']+' '+MTM_df['ASSETS_VAL']+' '+MTM_df['INTENT_VAL']+' '+MTM_df['SOURCE_VAL']+' '+MTM_df['DRULE_ATT_TYPE_CODE1']+' '+MTM_df['DRULE_NM']\n",
- "\n",
- "MTM_com=MTM_df['Combined']\n",
- "MTM_com[:10]\n"
- ],
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "<ipython-input-391-644b4d0a1409>:4: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " MTM_df['Combined']=MTM_df['INST_NM'].astype(str)+' '+MTM_df['TW_ATT_IP'].astype(str)+' '+MTM_df['TW_ATT_PORT'].astype(str)+' '+MTM_df['TW_DMG_IP'].astype(str)+' '+MTM_df['TW_DMG_PORT'].astype(str) +' '+MTM_df['ACCD_DMG_PROTO_NM'].astype(str)+' '+MTM_df['TW_ATT_CT_NM']+' '+MTM_df['ASSETS_VAL']+' '+MTM_df['INTENT_VAL']+' '+MTM_df['SOURCE_VAL']+' '+MTM_df['DRULE_ATT_TYPE_CODE1']+' '+MTM_df['DRULE_NM']\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "8 남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...\n",
- "322 전라북도교육청 115.95.20.116 80 211.251.39.65 2447 TC...\n",
- "1064 전라북도교육청 211.210.30.28 80 211.251.133.40 4716 T...\n",
- "1419 전라북도교육청 211.210.30.28 80 211.251.122.33 4523 T...\n",
- "2973 남서울대학교 202.176.5.136 8136 192.168.107.59 49207...\n",
- "3584 경남대학교 209.250.247.60 80 203.253.180.71 2073 TC...\n",
- "4270 전라북도교육청 211.210.30.28 80 211.251.117.65 5212 T...\n",
- "4490 부산대학교 164.125.248.75 1098 195.158.31.58 2961 T...\n",
- "5506 부산대학교 164.125.242.79 52896 46.249.119.133 4941...\n",
- "5765 전라북도교육청 219.249.231.84 80 211.251.82.1 52721 T...\n",
- "Name: Combined, dtype: object"
- ]
- },
- "metadata": {},
- "execution_count": 391
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 392,
- "source": [
- "# Change the type to DataFrame\n",
- "MTM_to_df=pd.DataFrame(MTM_com)\n",
- "MTM_to_df[:5]"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Combined</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>322</th>\n",
- " <td>전라북도교육청 115.95.20.116 80 211.251.39.65 2447 TC...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1064</th>\n",
- " <td>전라북도교육청 211.210.30.28 80 211.251.133.40 4716 T...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1419</th>\n",
- " <td>전라북도교육청 211.210.30.28 80 211.251.122.33 4523 T...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2973</th>\n",
- " <td>남서울대학교 202.176.5.136 8136 192.168.107.59 49207...</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Combined\n",
- "8 남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...\n",
- "322 전라북도교육청 115.95.20.116 80 211.251.39.65 2447 TC...\n",
- "1064 전라북도교육청 211.210.30.28 80 211.251.133.40 4716 T...\n",
- "1419 전라북도교육청 211.210.30.28 80 211.251.122.33 4523 T...\n",
- "2973 남서울대학교 202.176.5.136 8136 192.168.107.59 49207..."
- ]
- },
- "metadata": {},
- "execution_count": 392
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 393,
- "source": [
- "# Convert to a nested list so that the algorithm can be applied\n",
- "MTM_tolist=MTM_to_df.values.tolist()\n",
- "MTM_tolist[:5]"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "[[\"남서울대학교 159.69.250.163 80 192.170.112.14 8225 독일 [] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"],\n",
- " [\"전라북도교육청 115.95.20.116 80 211.251.39.65 2447 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"],\n",
- " [\"전라북도교육청 211.210.30.28 80 211.251.133.40 4716 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_11=공인-기타'] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"],\n",
- " [\"전라북도교육청 211.210.30.28 80 211.251.122.33 4523 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_11=공인-기타'] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"],\n",
- " [\"남서울대학교 202.176.5.136 8136 192.168.107.59 49207 TCP 말레이시아 [] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"]]"
- ]
- },
- "metadata": {},
- "execution_count": 393
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 394,
- "source": [
- "# Apply prefixspan\n",
- "PrefixSpan_MTM = PrefixSpan(MTM_tolist)\n",
- "\n",
- "###### Interchangeable ######\n",
- "# Get every pattern with a frequency of at least 1\n",
- "prefix_MTM=PrefixSpan_MTM.frequent(1)\n",
- "prefix_MTM[:3]"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "[(1,\n",
- " [\"남서울대학교 159.69.250.163 80 192.170.112.14 8225 독일 [] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"]),\n",
- " (1,\n",
- " [\"전라북도교육청 115.95.20.116 80 211.251.39.65 2447 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"]),\n",
- " (1,\n",
- " [\"전라북도교육청 211.210.30.28 80 211.251.133.40 4716 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_11=공인-기타'] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"])]"
- ]
- },
- "metadata": {},
- "execution_count": 394
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 395,
- "source": [
- "# Put the result into a DataFrame\n",
- "prefix_MTM_df=pd.DataFrame(prefix_MTM)\n",
- "prefix_MTM_df[:5]"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>0</th>\n",
- " <th>1</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>1</td>\n",
- " <td>[남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>1</td>\n",
- " <td>[전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>1</td>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>1</td>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>1</td>\n",
- " <td>[남서울대학교 202.176.5.136 8136 192.168.107.59 4920...</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " 0 1\n",
- "0 1 [남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...\n",
- "1 1 [전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...\n",
- "2 1 [전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ...\n",
- "3 1 [전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ...\n",
- "4 1 [남서울대학교 202.176.5.136 8136 192.168.107.59 4920..."
- ]
- },
- "metadata": {},
- "execution_count": 395
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 396,
- "source": [
- "# Rename the columns\n",
- "prefix_MTM_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)\n",
- "\n",
- "# Add a new column to hold the Effect\n",
- "prefix_MTM_df['Effect']=np.nan\n",
- "\n",
- "# Change the order of columns\n",
- "prefix_MTM_df=prefix_MTM_df[['Cause','Effect','Frequency']]\n",
- "prefix_MTM_df[:2]"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Cause</th>\n",
- " <th>Effect</th>\n",
- " <th>Frequency</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>[남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
- " <td>NaN</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>[전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...</td>\n",
- " <td>NaN</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Cause Effect Frequency\n",
- "0 [남서울대학교 159.69.250.163 80 192.170.112.14 8225 ... NaN 1\n",
- "1 [전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T... NaN 1"
- ]
- },
- "metadata": {},
- "execution_count": 396
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 397,
- "source": [
- "# Define the function that finds the rule name\n",
- "def generate_cause_MTM(cell):\n",
- " drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
- " for drule in drules:\n",
- " if ' '+drule in cell[0]:\n",
- " return drule \n",
- " return ''\n",
- " \n",
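- "# Map each cause to its rule name, which becomes the effect\n",
- "# NOTE(review): the call below uses generate_cause, not generate_cause_MTM defined above; confirm which one is intended\n",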
- "effect_MTM=list(map(generate_cause, prefix_MTM_df.Cause))\n",
- "\n",
- "# Assign the rule name as an effect\n",
- "prefix_MTM_df['Effect']=effect_MTM\n",
- "prefix_MTM_df.sort_values(by=['Frequency'],ascending=False)"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Cause</th>\n",
- " <th>Effect</th>\n",
- " <th>Frequency</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>[남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>[전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>[남서울대학교 202.176.5.136 8136 192.168.107.59 4920...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>[경남대학교 209.250.247.60 80 203.253.180.71 2073 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6</th>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7</th>\n",
- " <td>[부산대학교 164.125.248.75 1098 195.158.31.58 2961 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>[부산대학교 164.125.242.79 52896 46.249.119.133 494...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9</th>\n",
- " <td>[전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10</th>\n",
- " <td>[전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>11</th>\n",
- " <td>[남서울대학교 173.208.153.130 8130 220.68.191.80 498...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>12</th>\n",
- " <td>[경남대학교 211.231.104.12 80 203.253.179.17 55066 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>13</th>\n",
- " <td>[부산대학교 164.125.169.37 3697 45.80.184.171 80 TC...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>14</th>\n",
- " <td>[남서울대학교 172.83.155.170 8170 220.68.191.24 4919...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>15</th>\n",
- " <td>[부산대학교 164.125.244.196 1098 203.96.170.114 570...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Cause Effect Frequency\n",
- "0 [남서울대학교 159.69.250.163 80 192.170.112.14 8225 ... Attack 1\n",
- "1 [전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T... Attack 1\n",
- "2 [전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ... Attack 1\n",
- "3 [전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ... Attack 1\n",
- "4 [남서울대학교 202.176.5.136 8136 192.168.107.59 4920... Attack 1\n",
- "5 [경남대학교 209.250.247.60 80 203.253.180.71 2073 T... Attack 1\n",
- "6 [전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ... Attack 1\n",
- "7 [부산대학교 164.125.248.75 1098 195.158.31.58 2961 ... Attack 1\n",
- "8 [부산대학교 164.125.242.79 52896 46.249.119.133 494... Attack 1\n",
- "9 [전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ... Attack 1\n",
- "10 [전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T... Attack 1\n",
- "11 [남서울대학교 173.208.153.130 8130 220.68.191.80 498... Attack 1\n",
- "12 [경남대학교 211.231.104.12 80 203.253.179.17 55066 ... Attack 1\n",
- "13 [부산대학교 164.125.169.37 3697 45.80.184.171 80 TC... Attack 1\n",
- "14 [남서울대학교 172.83.155.170 8170 220.68.191.24 4919... Attack 1\n",
- "15 [부산대학교 164.125.244.196 1098 203.96.170.114 570... Attack 1"
- ]
- },
- "metadata": {},
- "execution_count": 397
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": 399,
- "source": [
- "# Attack Filter\n",
- "def Attack_filter_MTM(ps):\n",
- " return ' Attack' in ps[0]\n",
- "\n",
- "att_filter_MTM=prefix_MTM_df[list(map(Attack_filter_MTM, prefix_MTM_df.Cause))].fillna('Attack')\n",
- "\n",
- "# Malwr Filter\n",
- "def Malwr_filter_MTM(ps):\n",
- " return ' Malwr' in ps[0]\n",
- "\n",
- "mal_filter_MTM=prefix_MTM_df[list(map(Malwr_filter_MTM, prefix_MTM_df.Cause))].fillna('Malwr')\n",
- "\n",
- "# DDOS Filter\n",
- "def DDOS_filter_MTM(ps):\n",
- " return ' DDOS' in ps[0]\n",
- "\n",
- "dd_filter_MTM=prefix_MTM_df[list(map(DDOS_filter_MTM, prefix_MTM_df.Cause))].fillna('DDOS')\n",
- "\n",
- "# HACK Filter\n",
- "def HACK_filter_MTM(ps):\n",
- " return ' HACK' in ps[0]\n",
- "\n",
- "hack_filter_MTM=prefix_MTM_df[list(map(HACK_filter_MTM, prefix_MTM_df.Cause))].fillna('HACK')\n",
- "\n",
- "# MAIL Filter\n",
- "def MAIL_filter_MTM(ps):\n",
- " return ' MAIL' in ps[0]\n",
- "\n",
- "mail_filter_MTM=prefix_MTM_df[list(map(MAIL_filter_MTM, prefix_MTM_df.Cause))].fillna('MAIL')\n",
- "\n",
- "# WEB Filter\n",
- "def WEB_filter_MTM(ps):\n",
- " return ' WEB' in ps[0]\n",
- "\n",
- "prefix_MTM_df[:5]\n",
- "web_filter_MTM=prefix_MTM_df[list(map(WEB_filter_MTM, prefix_MTM_df.Cause))].fillna('WEB')\n",
- "\n",
- "frames_MTM = [att_filter_MTM, mal_filter_MTM, dd_filter_MTM, hack_filter_MTM, mail_filter_MTM, web_filter_MTM]\n",
- "result_MTM = pd.concat(frames_MTM)\n",
- "result_MTM.sort_values(by=['Frequency'],ascending=False)"
- ],
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Cause</th>\n",
- " <th>Effect</th>\n",
- " <th>Frequency</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>[남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>[전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>14</th>\n",
- " <td>[남서울대학교 172.83.155.170 8170 220.68.191.24 4919...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>13</th>\n",
- " <td>[부산대학교 164.125.169.37 3697 45.80.184.171 80 TC...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>12</th>\n",
- " <td>[경남대학교 211.231.104.12 80 203.253.179.17 55066 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>11</th>\n",
- " <td>[남서울대학교 173.208.153.130 8130 220.68.191.80 498...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10</th>\n",
- " <td>[전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9</th>\n",
- " <td>[전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>[부산대학교 164.125.242.79 52896 46.249.119.133 494...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7</th>\n",
- " <td>[부산대학교 164.125.248.75 1098 195.158.31.58 2961 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6</th>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>[경남대학교 209.250.247.60 80 203.253.180.71 2073 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>[남서울대학교 202.176.5.136 8136 192.168.107.59 4920...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>[전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>[남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>15</th>\n",
- " <td>[부산대학교 164.125.244.196 1098 203.96.170.114 570...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>14</th>\n",
- " <td>[남서울대학교 172.83.155.170 8170 220.68.191.24 4919...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>13</th>\n",
- " <td>[부산대학교 164.125.169.37 3697 45.80.184.171 80 TC...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>12</th>\n",
- " <td>[경남대학교 211.231.104.12 80 203.253.179.17 55066 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>11</th>\n",
- " <td>[남서울대학교 173.208.153.130 8130 220.68.191.80 498...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10</th>\n",
- " <td>[전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9</th>\n",
- " <td>[전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>[부산대학교 164.125.242.79 52896 46.249.119.133 494...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7</th>\n",
- " <td>[부산대학교 164.125.248.75 1098 195.158.31.58 2961 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6</th>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>[경남대학교 209.250.247.60 80 203.253.180.71 2073 T...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>[남서울대학교 202.176.5.136 8136 192.168.107.59 4920...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>[전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>15</th>\n",
- " <td>[부산대학교 164.125.244.196 1098 203.96.170.114 570...</td>\n",
- " <td>Attack</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Cause Effect Frequency\n",
- "0 [남서울대학교 159.69.250.163 80 192.170.112.14 8225 ... Attack 1\n",
- "1 [전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T... Attack 1\n",
- "14 [남서울대학교 172.83.155.170 8170 220.68.191.24 4919... Attack 1\n",
- "13 [부산대학교 164.125.169.37 3697 45.80.184.171 80 TC... Attack 1\n",
- "12 [경남대학교 211.231.104.12 80 203.253.179.17 55066 ... Attack 1\n",
- "11 [남서울대학교 173.208.153.130 8130 220.68.191.80 498... Attack 1\n",
- "10 [전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T... Attack 1\n",
- "9 [전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ... Attack 1\n",
- "8 [부산대학교 164.125.242.79 52896 46.249.119.133 494... Attack 1\n",
- "7 [부산대학교 164.125.248.75 1098 195.158.31.58 2961 ... Attack 1\n",
- "6 [전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ... Attack 1\n",
- "5 [경남대학교 209.250.247.60 80 203.253.180.71 2073 T... Attack 1\n",
- "4 [남서울대학교 202.176.5.136 8136 192.168.107.59 4920... Attack 1\n",
- "3 [전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ... Attack 1\n",
- "2 [전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ... Attack 1\n",
- "1 [전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T... Attack 1\n",
- "0 [남서울대학교 159.69.250.163 80 192.170.112.14 8225 ... Attack 1\n",
- "15 [부산대학교 164.125.244.196 1098 203.96.170.114 570... Attack 1\n",
- "14 [남서울대학교 172.83.155.170 8170 220.68.191.24 4919... Attack 1\n",
- "13 [부산대학교 164.125.169.37 3697 45.80.184.171 80 TC... Attack 1\n",
- "12 [경남대학교 211.231.104.12 80 203.253.179.17 55066 ... Attack 1\n",
- "11 [남서울대학교 173.208.153.130 8130 220.68.191.80 498... Attack 1\n",
- "10 [전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T... Attack 1\n",
- "9 [전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ... Attack 1\n",
- "8 [부산대학교 164.125.242.79 52896 46.249.119.133 494... Attack 1\n",
- "7 [부산대학교 164.125.248.75 1098 195.158.31.58 2961 ... Attack 1\n",
- "6 [전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ... Attack 1\n",
- "5 [경남대학교 209.250.247.60 80 203.253.180.71 2073 T... Attack 1\n",
- "4 [남서울대학교 202.176.5.136 8136 192.168.107.59 4920... Attack 1\n",
- "3 [전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ... Attack 1\n",
- "2 [전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ... Attack 1\n",
- "15 [부산대학교 164.125.244.196 1098 203.96.170.114 570... Attack 1"
- ]
- },
- "metadata": {},
- "execution_count": 399
- }
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "source": [],
- "outputs": [],
- "metadata": {}
- }
- ],
- "metadata": {
- "orig_nbformat": 4,
- "language_info": {
- "name": "python",
- "version": "3.8.8",
- "mimetype": "text/x-python",
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "pygments_lexer": "ipython3",
- "nbconvert_exporter": "python",
- "file_extension": ".py"
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3.8.8 64-bit ('anaconda3-2021.05': pyenv)"
- },
- "interpreter": {
- "hash": "f4c95b739b6e12099bf04dd3c3302c87ef63de308852c06a666878cc26abb6cc"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|