파이썬 기반의 Prefix span 분석_fork
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

PrefixSpan_20210925.ipynb 135KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "source": [
  7. "import os\n",
  8. "import array\n",
  9. "import math\n",
  10. "import pickle\n",
  11. "# import joblib\n",
  12. "import sys\n",
  13. "import argparse\n",
  14. "import pandas as pd\n",
  15. "import numpy as np\n",
  16. "import matplotlib.pyplot as plt\n",
  17. "from datetime import datetime\n",
  18. "from pprint import pprint\n",
  19. "import ssl\n",
  20. "from elasticsearch.connection import create_ssl_context\n",
  21. "from elasticsearch import Elasticsearch\n",
  22. "from elasticsearch import helpers\n",
  23. "import urllib3"
  24. ],
  25. "outputs": [],
  26. "metadata": {}
  27. },
  28. {
  29. "cell_type": "code",
  30. "execution_count": 3,
  31. "source": [
  32. "import pandas as pd\n",
  33. "import numpy as np\n",
  34. "from mlxtend.preprocessing import TransactionEncoder\n",
  35. "from mlxtend.frequent_patterns import association_rules, fpgrowth\n",
  36. "from prefixspan import PrefixSpan"
  37. ],
  38. "outputs": [],
  39. "metadata": {}
  40. },
  41. {
  42. "cell_type": "code",
  43. "execution_count": 4,
  44. "source": [
  45. "ssl_context = create_ssl_context()\n",
  46. "ssl_context.check_hostname = False\n",
  47. "ssl_context.verify_mode = ssl.CERT_NONE"
  48. ],
  49. "outputs": [],
  50. "metadata": {}
  51. },
  52. {
  53. "cell_type": "code",
  54. "execution_count": 12,
  55. "source": [
  56. "es = Elasticsearch(hosts=[{'host': '223.194.92.152', 'port': 9200}], scheme=\"http\",verify_certs=False, timeout=300, ssl_context=ssl_context, http_auth=(\"elasticsearch\", \"hadoop2019@!@#$\"))\n",
  57. "urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)"
  58. ],
  59. "outputs": [
  60. {
  61. "output_type": "stream",
  62. "name": "stderr",
  63. "text": [
  64. "/Users/joohyunyoon/.pyenv/versions/anaconda3-2021.05/lib/python3.8/site-packages/elasticsearch/connection/http_urllib3.py:158: UserWarning: When using `ssl_context`, all other SSL related kwargs are ignored\n",
  65. " warnings.warn(\n"
  66. ]
  67. }
  68. ],
  69. "metadata": {}
  70. },
  71. {
  72. "cell_type": "code",
  73. "execution_count": 347,
  74. "source": [
  75. "######## 2020, 1 year ########\n",
  76. "######## No MTM data exists for 2018 or 2019 ########\n",
  77. "\n",
  78. "body = {\n",
  79. " \"size\" : 10000,\n",
  80. " \"query\": {\n",
  81. " \"range\":{\n",
  82. " \"TW_COLLECT_DT\":{\n",
  83. " \"gte\":\"2020-01-01T00:00:00.625+09:00\",\n",
  84. " \"lte\":\"2020-12-31T00:00:00.625+09:00\" ################\n",
  85. " }\n",
  86. " }\n",
  87. " }\n",
  88. "}\n",
  89. " \n",
  90. "res = es.search(index = 'ts_data_accident-2020', body=body)\n",
  91. "data = res['hits']['hits']\n",
  92. "total = res['hits']['total']\n",
  93. "\n",
  94. "print(total)\n",
  95. "\n",
  96. "accident = []\n",
  97. "for da in data:\n",
  98. " att_type = da['_source']\n",
  99. " # att_type[\"POL_NM\"]=att_type[\"SCEN_INFOS\"][0][\"POL_NM\"]\n",
  100. " accident.append(att_type)\n",
  101. "\n",
  102. "# df = pd.DataFrame(accident,dtype=str)\n",
  103. "df = pd.DataFrame(accident)\n",
  104. "\n",
  105. "print(df.head())"
  106. ],
  107. "outputs": [
  108. {
  109. "output_type": "stream",
  110. "name": "stderr",
  111. "text": [
  112. "<ipython-input-347-d3fe348d5858>:16: DeprecationWarning: The 'body' parameter is deprecated for the 'search' API and will be removed in 8.0.0. Instead use API parameters directly. See https://github.com/elastic/elasticsearch-py/issues/1698 for more information\n",
  113. " res = es.search(index = 'ts_data_accident-2020', body=body)\n"
  114. ]
  115. },
  116. {
  117. "output_type": "stream",
  118. "name": "stdout",
  119. "text": [
  120. "26531\n",
  121. " TW_ATT_IP_SEARCH_DATA \\\n",
  122. "0 {'ATT_DETAIL_ORG_NM': '계원예술대학교', 'INST_HIGH_CO... \n",
  123. "1 {'ATT_DETAIL_ORG_NM': '국민대학교', 'INST_HIGH_CODE... \n",
  124. "2 {'ATT_DETAIL_ORG_NM': '서울여자대학교', 'INST_HIGH_CO... \n",
  125. "3 {'ATT_DETAIL_ORG_NM': '서울대학교', 'INST_HIGH_CODE... \n",
  126. "4 None \n",
  127. "\n",
  128. " TW_ATT_GEOLOCATION ACCD_CHARGER_ID \\\n",
  129. "0 37.5112, 126.97409999999999 kmryu787 \n",
  130. "1 37.751, -97.822 hk8120 \n",
  131. "2 34.7725, 113.7266 sjsong \n",
  132. "3 50.9919, -4.2474 hurache \n",
  133. "4 41.0, 64.0 deuniv \n",
  134. "\n",
  135. " ACCD_DMG_ATTACK_NM ACCD_DCL_INST_NM \\\n",
  136. "0 Malwr-Resource-29-01-coinminer.18033003ECSC# 교육부사이버안전센터 \n",
  137. "1 Malwr-Sysinfo-29-01-json.16101205ECSC# 교육부사이버안전센터 \n",
  138. "2 Attack-WebScan-29-01-download(passwd).19092602... 교육부사이버안전센터 \n",
  139. "3 Attack-Scan-29-01-botnet(satori).19061302ECSC# 교육부사이버안전센터 \n",
  140. "4 Malwr-Resource-29-01-photo.scr.16053101ECSC# 교육부사이버안전센터 \n",
  141. "\n",
  142. " ACCD_FIND_MTD_CODE RISK_V2 \\\n",
  143. "0 1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... \n",
  144. "1 1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... \n",
  145. "2 1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... \n",
  146. "3 1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... \n",
  147. "4 1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... \n",
  148. "\n",
  149. " TW_DMG_CT_NM ACCD_CLOSE_PROCESS_CODE TW_DMG_GEOLOCATION ... \\\n",
  150. "0 독일 1 51.2993, 9.491 ... \n",
  151. "1 대한민국 1 37.5985, 126.9783 ... \n",
  152. "2 대한민국 1 37.5112, 126.97409999999999 ... \n",
  153. "3 대한민국 1 37.5985, 126.9783 ... \n",
  154. "4 대한민국 1 37.5112, 126.97409999999999 ... \n",
  155. "\n",
  156. " ACCD_EMAIL DRULE_NM \\\n",
  157. "0 ryu@kaywon.ac.kr Malwr-Resource-29-01-coinminer.18033003ECSC# \n",
  158. "1 hk8120@kookmin.ac.kr Malwr-Sysinfo-29-01-json.16101205ECSC# \n",
  159. "2 sjsong@swu.ac.kr Attack-WebScan-29-01-download(passwd).19092602... \n",
  160. "3 snucert@snu.ac.kr Attack-Scan-29-01-botnet(satori).19061302ECSC# \n",
  161. "4 sklee82@deu.ac.kr Malwr-Resource-29-01-photo.scr.16053101ECSC# \n",
  162. "\n",
  163. " GEAR_CODE USER_RISK_V2 \\\n",
  164. "0 73017000 {'WEEKNESS_VAL_4': '0', 'WEEKNESS_VAL_5': '0',... \n",
  165. "1 53026000 {'WEEKNESS_VAL_4': '0', 'WEEKNESS_VAL_5': '0',... \n",
  166. "2 53061000 {'WEEKNESS_VAL_4': '0', 'WEEKNESS_VAL_5': '0',... \n",
  167. "3 A000001609-N-00005 {'WEEKNESS_VAL_4': '0', 'WEEKNESS_VAL_5': '0',... \n",
  168. "4 A000001146-N-00002 {'WEEKNESS_VAL_4': '0', 'WEEKNESS_VAL_5': '0',... \n",
  169. "\n",
  170. " ACCD_DCL_REPORTER_POSITION TW_ATT_IP ACCD_NCSC_TRANSFER_NO \\\n",
  171. "0 선임 117.17.214.4 T20-043000077 \n",
  172. "1 사원 104.18.56.68 T20-050500085 \n",
  173. "2 사원 103.115.42.158 T20-010400003 \n",
  174. "3 사원 78.151.86.161 T20-010400014 \n",
  175. "4 선임 213.230.84.85 T20-050600072 \n",
  176. "\n",
  177. " TW_MALIG_FILE_BINARY INST_TYPE_DETAIL_CODE IS_DETAIL \n",
  178. "0 NaN NaN NaN \n",
  179. "1 NaN NaN NaN \n",
  180. "2 NaN NaN NaN \n",
  181. "3 NaN NaN NaN \n",
  182. "4 NaN NaN NaN \n",
  183. "\n",
  184. "[5 rows x 134 columns]\n"
  185. ]
  186. }
  187. ],
  188. "metadata": {}
  189. },
  190. {
  191. "cell_type": "code",
  192. "execution_count": 348,
  193. "source": [
  194. "df=df[['RISK_V2','INST_NM','DRULE_ATT_TYPE_CODE1','TW_ATT_IP','TW_ATT_PORT','TW_DMG_IP','TW_DMG_PORT','ACCD_DMG_PROTO_NM','TW_ATT_CT_NM','ACCD_FIND_MTD_CODE','DRULE_NM']].dropna()\n",
  195. "len(df)\n",
  196. "df.head()"
  197. ],
  198. "outputs": [
  199. {
  200. "output_type": "execute_result",
  201. "data": {
  202. "text/html": [
  203. "<div>\n",
  204. "<style scoped>\n",
  205. " .dataframe tbody tr th:only-of-type {\n",
  206. " vertical-align: middle;\n",
  207. " }\n",
  208. "\n",
  209. " .dataframe tbody tr th {\n",
  210. " vertical-align: top;\n",
  211. " }\n",
  212. "\n",
  213. " .dataframe thead th {\n",
  214. " text-align: right;\n",
  215. " }\n",
  216. "</style>\n",
  217. "<table border=\"1\" class=\"dataframe\">\n",
  218. " <thead>\n",
  219. " <tr style=\"text-align: right;\">\n",
  220. " <th></th>\n",
  221. " <th>RISK_V2</th>\n",
  222. " <th>INST_NM</th>\n",
  223. " <th>DRULE_ATT_TYPE_CODE1</th>\n",
  224. " <th>TW_ATT_IP</th>\n",
  225. " <th>TW_ATT_PORT</th>\n",
  226. " <th>TW_DMG_IP</th>\n",
  227. " <th>TW_DMG_PORT</th>\n",
  228. " <th>ACCD_DMG_PROTO_NM</th>\n",
  229. " <th>TW_ATT_CT_NM</th>\n",
  230. " <th>ACCD_FIND_MTD_CODE</th>\n",
  231. " <th>DRULE_NM</th>\n",
  232. " </tr>\n",
  233. " </thead>\n",
  234. " <tbody>\n",
  235. " <tr>\n",
  236. " <th>0</th>\n",
  237. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  238. " <td>계원예술대학교</td>\n",
  239. " <td>Malwr</td>\n",
  240. " <td>117.17.214.4</td>\n",
  241. " <td>2846</td>\n",
  242. " <td>5.9.163.18</td>\n",
  243. " <td>50001</td>\n",
  244. " <td>TCP</td>\n",
  245. " <td>대한민국</td>\n",
  246. " <td>1</td>\n",
  247. " <td>Malwr-Resource-29-01-coinminer.18033003ECSC#</td>\n",
  248. " </tr>\n",
  249. " <tr>\n",
  250. " <th>1</th>\n",
  251. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  252. " <td>국민대학교</td>\n",
  253. " <td>Malwr</td>\n",
  254. " <td>104.18.56.68</td>\n",
  255. " <td>80</td>\n",
  256. " <td>210.123.34.247</td>\n",
  257. " <td>51318</td>\n",
  258. " <td>TCP</td>\n",
  259. " <td>미국</td>\n",
  260. " <td>1</td>\n",
  261. " <td>Malwr-Sysinfo-29-01-json.16101205ECSC#</td>\n",
  262. " </tr>\n",
  263. " <tr>\n",
  264. " <th>2</th>\n",
  265. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  266. " <td>서울여자대학교</td>\n",
  267. " <td>Attack</td>\n",
  268. " <td>103.115.42.158</td>\n",
  269. " <td>46728</td>\n",
  270. " <td>203.246.40.25</td>\n",
  271. " <td>80</td>\n",
  272. " <td>TCP</td>\n",
  273. " <td>중국</td>\n",
  274. " <td>1</td>\n",
  275. " <td>Attack-WebScan-29-01-download(passwd).19092602...</td>\n",
  276. " </tr>\n",
  277. " <tr>\n",
  278. " <th>3</th>\n",
  279. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  280. " <td>서울대학교</td>\n",
  281. " <td>Attack</td>\n",
  282. " <td>78.151.86.161</td>\n",
  283. " <td>42911</td>\n",
  284. " <td>147.46.10.138</td>\n",
  285. " <td>80</td>\n",
  286. " <td>TCP</td>\n",
  287. " <td>영국</td>\n",
  288. " <td>1</td>\n",
  289. " <td>Attack-Scan-29-01-botnet(satori).19061302ECSC#</td>\n",
  290. " </tr>\n",
  291. " <tr>\n",
  292. " <th>4</th>\n",
  293. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  294. " <td>동의대학교</td>\n",
  295. " <td>Malwr</td>\n",
  296. " <td>213.230.84.85</td>\n",
  297. " <td>3666</td>\n",
  298. " <td>114.70.187.161</td>\n",
  299. " <td>49460</td>\n",
  300. " <td>TCP</td>\n",
  301. " <td>우즈베키스탄</td>\n",
  302. " <td>1</td>\n",
  303. " <td>Malwr-Resource-29-01-photo.scr.16053101ECSC#</td>\n",
  304. " </tr>\n",
  305. " </tbody>\n",
  306. "</table>\n",
  307. "</div>"
  308. ],
  309. "text/plain": [
  310. " RISK_V2 INST_NM \\\n",
  311. "0 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 계원예술대학교 \n",
  312. "1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 국민대학교 \n",
  313. "2 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 서울여자대학교 \n",
  314. "3 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 서울대학교 \n",
  315. "4 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 동의대학교 \n",
  316. "\n",
  317. " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP \\\n",
  318. "0 Malwr 117.17.214.4 2846 5.9.163.18 \n",
  319. "1 Malwr 104.18.56.68 80 210.123.34.247 \n",
  320. "2 Attack 103.115.42.158 46728 203.246.40.25 \n",
  321. "3 Attack 78.151.86.161 42911 147.46.10.138 \n",
  322. "4 Malwr 213.230.84.85 3666 114.70.187.161 \n",
  323. "\n",
  324. " TW_DMG_PORT ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
  325. "0 50001 TCP 대한민국 1 \n",
  326. "1 51318 TCP 미국 1 \n",
  327. "2 80 TCP 중국 1 \n",
  328. "3 80 TCP 영국 1 \n",
  329. "4 49460 TCP 우즈베키스탄 1 \n",
  330. "\n",
  331. " DRULE_NM \n",
  332. "0 Malwr-Resource-29-01-coinminer.18033003ECSC# \n",
  333. "1 Malwr-Sysinfo-29-01-json.16101205ECSC# \n",
  334. "2 Attack-WebScan-29-01-download(passwd).19092602... \n",
  335. "3 Attack-Scan-29-01-botnet(satori).19061302ECSC# \n",
  336. "4 Malwr-Resource-29-01-photo.scr.16053101ECSC# "
  337. ]
  338. },
  339. "metadata": {},
  340. "execution_count": 348
  341. }
  342. ],
  343. "metadata": {}
  344. },
  345. {
  346. "cell_type": "code",
  347. "execution_count": 349,
  348. "source": [
  349. "##################### NTM section #####################"
  350. ],
  351. "outputs": [],
  352. "metadata": {}
  353. },
  354. {
  355. "cell_type": "code",
  356. "execution_count": 350,
  357. "source": [
  358. "NTM_df=df[df['ACCD_FIND_MTD_CODE']=='1']\n",
  359. "len(NTM_df)"
  360. ],
  361. "outputs": [
  362. {
  363. "output_type": "execute_result",
  364. "data": {
  365. "text/plain": [
  366. "9894"
  367. ]
  368. },
  369. "metadata": {},
  370. "execution_count": 350
  371. }
  372. ],
  373. "metadata": {}
  374. },
  375. {
  376. "cell_type": "code",
  377. "execution_count": 351,
  378. "source": [
  379. "# Extract the RISK_V2 column so that the asset, risk, intent, and black-IP values can be pulled out of it\n",
  380. "RISK_V2=NTM_df['RISK_V2']\n",
  381. "\n",
  382. "RISK_V2_FILTERED=RISK_V2.dropna()\n",
  383. "print(RISK_V2.size)\n",
  384. "print(RISK_V2_FILTERED.size)\n",
  385. "\n",
  386. "\n",
  387. "\n",
  388. "\n"
  389. ],
  390. "outputs": [
  391. {
  392. "output_type": "stream",
  393. "name": "stdout",
  394. "text": [
  395. "9894\n",
  396. "9894\n"
  397. ]
  398. }
  399. ],
  400. "metadata": {}
  401. },
  402. {
  403. "cell_type": "code",
  404. "execution_count": 352,
  405. "source": [
  406. "def filter_assets_value(risk):\n",
  407. " risks=[]\n",
  408. " try:\n",
  409. " for risk_key in risk:\n",
  410. " if 'ASSETS_VAL_' in risk_key and risk[risk_key]:\n",
  411. " risk_key_desc = 'RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key)\n",
  412. " risks.append(risk_key_desc)\n",
  413. " except:\n",
  414. " print(risk)\n",
  415. " print(type(risk))\n",
  416. " finally:\n",
  417. " return risks\n",
  418. " \n",
  419. " "
  420. ],
  421. "outputs": [],
  422. "metadata": {}
  423. },
  424. {
  425. "cell_type": "code",
  426. "execution_count": 353,
  427. "source": [
  428. "# modified\n",
  429. "def get_asset_desc(asset_field):\n",
  430. " if asset_field == 'ASSETS_VAL_1':\n",
  431. " return '공인-전체IP대역(유선)'\n",
  432. " elif asset_field == 'ASSETS_VAL_2':\n",
  433. " return '공인-전체IP대역(무선)'\n",
  434. " elif asset_field == 'ASSETS_VAL_3':\n",
  435. " return '공인-WEB서버'\n",
  436. " elif asset_field == 'ASSETS_VAL_4':\n",
  437. " return '공인-내부응용서버'\n",
  438. " elif asset_field == 'ASSETS_VAL_5':\n",
  439. " return '공인-DB서버'\n",
  440. " elif asset_field == 'ASSETS_VAL_6':\n",
  441. " return '공인-패치서버'\n",
  442. " elif asset_field == 'ASSETS_VAL_7':\n",
  443. " return '공인-네트워크'\n",
  444. " elif asset_field == 'ASSETS_VAL_8':\n",
  445. " return '공인-보안'\n",
  446. " elif asset_field == 'ASSETS_VAL_9':\n",
  447. " return '공인-업무용PC'\n",
  448. " elif asset_field == 'ASSETS_VAL_10':\n",
  449. " return '공인-비업무용PC'\n",
  450. " elif asset_field == 'ASSETS_VAL_11':\n",
  451. " return '공인-기타'\n",
  452. " elif asset_field == 'ASSETS_VAL_12':\n",
  453. " return '사설-전체IP대역(유선)'\n",
  454. " elif asset_field == 'ASSETS_VAL_13':\n",
  455. " return '사설-전체IP대역(무선)'\n",
  456. " elif asset_field == 'ASSETS_VAL_14':\n",
  457. " return '사설-WEB서버'\n",
  458. " elif asset_field == 'ASSETS_VAL_15':\n",
  459. " return '사설-내부응용서버'\n",
  460. " elif asset_field == 'ASSETS_VAL_16':\n",
  461. " return '사설-DB서버'\n",
  462. " elif asset_field == 'ASSETS_VAL_17':\n",
  463. " return '사설-패치서버'\n",
  464. " elif asset_field == 'ASSETS_VAL_18':\n",
  465. " return '사설-네트워크'\n",
  466. " elif asset_field == 'ASSETS_VAL_19':\n",
  467. " return '사설-보안'\n",
  468. " elif asset_field == 'ASSETS_VAL_20':\n",
  469. " return '사설-업무용PC'\n",
  470. " elif asset_field == 'ASSETS_VAL_21':\n",
  471. " return '사설-비업무용PC'\n",
  472. " elif asset_field == 'ASSETS_VAL_22':\n",
  473. " return '사설-기타'\n",
  474. " else:\n",
  475. " return ''"
  476. ],
  477. "outputs": [],
  478. "metadata": {}
  479. },
  480. {
  481. "cell_type": "code",
  482. "execution_count": 354,
  483. "source": [
  484. "# New assets column\n",
  485. "NTM_df['ASSETS_VAL']=list(map(filter_assets_value, RISK_V2_FILTERED))\n",
  486. "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].astype(str)\n",
  487. "NTM_df[:1]"
  488. ],
  489. "outputs": [
  490. {
  491. "output_type": "stream",
  492. "name": "stderr",
  493. "text": [
  494. "<ipython-input-354-a30068447e14>:2: SettingWithCopyWarning: \n",
  495. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  496. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  497. "\n",
  498. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  499. " NTM_df['ASSETS_VAL']=list(map(filter_assets_value, RISK_V2_FILTERED))\n",
  500. "<ipython-input-354-a30068447e14>:3: SettingWithCopyWarning: \n",
  501. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  502. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  503. "\n",
  504. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  505. " NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].astype(str)\n"
  506. ]
  507. },
  508. {
  509. "output_type": "execute_result",
  510. "data": {
  511. "text/html": [
  512. "<div>\n",
  513. "<style scoped>\n",
  514. " .dataframe tbody tr th:only-of-type {\n",
  515. " vertical-align: middle;\n",
  516. " }\n",
  517. "\n",
  518. " .dataframe tbody tr th {\n",
  519. " vertical-align: top;\n",
  520. " }\n",
  521. "\n",
  522. " .dataframe thead th {\n",
  523. " text-align: right;\n",
  524. " }\n",
  525. "</style>\n",
  526. "<table border=\"1\" class=\"dataframe\">\n",
  527. " <thead>\n",
  528. " <tr style=\"text-align: right;\">\n",
  529. " <th></th>\n",
  530. " <th>RISK_V2</th>\n",
  531. " <th>INST_NM</th>\n",
  532. " <th>DRULE_ATT_TYPE_CODE1</th>\n",
  533. " <th>TW_ATT_IP</th>\n",
  534. " <th>TW_ATT_PORT</th>\n",
  535. " <th>TW_DMG_IP</th>\n",
  536. " <th>TW_DMG_PORT</th>\n",
  537. " <th>ACCD_DMG_PROTO_NM</th>\n",
  538. " <th>TW_ATT_CT_NM</th>\n",
  539. " <th>ACCD_FIND_MTD_CODE</th>\n",
  540. " <th>DRULE_NM</th>\n",
  541. " <th>ASSETS_VAL</th>\n",
  542. " </tr>\n",
  543. " </thead>\n",
  544. " <tbody>\n",
  545. " <tr>\n",
  546. " <th>0</th>\n",
  547. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  548. " <td>계원예술대학교</td>\n",
  549. " <td>Malwr</td>\n",
  550. " <td>117.17.214.4</td>\n",
  551. " <td>2846</td>\n",
  552. " <td>5.9.163.18</td>\n",
  553. " <td>50001</td>\n",
  554. " <td>TCP</td>\n",
  555. " <td>대한민국</td>\n",
  556. " <td>1</td>\n",
  557. " <td>Malwr-Resource-29-01-coinminer.18033003ECSC#</td>\n",
  558. " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
  559. " </tr>\n",
  560. " </tbody>\n",
  561. "</table>\n",
  562. "</div>"
  563. ],
  564. "text/plain": [
  565. " RISK_V2 INST_NM \\\n",
  566. "0 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 계원예술대학교 \n",
  567. "\n",
  568. " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP TW_DMG_PORT \\\n",
  569. "0 Malwr 117.17.214.4 2846 5.9.163.18 50001 \n",
  570. "\n",
  571. " ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
  572. "0 TCP 대한민국 1 \n",
  573. "\n",
  574. " DRULE_NM \\\n",
  575. "0 Malwr-Resource-29-01-coinminer.18033003ECSC# \n",
  576. "\n",
  577. " ASSETS_VAL \n",
  578. "0 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] "
  579. ]
  580. },
  581. "metadata": {},
  582. "execution_count": 354
  583. }
  584. ],
  585. "metadata": {}
  586. },
  587. {
  588. "cell_type": "code",
  589. "execution_count": 355,
  590. "source": [
  591. "# modified\n",
  592. "def filter_intent(intent):\n",
  593. " intents=[]\n",
  594. " for intent_key in intent:\n",
  595. " if 'INTENT_VAL_' in intent_key and intent[intent_key]:\n",
  596. " intent_key_desc = 'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)\n",
  597. " intents.append(intent_key_desc)\n",
  598. " return intents"
  599. ],
  600. "outputs": [],
  601. "metadata": {}
  602. },
  603. {
  604. "cell_type": "code",
  605. "execution_count": 356,
  606. "source": [
  607. "def get_intent_desc(intent_field):\n",
  608. " if intent_field == 'INTENT_VAL_1':\n",
  609. " return '파괴'\n",
  610. " elif intent_field == 'INTENT_VAL_2':\n",
  611. " return '유출'\n",
  612. " elif intent_field == 'INTENT_VAL_3':\n",
  613. " return '지연'\n",
  614. " elif intent_field == 'INTENT_VAL_4':\n",
  615. " return '잠복'\n",
  616. " elif intent_field == 'INTENT_VAL_5':\n",
  617. " return '단순침입'\n",
  618. " elif intent_field == 'INTENT_VAL_6':\n",
  619. " return 'MD5'\n",
  620. " elif intent_field == 'INTENT_VAL_0':\n",
  621. " return 'Default'\n",
  622. " else:\n",
  623. " return ''"
  624. ],
  625. "outputs": [],
  626. "metadata": {}
  627. },
  628. {
  629. "cell_type": "code",
  630. "execution_count": 357,
  631. "source": [
  632. "# New column of intent value\n",
  633. "NTM_df['INTENT_VAL']=list(map(filter_intent, RISK_V2_FILTERED))\n",
  634. "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].astype(str)\n",
  635. "NTM_df[:1]"
  636. ],
  637. "outputs": [
  638. {
  639. "output_type": "stream",
  640. "name": "stderr",
  641. "text": [
  642. "<ipython-input-357-8a9ca54377be>:2: SettingWithCopyWarning: \n",
  643. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  644. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  645. "\n",
  646. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  647. " NTM_df['INTENT_VAL']=list(map(filter_intent, RISK_V2_FILTERED))\n",
  648. "<ipython-input-357-8a9ca54377be>:3: SettingWithCopyWarning: \n",
  649. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  650. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  651. "\n",
  652. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  653. " NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].astype(str)\n"
  654. ]
  655. },
  656. {
  657. "output_type": "execute_result",
  658. "data": {
  659. "text/html": [
  660. "<div>\n",
  661. "<style scoped>\n",
  662. " .dataframe tbody tr th:only-of-type {\n",
  663. " vertical-align: middle;\n",
  664. " }\n",
  665. "\n",
  666. " .dataframe tbody tr th {\n",
  667. " vertical-align: top;\n",
  668. " }\n",
  669. "\n",
  670. " .dataframe thead th {\n",
  671. " text-align: right;\n",
  672. " }\n",
  673. "</style>\n",
  674. "<table border=\"1\" class=\"dataframe\">\n",
  675. " <thead>\n",
  676. " <tr style=\"text-align: right;\">\n",
  677. " <th></th>\n",
  678. " <th>RISK_V2</th>\n",
  679. " <th>INST_NM</th>\n",
  680. " <th>DRULE_ATT_TYPE_CODE1</th>\n",
  681. " <th>TW_ATT_IP</th>\n",
  682. " <th>TW_ATT_PORT</th>\n",
  683. " <th>TW_DMG_IP</th>\n",
  684. " <th>TW_DMG_PORT</th>\n",
  685. " <th>ACCD_DMG_PROTO_NM</th>\n",
  686. " <th>TW_ATT_CT_NM</th>\n",
  687. " <th>ACCD_FIND_MTD_CODE</th>\n",
  688. " <th>DRULE_NM</th>\n",
  689. " <th>ASSETS_VAL</th>\n",
  690. " <th>INTENT_VAL</th>\n",
  691. " </tr>\n",
  692. " </thead>\n",
  693. " <tbody>\n",
  694. " <tr>\n",
  695. " <th>0</th>\n",
  696. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  697. " <td>계원예술대학교</td>\n",
  698. " <td>Malwr</td>\n",
  699. " <td>117.17.214.4</td>\n",
  700. " <td>2846</td>\n",
  701. " <td>5.9.163.18</td>\n",
  702. " <td>50001</td>\n",
  703. " <td>TCP</td>\n",
  704. " <td>대한민국</td>\n",
  705. " <td>1</td>\n",
  706. " <td>Malwr-Resource-29-01-coinminer.18033003ECSC#</td>\n",
  707. " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
  708. " <td>['RISK_V2.INTENT_VAL_3=지연']</td>\n",
  709. " </tr>\n",
  710. " </tbody>\n",
  711. "</table>\n",
  712. "</div>"
  713. ],
  714. "text/plain": [
  715. " RISK_V2 INST_NM \\\n",
  716. "0 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 계원예술대학교 \n",
  717. "\n",
  718. " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP TW_DMG_PORT \\\n",
  719. "0 Malwr 117.17.214.4 2846 5.9.163.18 50001 \n",
  720. "\n",
  721. " ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
  722. "0 TCP 대한민국 1 \n",
  723. "\n",
  724. " DRULE_NM \\\n",
  725. "0 Malwr-Resource-29-01-coinminer.18033003ECSC# \n",
  726. "\n",
  727. " ASSETS_VAL INTENT_VAL \n",
  728. "0 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_3=지연'] "
  729. ]
  730. },
  731. "metadata": {},
  732. "execution_count": 357
  733. }
  734. ],
  735. "metadata": {}
  736. },
  737. {
  738. "cell_type": "code",
  739. "execution_count": 358,
  740. "source": [
  741. "# modified\n",
  742. "def filter_source(source):\n",
  743. " sources=[]\n",
  744. " for source_key in source:\n",
  745. " if 'SOURCE_VAL_' in source_key and source[source_key]:\n",
  746. " source_key_desc='RISK_V2.' + source_key + '=' + get_source_desc(source_key)\n",
  747. " sources.append(source_key_desc)\n",
  748. " return sources"
  749. ],
  750. "outputs": [],
  751. "metadata": {}
  752. },
  753. {
  754. "cell_type": "code",
  755. "execution_count": 359,
  756. "source": [
  757. "def get_source_desc(source_field):\n",
  758. " if source_field=='SOURCE_VAL_1':\n",
  759. " return '북한IP'\n",
  760. " if source_field=='SOURCE_VAL_3':\n",
  761. " return 'ECSC Black IP'\n",
  762. " else:\n",
  763. " return ''"
  764. ],
  765. "outputs": [],
  766. "metadata": {}
  767. },
  768. {
  769. "cell_type": "code",
  770. "execution_count": 360,
  771. "source": [
  772. "# New column of SOURCE_VAL value\n",
  773. "NTM_df['SOURCE_VAL']=list(map(filter_source, RISK_V2_FILTERED))\n",
  774. "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].astype(str)\n",
  775. "NTM_df[:5]"
  776. ],
  777. "outputs": [
  778. {
  779. "output_type": "stream",
  780. "name": "stderr",
  781. "text": [
  782. "<ipython-input-360-3b33b89bc3d3>:2: SettingWithCopyWarning: \n",
  783. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  784. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  785. "\n",
  786. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  787. " NTM_df['SOURCE_VAL']=list(map(filter_source, RISK_V2_FILTERED))\n",
  788. "<ipython-input-360-3b33b89bc3d3>:3: SettingWithCopyWarning: \n",
  789. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  790. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  791. "\n",
  792. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  793. " NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].astype(str)\n"
  794. ]
  795. },
  796. {
  797. "output_type": "execute_result",
  798. "data": {
  799. "text/html": [
  800. "<div>\n",
  801. "<style scoped>\n",
  802. " .dataframe tbody tr th:only-of-type {\n",
  803. " vertical-align: middle;\n",
  804. " }\n",
  805. "\n",
  806. " .dataframe tbody tr th {\n",
  807. " vertical-align: top;\n",
  808. " }\n",
  809. "\n",
  810. " .dataframe thead th {\n",
  811. " text-align: right;\n",
  812. " }\n",
  813. "</style>\n",
  814. "<table border=\"1\" class=\"dataframe\">\n",
  815. " <thead>\n",
  816. " <tr style=\"text-align: right;\">\n",
  817. " <th></th>\n",
  818. " <th>RISK_V2</th>\n",
  819. " <th>INST_NM</th>\n",
  820. " <th>DRULE_ATT_TYPE_CODE1</th>\n",
  821. " <th>TW_ATT_IP</th>\n",
  822. " <th>TW_ATT_PORT</th>\n",
  823. " <th>TW_DMG_IP</th>\n",
  824. " <th>TW_DMG_PORT</th>\n",
  825. " <th>ACCD_DMG_PROTO_NM</th>\n",
  826. " <th>TW_ATT_CT_NM</th>\n",
  827. " <th>ACCD_FIND_MTD_CODE</th>\n",
  828. " <th>DRULE_NM</th>\n",
  829. " <th>ASSETS_VAL</th>\n",
  830. " <th>INTENT_VAL</th>\n",
  831. " <th>SOURCE_VAL</th>\n",
  832. " </tr>\n",
  833. " </thead>\n",
  834. " <tbody>\n",
  835. " <tr>\n",
  836. " <th>0</th>\n",
  837. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  838. " <td>계원예술대학교</td>\n",
  839. " <td>Malwr</td>\n",
  840. " <td>117.17.214.4</td>\n",
  841. " <td>2846</td>\n",
  842. " <td>5.9.163.18</td>\n",
  843. " <td>50001</td>\n",
  844. " <td>TCP</td>\n",
  845. " <td>대한민국</td>\n",
  846. " <td>1</td>\n",
  847. " <td>Malwr-Resource-29-01-coinminer.18033003ECSC#</td>\n",
  848. " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
  849. " <td>['RISK_V2.INTENT_VAL_3=지연']</td>\n",
  850. " <td>[]</td>\n",
  851. " </tr>\n",
  852. " <tr>\n",
  853. " <th>1</th>\n",
  854. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  855. " <td>국민대학교</td>\n",
  856. " <td>Malwr</td>\n",
  857. " <td>104.18.56.68</td>\n",
  858. " <td>80</td>\n",
  859. " <td>210.123.34.247</td>\n",
  860. " <td>51318</td>\n",
  861. " <td>TCP</td>\n",
  862. " <td>미국</td>\n",
  863. " <td>1</td>\n",
  864. " <td>Malwr-Sysinfo-29-01-json.16101205ECSC#</td>\n",
  865. " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
  866. " <td>['RISK_V2.INTENT_VAL_2=유출']</td>\n",
  867. " <td>[]</td>\n",
  868. " </tr>\n",
  869. " <tr>\n",
  870. " <th>2</th>\n",
  871. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  872. " <td>서울여자대학교</td>\n",
  873. " <td>Attack</td>\n",
  874. " <td>103.115.42.158</td>\n",
  875. " <td>46728</td>\n",
  876. " <td>203.246.40.25</td>\n",
  877. " <td>80</td>\n",
  878. " <td>TCP</td>\n",
  879. " <td>중국</td>\n",
  880. " <td>1</td>\n",
  881. " <td>Attack-WebScan-29-01-download(passwd).19092602...</td>\n",
  882. " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V...</td>\n",
  883. " <td>['RISK_V2.INTENT_VAL_5=단순침입']</td>\n",
  884. " <td>[]</td>\n",
  885. " </tr>\n",
  886. " <tr>\n",
  887. " <th>3</th>\n",
  888. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  889. " <td>서울대학교</td>\n",
  890. " <td>Attack</td>\n",
  891. " <td>78.151.86.161</td>\n",
  892. " <td>42911</td>\n",
  893. " <td>147.46.10.138</td>\n",
  894. " <td>80</td>\n",
  895. " <td>TCP</td>\n",
  896. " <td>영국</td>\n",
  897. " <td>1</td>\n",
  898. " <td>Attack-Scan-29-01-botnet(satori).19061302ECSC#</td>\n",
  899. " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V...</td>\n",
  900. " <td>['RISK_V2.INTENT_VAL_5=단순침입']</td>\n",
  901. " <td>[]</td>\n",
  902. " </tr>\n",
  903. " <tr>\n",
  904. " <th>4</th>\n",
  905. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  906. " <td>동의대학교</td>\n",
  907. " <td>Malwr</td>\n",
  908. " <td>213.230.84.85</td>\n",
  909. " <td>3666</td>\n",
  910. " <td>114.70.187.161</td>\n",
  911. " <td>49460</td>\n",
  912. " <td>TCP</td>\n",
  913. " <td>우즈베키스탄</td>\n",
  914. " <td>1</td>\n",
  915. " <td>Malwr-Resource-29-01-photo.scr.16053101ECSC#</td>\n",
  916. " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V...</td>\n",
  917. " <td>['RISK_V2.INTENT_VAL_3=지연']</td>\n",
  918. " <td>[]</td>\n",
  919. " </tr>\n",
  920. " </tbody>\n",
  921. "</table>\n",
  922. "</div>"
  923. ],
  924. "text/plain": [
  925. " RISK_V2 INST_NM \\\n",
  926. "0 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 계원예술대학교 \n",
  927. "1 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 국민대학교 \n",
  928. "2 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 서울여자대학교 \n",
  929. "3 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 서울대학교 \n",
  930. "4 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 동의대학교 \n",
  931. "\n",
  932. " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP \\\n",
  933. "0 Malwr 117.17.214.4 2846 5.9.163.18 \n",
  934. "1 Malwr 104.18.56.68 80 210.123.34.247 \n",
  935. "2 Attack 103.115.42.158 46728 203.246.40.25 \n",
  936. "3 Attack 78.151.86.161 42911 147.46.10.138 \n",
  937. "4 Malwr 213.230.84.85 3666 114.70.187.161 \n",
  938. "\n",
  939. " TW_DMG_PORT ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
  940. "0 50001 TCP 대한민국 1 \n",
  941. "1 51318 TCP 미국 1 \n",
  942. "2 80 TCP 중국 1 \n",
  943. "3 80 TCP 영국 1 \n",
  944. "4 49460 TCP 우즈베키스탄 1 \n",
  945. "\n",
  946. " DRULE_NM \\\n",
  947. "0 Malwr-Resource-29-01-coinminer.18033003ECSC# \n",
  948. "1 Malwr-Sysinfo-29-01-json.16101205ECSC# \n",
  949. "2 Attack-WebScan-29-01-download(passwd).19092602... \n",
  950. "3 Attack-Scan-29-01-botnet(satori).19061302ECSC# \n",
  951. "4 Malwr-Resource-29-01-photo.scr.16053101ECSC# \n",
  952. "\n",
  953. " ASSETS_VAL \\\n",
  954. "0 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] \n",
  955. "1 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] \n",
  956. "2 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V... \n",
  957. "3 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V... \n",
  958. "4 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V... \n",
  959. "\n",
  960. " INTENT_VAL SOURCE_VAL \n",
  961. "0 ['RISK_V2.INTENT_VAL_3=지연'] [] \n",
  962. "1 ['RISK_V2.INTENT_VAL_2=유출'] [] \n",
  963. "2 ['RISK_V2.INTENT_VAL_5=단순침입'] [] \n",
  964. "3 ['RISK_V2.INTENT_VAL_5=단순침입'] [] \n",
  965. "4 ['RISK_V2.INTENT_VAL_3=지연'] [] "
  966. ]
  967. },
  968. "metadata": {},
  969. "execution_count": 360
  970. }
  971. ],
  972. "metadata": {}
  973. },
  974. {
  975. "cell_type": "code",
  976. "execution_count": 361,
  977. "source": [
  978. "NTM_df.drop(columns=['RISK_V2'], inplace=True)\n",
  979. "NTM_df.columns"
  980. ],
  981. "outputs": [
  982. {
  983. "output_type": "stream",
  984. "name": "stderr",
  985. "text": [
  986. "/Users/joohyunyoon/.pyenv/versions/anaconda3-2021.05/lib/python3.8/site-packages/pandas/core/frame.py:4308: SettingWithCopyWarning: \n",
  987. "A value is trying to be set on a copy of a slice from a DataFrame\n",
  988. "\n",
  989. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  990. " return super().drop(\n"
  991. ]
  992. },
  993. {
  994. "output_type": "execute_result",
  995. "data": {
  996. "text/plain": [
  997. "Index(['INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',\n",
  998. " 'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',\n",
  999. " 'ACCD_FIND_MTD_CODE', 'DRULE_NM', 'ASSETS_VAL', 'INTENT_VAL',\n",
  1000. " 'SOURCE_VAL'],\n",
  1001. " dtype='object')"
  1002. ]
  1003. },
  1004. "metadata": {},
  1005. "execution_count": 361
  1006. }
  1007. ],
  1008. "metadata": {}
  1009. },
  1010. {
  1011. "cell_type": "code",
  1012. "execution_count": 362,
  1013. "source": [
  1014. "# It should be 13 columns in total\n",
  1015. "\n",
  1016. "# 1. 기관 INST_NM\n",
  1017. "# 2. 공격 DRULE_ATT_TYPE_CODE1\n",
  1018. "# 3. 자산 ASSETS_VAL\n",
  1019. "# 4. 위협공격ip TW_ATT_IP\n",
  1020. "# 5. 위협공격port TW_ATT_PORT\n",
  1021. "# 6. 위협피해ip TW_DMG_IP\n",
  1022. "# 7. 위협피해port TW_DMG_PORT\n",
  1023. "# 8. 위협피해프로토콜 ACCD_DMG_PROTO_NM\n",
  1024. "# 9. 공격국가 TW_ATT_CT_NM\n",
  1025. "# 10. 의도(7개) INTENT_VAL\n",
  1026. "# 11. IP/URL 가중치 SOURCE_VAL\n",
  1027. "# 12. 장비 ACCD_FIND_MTD_CODE\n",
  1028. "# 13. 탐지규칙명 DRULE_NM"
  1029. ],
  1030. "outputs": [],
  1031. "metadata": {}
  1032. },
  1033. {
  1034. "cell_type": "markdown",
  1035. "source": [],
  1036. "metadata": {}
  1037. },
  1038. {
  1039. "cell_type": "code",
  1040. "execution_count": 363,
  1041. "source": [
  1042. "NTM_df.isna().sum()"
  1043. ],
  1044. "outputs": [
  1045. {
  1046. "output_type": "execute_result",
  1047. "data": {
  1048. "text/plain": [
  1049. "INST_NM 0\n",
  1050. "DRULE_ATT_TYPE_CODE1 0\n",
  1051. "TW_ATT_IP 0\n",
  1052. "TW_ATT_PORT 0\n",
  1053. "TW_DMG_IP 0\n",
  1054. "TW_DMG_PORT 0\n",
  1055. "ACCD_DMG_PROTO_NM 0\n",
  1056. "TW_ATT_CT_NM 0\n",
  1057. "ACCD_FIND_MTD_CODE 0\n",
  1058. "DRULE_NM 0\n",
  1059. "ASSETS_VAL 0\n",
  1060. "INTENT_VAL 0\n",
  1061. "SOURCE_VAL 0\n",
  1062. "dtype: int64"
  1063. ]
  1064. },
  1065. "metadata": {},
  1066. "execution_count": 363
  1067. }
  1068. ],
  1069. "metadata": {}
  1070. },
  1071. {
  1072. "cell_type": "code",
  1073. "execution_count": 364,
  1074. "source": [
  1075. "# Replace NaN with '' or 0, depending on the column\n",
  1076. "NTM_df['ACCD_DMG_PROTO_NM']=NTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
  1077. "NTM_df['INST_NM']=NTM_df['INST_NM'].replace(np.nan,'')\n",
  1078. "NTM_df['DRULE_ATT_TYPE_CODE1']=NTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
  1079. "NTM_df['TW_ATT_IP']=NTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
  1080. "NTM_df['TW_ATT_PORT']=NTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
  1081. "NTM_df['TW_DMG_IP']=NTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
  1082. "NTM_df['TW_DMG_PORT']=NTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
  1083. "NTM_df['TW_ATT_CT_NM']=NTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
  1084. "NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
  1085. "NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].replace(np.nan,0)\n",
  1086. "NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
  1087. "NTM_df['DRULE_NM']=NTM_df['DRULE_NM'].replace(np.nan,'')"
  1088. ],
  1089. "outputs": [
  1090. {
  1091. "output_type": "stream",
  1092. "name": "stderr",
  1093. "text": [
  1094. "<ipython-input-364-a775405fb7f0>:2: SettingWithCopyWarning: \n",
  1095. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1096. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1097. "\n",
  1098. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1099. " NTM_df['ACCD_DMG_PROTO_NM']=NTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
  1100. "<ipython-input-364-a775405fb7f0>:3: SettingWithCopyWarning: \n",
  1101. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1102. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1103. "\n",
  1104. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1105. " NTM_df['INST_NM']=NTM_df['INST_NM'].replace(np.nan,'')\n",
  1106. "<ipython-input-364-a775405fb7f0>:4: SettingWithCopyWarning: \n",
  1107. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1108. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1109. "\n",
  1110. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1111. " NTM_df['DRULE_ATT_TYPE_CODE1']=NTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
  1112. "<ipython-input-364-a775405fb7f0>:5: SettingWithCopyWarning: \n",
  1113. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1114. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1115. "\n",
  1116. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1117. " NTM_df['TW_ATT_IP']=NTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
  1118. "<ipython-input-364-a775405fb7f0>:6: SettingWithCopyWarning: \n",
  1119. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1120. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1121. "\n",
  1122. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1123. " NTM_df['TW_ATT_PORT']=NTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
  1124. "<ipython-input-364-a775405fb7f0>:7: SettingWithCopyWarning: \n",
  1125. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1126. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1127. "\n",
  1128. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1129. " NTM_df['TW_DMG_IP']=NTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
  1130. "<ipython-input-364-a775405fb7f0>:8: SettingWithCopyWarning: \n",
  1131. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1132. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1133. "\n",
  1134. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1135. " NTM_df['TW_DMG_PORT']=NTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
  1136. "<ipython-input-364-a775405fb7f0>:9: SettingWithCopyWarning: \n",
  1137. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1138. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1139. "\n",
  1140. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1141. " NTM_df['TW_ATT_CT_NM']=NTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
  1142. "<ipython-input-364-a775405fb7f0>:10: SettingWithCopyWarning: \n",
  1143. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1144. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1145. "\n",
  1146. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1147. " NTM_df['ASSETS_VAL']=NTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
  1148. "<ipython-input-364-a775405fb7f0>:11: SettingWithCopyWarning: \n",
  1149. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1150. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1151. "\n",
  1152. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1153. " NTM_df['INTENT_VAL']=NTM_df['INTENT_VAL'].replace(np.nan,0)\n",
  1154. "<ipython-input-364-a775405fb7f0>:12: SettingWithCopyWarning: \n",
  1155. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1156. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1157. "\n",
  1158. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1159. " NTM_df['SOURCE_VAL']=NTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
  1160. "<ipython-input-364-a775405fb7f0>:13: SettingWithCopyWarning: \n",
  1161. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1162. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1163. "\n",
  1164. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1165. " NTM_df['DRULE_NM']=NTM_df['DRULE_NM'].replace(np.nan,'')\n"
  1166. ]
  1167. }
  1168. ],
  1169. "metadata": {}
  1170. },
  1171. {
  1172. "cell_type": "code",
  1173. "execution_count": 365,
  1174. "source": [
  1175. "# Check for NaN again\n",
  1176. "NTM_df.isna().sum()"
  1177. ],
  1178. "outputs": [
  1179. {
  1180. "output_type": "execute_result",
  1181. "data": {
  1182. "text/plain": [
  1183. "INST_NM 0\n",
  1184. "DRULE_ATT_TYPE_CODE1 0\n",
  1185. "TW_ATT_IP 0\n",
  1186. "TW_ATT_PORT 0\n",
  1187. "TW_DMG_IP 0\n",
  1188. "TW_DMG_PORT 0\n",
  1189. "ACCD_DMG_PROTO_NM 0\n",
  1190. "TW_ATT_CT_NM 0\n",
  1191. "ACCD_FIND_MTD_CODE 0\n",
  1192. "DRULE_NM 0\n",
  1193. "ASSETS_VAL 0\n",
  1194. "INTENT_VAL 0\n",
  1195. "SOURCE_VAL 0\n",
  1196. "dtype: int64"
  1197. ]
  1198. },
  1199. "metadata": {},
  1200. "execution_count": 365
  1201. }
  1202. ],
  1203. "metadata": {}
  1204. },
  1205. {
  1206. "cell_type": "code",
  1207. "execution_count": 366,
  1208. "source": [
  1209. "# # Merge all\n",
  1210. "\n",
  1211. "# Join the column values of each row into one space-separated string\n",
  1212. "NTM_df['Combined']=NTM_df['INST_NM'].astype(str)+' '+NTM_df['TW_ATT_IP'].astype(str)+' '+NTM_df['TW_ATT_PORT'].astype(str)+' '+NTM_df['TW_DMG_IP'].astype(str)+' '+NTM_df['TW_DMG_PORT'].astype(str) +' '+NTM_df['ACCD_DMG_PROTO_NM'].astype(str)+' '+NTM_df['TW_ATT_CT_NM']+' '+NTM_df['ASSETS_VAL']+' '+NTM_df['INTENT_VAL']+' '+NTM_df['SOURCE_VAL']+' '+NTM_df['DRULE_ATT_TYPE_CODE1']+' '+NTM_df['DRULE_NM']\n",
  1213. "\n",
  1214. "NTM_com=NTM_df['Combined']\n",
  1215. "NTM_com[:10]\n"
  1216. ],
  1217. "outputs": [
  1218. {
  1219. "output_type": "stream",
  1220. "name": "stderr",
  1221. "text": [
  1222. "<ipython-input-366-d53cc1e7cfac>:4: SettingWithCopyWarning: \n",
  1223. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  1224. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  1225. "\n",
  1226. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  1227. " NTM_df['Combined']=NTM_df['INST_NM'].astype(str)+' '+NTM_df['TW_ATT_IP'].astype(str)+' '+NTM_df['TW_ATT_PORT'].astype(str)+' '+NTM_df['TW_DMG_IP'].astype(str)+' '+NTM_df['TW_DMG_PORT'].astype(str) +' '+NTM_df['ACCD_DMG_PROTO_NM'].astype(str)+' '+NTM_df['TW_ATT_CT_NM']+' '+NTM_df['ASSETS_VAL']+' '+NTM_df['INTENT_VAL']+' '+NTM_df['SOURCE_VAL']+' '+NTM_df['DRULE_ATT_TYPE_CODE1']+' '+NTM_df['DRULE_NM']\n"
  1228. ]
  1229. },
  1230. {
  1231. "output_type": "execute_result",
  1232. "data": {
  1233. "text/plain": [
  1234. "0 계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TCP...\n",
  1235. "1 국민대학교 104.18.56.68 80 210.123.34.247 51318 TCP...\n",
  1236. "2 서울여자대학교 103.115.42.158 46728 203.246.40.25 80 ...\n",
  1237. "3 서울대학교 78.151.86.161 42911 147.46.10.138 80 TCP...\n",
  1238. "4 동의대학교 213.230.84.85 3666 114.70.187.161 49460 ...\n",
  1239. "5 원광대학교 34.70.128.113 63026 123.108.17.41 80 TCP...\n",
  1240. "6 중원대학교 36.92.9.106 44683 121.191.149.203 80 TCP...\n",
  1241. "7 서울대학교 45.55.60.73 55082 147.47.106.234 80 TCP ...\n",
  1242. "9 숙명여자대학교 192.144.207.37 10909 203.252.201.16 80...\n",
  1243. "10 서울특별시교육청 218.153.168.50 11422 202.171.250.27 8...\n",
  1244. "Name: Combined, dtype: object"
  1245. ]
  1246. },
  1247. "metadata": {},
  1248. "execution_count": 366
  1249. }
  1250. ],
  1251. "metadata": {}
  1252. },
  1253. {
  1254. "cell_type": "code",
  1255. "execution_count": 367,
  1256. "source": [
  1257. "# Change the type to DataFrame\n",
  1258. "NTM_to_df=pd.DataFrame(NTM_com)\n",
  1259. "NTM_to_df[:5]"
  1260. ],
  1261. "outputs": [
  1262. {
  1263. "output_type": "execute_result",
  1264. "data": {
  1265. "text/html": [
  1266. "<div>\n",
  1267. "<style scoped>\n",
  1268. " .dataframe tbody tr th:only-of-type {\n",
  1269. " vertical-align: middle;\n",
  1270. " }\n",
  1271. "\n",
  1272. " .dataframe tbody tr th {\n",
  1273. " vertical-align: top;\n",
  1274. " }\n",
  1275. "\n",
  1276. " .dataframe thead th {\n",
  1277. " text-align: right;\n",
  1278. " }\n",
  1279. "</style>\n",
  1280. "<table border=\"1\" class=\"dataframe\">\n",
  1281. " <thead>\n",
  1282. " <tr style=\"text-align: right;\">\n",
  1283. " <th></th>\n",
  1284. " <th>Combined</th>\n",
  1285. " </tr>\n",
  1286. " </thead>\n",
  1287. " <tbody>\n",
  1288. " <tr>\n",
  1289. " <th>0</th>\n",
  1290. " <td>계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TCP...</td>\n",
  1291. " </tr>\n",
  1292. " <tr>\n",
  1293. " <th>1</th>\n",
  1294. " <td>국민대학교 104.18.56.68 80 210.123.34.247 51318 TCP...</td>\n",
  1295. " </tr>\n",
  1296. " <tr>\n",
  1297. " <th>2</th>\n",
  1298. " <td>서울여자대학교 103.115.42.158 46728 203.246.40.25 80 ...</td>\n",
  1299. " </tr>\n",
  1300. " <tr>\n",
  1301. " <th>3</th>\n",
  1302. " <td>서울대학교 78.151.86.161 42911 147.46.10.138 80 TCP...</td>\n",
  1303. " </tr>\n",
  1304. " <tr>\n",
  1305. " <th>4</th>\n",
  1306. " <td>동의대학교 213.230.84.85 3666 114.70.187.161 49460 ...</td>\n",
  1307. " </tr>\n",
  1308. " </tbody>\n",
  1309. "</table>\n",
  1310. "</div>"
  1311. ],
  1312. "text/plain": [
  1313. " Combined\n",
  1314. "0 계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TCP...\n",
  1315. "1 국민대학교 104.18.56.68 80 210.123.34.247 51318 TCP...\n",
  1316. "2 서울여자대학교 103.115.42.158 46728 203.246.40.25 80 ...\n",
  1317. "3 서울대학교 78.151.86.161 42911 147.46.10.138 80 TCP...\n",
  1318. "4 동의대학교 213.230.84.85 3666 114.70.187.161 49460 ..."
  1319. ]
  1320. },
  1321. "metadata": {},
  1322. "execution_count": 367
  1323. }
  1324. ],
  1325. "metadata": {}
  1326. },
  1327. {
  1328. "cell_type": "code",
  1329. "execution_count": 368,
  1330. "source": [
  1331. "# Convert to a nested list (one single-element list per row) so the algorithm can be applied\n",
  1332. "NTM_tolist=NTM_to_df.values.tolist()\n",
  1333. "NTM_tolist[:5]"
  1334. ],
  1335. "outputs": [
  1336. {
  1337. "output_type": "execute_result",
  1338. "data": {
  1339. "text/plain": [
  1340. "[[\"계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_3=지연'] [] Malwr Malwr-Resource-29-01-coinminer.18033003ECSC#\"],\n",
  1341. " [\"국민대학교 104.18.56.68 80 210.123.34.247 51318 TCP 미국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_2=유출'] [] Malwr Malwr-Sysinfo-29-01-json.16101205ECSC#\"],\n",
  1342. " [\"서울여자대학교 103.115.42.158 46728 203.246.40.25 80 TCP 중국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_4=공인-내부응용서버'] ['RISK_V2.INTENT_VAL_5=단순침입'] [] Attack Attack-WebScan-29-01-download(passwd).19092602ECSC#\"],\n",
  1343. " [\"서울대학교 78.151.86.161 42911 147.46.10.138 80 TCP 영국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_10=공인-비업무용PC'] ['RISK_V2.INTENT_VAL_5=단순침입'] [] Attack Attack-Scan-29-01-botnet(satori).19061302ECSC#\"],\n",
  1344. " [\"동의대학교 213.230.84.85 3666 114.70.187.161 49460 TCP 우즈베키스탄 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_9=공인-업무용PC'] ['RISK_V2.INTENT_VAL_3=지연'] [] Malwr Malwr-Resource-29-01-photo.scr.16053101ECSC#\"]]"
  1345. ]
  1346. },
  1347. "metadata": {},
  1348. "execution_count": 368
  1349. }
  1350. ],
  1351. "metadata": {}
  1352. },
  1353. {
  1354. "cell_type": "code",
  1355. "execution_count": 369,
  1356. "source": [
  1357. "from prefixspan import PrefixSpan"
  1358. ],
  1359. "outputs": [],
  1360. "metadata": {}
  1361. },
  1362. {
  1363. "cell_type": "code",
  1364. "execution_count": 370,
  1365. "source": [
  1366. "# Apply prefixspan\n",
  1367. "PrefixSpan_NTM = PrefixSpan(NTM_tolist)\n",
  1368. "\n",
  1369. "###### Interchangeable ######\n",
  1370. "# Get every pattern with frequency >= 1\n",
  1371. "prefix_NTM=PrefixSpan_NTM.frequent(1)\n",
  1372. "prefix_NTM[:3]"
  1373. ],
  1374. "outputs": [
  1375. {
  1376. "output_type": "execute_result",
  1377. "data": {
  1378. "text/plain": [
  1379. "[(1,\n",
  1380. " [\"계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_3=지연'] [] Malwr Malwr-Resource-29-01-coinminer.18033003ECSC#\"]),\n",
  1381. " (1,\n",
  1382. " [\"국민대학교 104.18.56.68 80 210.123.34.247 51318 TCP 미국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_2=유출'] [] Malwr Malwr-Sysinfo-29-01-json.16101205ECSC#\"]),\n",
  1383. " (1,\n",
  1384. " [\"서울여자대학교 103.115.42.158 46728 203.246.40.25 80 TCP 중국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_4=공인-내부응용서버'] ['RISK_V2.INTENT_VAL_5=단순침입'] [] Attack Attack-WebScan-29-01-download(passwd).19092602ECSC#\"])]"
  1385. ]
  1386. },
  1387. "metadata": {},
  1388. "execution_count": 370
  1389. }
  1390. ],
  1391. "metadata": {}
  1392. },
  1393. {
  1394. "cell_type": "code",
  1395. "execution_count": 371,
  1396. "source": [
  1397. "# Put the result into a DataFrame\n",
  1398. "prefix_NTM_df=pd.DataFrame(prefix_NTM)\n",
  1399. "prefix_NTM_df[:5]"
  1400. ],
  1401. "outputs": [
  1402. {
  1403. "output_type": "execute_result",
  1404. "data": {
  1405. "text/html": [
  1406. "<div>\n",
  1407. "<style scoped>\n",
  1408. " .dataframe tbody tr th:only-of-type {\n",
  1409. " vertical-align: middle;\n",
  1410. " }\n",
  1411. "\n",
  1412. " .dataframe tbody tr th {\n",
  1413. " vertical-align: top;\n",
  1414. " }\n",
  1415. "\n",
  1416. " .dataframe thead th {\n",
  1417. " text-align: right;\n",
  1418. " }\n",
  1419. "</style>\n",
  1420. "<table border=\"1\" class=\"dataframe\">\n",
  1421. " <thead>\n",
  1422. " <tr style=\"text-align: right;\">\n",
  1423. " <th></th>\n",
  1424. " <th>0</th>\n",
  1425. " <th>1</th>\n",
  1426. " </tr>\n",
  1427. " </thead>\n",
  1428. " <tbody>\n",
  1429. " <tr>\n",
  1430. " <th>0</th>\n",
  1431. " <td>1</td>\n",
  1432. " <td>[계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC...</td>\n",
  1433. " </tr>\n",
  1434. " <tr>\n",
  1435. " <th>1</th>\n",
  1436. " <td>1</td>\n",
  1437. " <td>[국민대학교 104.18.56.68 80 210.123.34.247 51318 TC...</td>\n",
  1438. " </tr>\n",
  1439. " <tr>\n",
  1440. " <th>2</th>\n",
  1441. " <td>1</td>\n",
  1442. " <td>[서울여자대학교 103.115.42.158 46728 203.246.40.25 80...</td>\n",
  1443. " </tr>\n",
  1444. " <tr>\n",
  1445. " <th>3</th>\n",
  1446. " <td>1</td>\n",
  1447. " <td>[서울대학교 78.151.86.161 42911 147.46.10.138 80 TC...</td>\n",
  1448. " </tr>\n",
  1449. " <tr>\n",
  1450. " <th>4</th>\n",
  1451. " <td>1</td>\n",
  1452. " <td>[동의대학교 213.230.84.85 3666 114.70.187.161 49460...</td>\n",
  1453. " </tr>\n",
  1454. " </tbody>\n",
  1455. "</table>\n",
  1456. "</div>"
  1457. ],
  1458. "text/plain": [
  1459. " 0 1\n",
  1460. "0 1 [계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC...\n",
  1461. "1 1 [국민대학교 104.18.56.68 80 210.123.34.247 51318 TC...\n",
  1462. "2 1 [서울여자대학교 103.115.42.158 46728 203.246.40.25 80...\n",
  1463. "3 1 [서울대학교 78.151.86.161 42911 147.46.10.138 80 TC...\n",
  1464. "4 1 [동의대학교 213.230.84.85 3666 114.70.187.161 49460..."
  1465. ]
  1466. },
  1467. "metadata": {},
  1468. "execution_count": 371
  1469. }
  1470. ],
  1471. "metadata": {}
  1472. },
  1473. {
  1474. "cell_type": "code",
  1475. "execution_count": 372,
  1476. "source": [
  1477. "# Rename the columns\n",
  1478. "prefix_NTM_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)\n",
  1479. "\n",
  1480. "# Add a new column to be filled with the Effect\n",
  1481. "prefix_NTM_df['Effect']=np.nan\n",
  1482. "\n",
  1483. "# Change the order of columns\n",
  1484. "prefix_NTM_df=prefix_NTM_df[['Cause','Effect','Frequency']]\n",
  1485. "prefix_NTM_df[:2]"
  1486. ],
  1487. "outputs": [
  1488. {
  1489. "output_type": "execute_result",
  1490. "data": {
  1491. "text/html": [
  1492. "<div>\n",
  1493. "<style scoped>\n",
  1494. " .dataframe tbody tr th:only-of-type {\n",
  1495. " vertical-align: middle;\n",
  1496. " }\n",
  1497. "\n",
  1498. " .dataframe tbody tr th {\n",
  1499. " vertical-align: top;\n",
  1500. " }\n",
  1501. "\n",
  1502. " .dataframe thead th {\n",
  1503. " text-align: right;\n",
  1504. " }\n",
  1505. "</style>\n",
  1506. "<table border=\"1\" class=\"dataframe\">\n",
  1507. " <thead>\n",
  1508. " <tr style=\"text-align: right;\">\n",
  1509. " <th></th>\n",
  1510. " <th>Cause</th>\n",
  1511. " <th>Effect</th>\n",
  1512. " <th>Frequency</th>\n",
  1513. " </tr>\n",
  1514. " </thead>\n",
  1515. " <tbody>\n",
  1516. " <tr>\n",
  1517. " <th>0</th>\n",
  1518. " <td>[계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC...</td>\n",
  1519. " <td>NaN</td>\n",
  1520. " <td>1</td>\n",
  1521. " </tr>\n",
  1522. " <tr>\n",
  1523. " <th>1</th>\n",
  1524. " <td>[국민대학교 104.18.56.68 80 210.123.34.247 51318 TC...</td>\n",
  1525. " <td>NaN</td>\n",
  1526. " <td>1</td>\n",
  1527. " </tr>\n",
  1528. " </tbody>\n",
  1529. "</table>\n",
  1530. "</div>"
  1531. ],
  1532. "text/plain": [
  1533. " Cause Effect Frequency\n",
  1534. "0 [계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC... NaN 1\n",
  1535. "1 [국민대학교 104.18.56.68 80 210.123.34.247 51318 TC... NaN 1"
  1536. ]
  1537. },
  1538. "metadata": {},
  1539. "execution_count": 372
  1540. }
  1541. ],
  1542. "metadata": {}
  1543. },
  1544. {
  1545. "cell_type": "code",
  1546. "execution_count": 373,
  1547. "source": [
  1548. "# Define a function that finds the rule name in a Cause entry\n",
  1549. "def generate_cause(cell):\n",
  1550. " drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
  1551. " for drule in drules:\n",
  1552. " if ' '+drule in cell[0]:\n",
  1553. " return drule \n",
  1554. " return ''\n",
  1555. " \n",
  1556. "# Map each Cause to its rule name, which is used as the Effect\n",
  1557. "effect=list(map(generate_cause, prefix_NTM_df.Cause))\n",
  1558. "\n",
  1559. "# Assign the rule name as an effect\n",
  1560. "prefix_NTM_df['Effect']=effect\n",
  1561. "prefix_NTM_df.sort_values(by=['Frequency'],ascending=False)"
  1562. ],
  1563. "outputs": [
  1564. {
  1565. "output_type": "execute_result",
  1566. "data": {
  1567. "text/html": [
  1568. "<div>\n",
  1569. "<style scoped>\n",
  1570. " .dataframe tbody tr th:only-of-type {\n",
  1571. " vertical-align: middle;\n",
  1572. " }\n",
  1573. "\n",
  1574. " .dataframe tbody tr th {\n",
  1575. " vertical-align: top;\n",
  1576. " }\n",
  1577. "\n",
  1578. " .dataframe thead th {\n",
  1579. " text-align: right;\n",
  1580. " }\n",
  1581. "</style>\n",
  1582. "<table border=\"1\" class=\"dataframe\">\n",
  1583. " <thead>\n",
  1584. " <tr style=\"text-align: right;\">\n",
  1585. " <th></th>\n",
  1586. " <th>Cause</th>\n",
  1587. " <th>Effect</th>\n",
  1588. " <th>Frequency</th>\n",
  1589. " </tr>\n",
  1590. " </thead>\n",
  1591. " <tbody>\n",
  1592. " <tr>\n",
  1593. " <th>393</th>\n",
  1594. " <td>[중앙대학교 165.194.8.12 49159 219.153.48.112 1188 ...</td>\n",
  1595. " <td>Malwr</td>\n",
  1596. " <td>4</td>\n",
  1597. " </tr>\n",
  1598. " <tr>\n",
  1599. " <th>1831</th>\n",
  1600. " <td>[상지대학교 158.69.38.240 62834 220.149.189.72 80 T...</td>\n",
  1601. " <td>Attack</td>\n",
  1602. " <td>2</td>\n",
  1603. " </tr>\n",
  1604. " <tr>\n",
  1605. " <th>6517</th>\n",
  1606. " <td>[중앙대학교 165.194.31.20 49157 219.153.48.112 1188...</td>\n",
  1607. " <td>Malwr</td>\n",
  1608. " <td>2</td>\n",
  1609. " </tr>\n",
  1610. " <tr>\n",
  1611. " <th>4198</th>\n",
  1612. " <td>[한세대학교 210.12.116.124 22564 220.68.231.48 80 T...</td>\n",
  1613. " <td>Attack</td>\n",
  1614. " <td>2</td>\n",
  1615. " </tr>\n",
  1616. " <tr>\n",
  1617. " <th>0</th>\n",
  1618. " <td>[계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC...</td>\n",
  1619. " <td>Malwr</td>\n",
  1620. " <td>1</td>\n",
  1621. " </tr>\n",
  1622. " <tr>\n",
  1623. " <th>...</th>\n",
  1624. " <td>...</td>\n",
  1625. " <td>...</td>\n",
  1626. " <td>...</td>\n",
  1627. " </tr>\n",
  1628. " <tr>\n",
  1629. " <th>3298</th>\n",
  1630. " <td>[경기대학교 50.62.176.64 40577 203.249.22.27 80 TCP...</td>\n",
  1631. " <td>Attack</td>\n",
  1632. " <td>1</td>\n",
  1633. " </tr>\n",
  1634. " <tr>\n",
  1635. " <th>3299</th>\n",
  1636. " <td>[한세대학교 103.242.119.217 44889 220.68.249.69 80 ...</td>\n",
  1637. " <td>Attack</td>\n",
  1638. " <td>1</td>\n",
  1639. " </tr>\n",
  1640. " <tr>\n",
  1641. " <th>3300</th>\n",
  1642. " <td>[동의대학교 185.247.181.7 37614 113.198.245.31 80 T...</td>\n",
  1643. " <td>Attack</td>\n",
  1644. " <td>1</td>\n",
  1645. " </tr>\n",
  1646. " <tr>\n",
  1647. " <th>3301</th>\n",
  1648. " <td>[숙명여자대학교 46.119.174.102 51876 203.252.201.77 8...</td>\n",
  1649. " <td>Attack</td>\n",
  1650. " <td>1</td>\n",
  1651. " </tr>\n",
  1652. " <tr>\n",
  1653. " <th>9887</th>\n",
  1654. " <td>[아주대학교 187.101.10.229 49802 202.30.20.81 80 TC...</td>\n",
  1655. " <td>Attack</td>\n",
  1656. " <td>1</td>\n",
  1657. " </tr>\n",
  1658. " </tbody>\n",
  1659. "</table>\n",
  1660. "<p>9888 rows × 3 columns</p>\n",
  1661. "</div>"
  1662. ],
  1663. "text/plain": [
  1664. " Cause Effect Frequency\n",
  1665. "393 [중앙대학교 165.194.8.12 49159 219.153.48.112 1188 ... Malwr 4\n",
  1666. "1831 [상지대학교 158.69.38.240 62834 220.149.189.72 80 T... Attack 2\n",
  1667. "6517 [중앙대학교 165.194.31.20 49157 219.153.48.112 1188... Malwr 2\n",
  1668. "4198 [한세대학교 210.12.116.124 22564 220.68.231.48 80 T... Attack 2\n",
  1669. "0 [계원예술대학교 117.17.214.4 2846 5.9.163.18 50001 TC... Malwr 1\n",
  1670. "... ... ... ...\n",
  1671. "3298 [경기대학교 50.62.176.64 40577 203.249.22.27 80 TCP... Attack 1\n",
  1672. "3299 [한세대학교 103.242.119.217 44889 220.68.249.69 80 ... Attack 1\n",
  1673. "3300 [동의대학교 185.247.181.7 37614 113.198.245.31 80 T... Attack 1\n",
  1674. "3301 [숙명여자대학교 46.119.174.102 51876 203.252.201.77 8... Attack 1\n",
  1675. "9887 [아주대학교 187.101.10.229 49802 202.30.20.81 80 TC... Attack 1\n",
  1676. "\n",
  1677. "[9888 rows x 3 columns]"
  1678. ]
  1679. },
  1680. "metadata": {},
  1681. "execution_count": 373
  1682. }
  1683. ],
  1684. "metadata": {}
  1685. },
  1686. {
  1687. "cell_type": "code",
  1688. "execution_count": 374,
  1689. "source": [
  1690. "# Attack Filter\n",
  1691. "def Attack_filter(ps):\n",
  1692. " return ' Attack' in ps[0]\n",
  1693. "\n",
  1694. "att_filter=prefix_NTM_df[list(map(Attack_filter, prefix_NTM_df.Cause))].fillna('Attack')\n",
  1695. "\n",
  1696. "# Malwr Filter\n",
  1697. "def Malwr_filter(ps):\n",
  1698. " return ' Malwr' in ps[0]\n",
  1699. "\n",
  1700. "mal_filter=prefix_NTM_df[list(map(Malwr_filter, prefix_NTM_df.Cause))].fillna('Malwr')\n",
  1701. "\n",
  1702. "# DDOS Filter\n",
  1703. "def DDOS_filter(ps):\n",
  1704. " return ' DDOS' in ps[0]\n",
  1705. "\n",
  1706. "dd_filter=prefix_NTM_df[list(map(DDOS_filter, prefix_NTM_df.Cause))].fillna('DDOS')\n",
  1707. "\n",
  1708. "# HACK Filter\n",
  1709. "def HACK_filter(ps):\n",
  1710. " return ' HACK' in ps[0]\n",
  1711. "\n",
  1712. "hack_filter=prefix_NTM_df[list(map(HACK_filter, prefix_NTM_df.Cause))].fillna('HACK')\n",
  1713. "\n",
  1714. "# MAIL Filter\n",
  1715. "def MAIL_filter(ps):\n",
  1716. " return ' MAIL' in ps[0]\n",
  1717. "\n",
  1718. "mail_filter=prefix_NTM_df[list(map(MAIL_filter, prefix_NTM_df.Cause))].fillna('MAIL')\n",
  1719. "\n",
  1720. "# WEB Filter\n",
  1721. "def WEB_filter(ps):\n",
  1722. " return ' WEB' in ps[0]\n",
  1723. "prefix_NTM_df\n",
  1724. "web_filter=prefix_NTM_df[list(map(WEB_filter, prefix_NTM_df.Cause))].fillna('WEB')\n",
  1725. "\n",
  1726. "frames = [att_filter, mal_filter, dd_filter, hack_filter, mail_filter, web_filter]\n",
  1727. "result = pd.concat(frames)\n",
  1728. "result.sort_values(by=['Frequency'],ascending=False)"
  1729. ],
  1730. "outputs": [
  1731. {
  1732. "output_type": "execute_result",
  1733. "data": {
  1734. "text/html": [
  1735. "<div>\n",
  1736. "<style scoped>\n",
  1737. " .dataframe tbody tr th:only-of-type {\n",
  1738. " vertical-align: middle;\n",
  1739. " }\n",
  1740. "\n",
  1741. " .dataframe tbody tr th {\n",
  1742. " vertical-align: top;\n",
  1743. " }\n",
  1744. "\n",
  1745. " .dataframe thead th {\n",
  1746. " text-align: right;\n",
  1747. " }\n",
  1748. "</style>\n",
  1749. "<table border=\"1\" class=\"dataframe\">\n",
  1750. " <thead>\n",
  1751. " <tr style=\"text-align: right;\">\n",
  1752. " <th></th>\n",
  1753. " <th>Cause</th>\n",
  1754. " <th>Effect</th>\n",
  1755. " <th>Frequency</th>\n",
  1756. " </tr>\n",
  1757. " </thead>\n",
  1758. " <tbody>\n",
  1759. " <tr>\n",
  1760. " <th>393</th>\n",
  1761. " <td>[중앙대학교 165.194.8.12 49159 219.153.48.112 1188 ...</td>\n",
  1762. " <td>Malwr</td>\n",
  1763. " <td>4</td>\n",
  1764. " </tr>\n",
  1765. " <tr>\n",
  1766. " <th>4198</th>\n",
  1767. " <td>[한세대학교 210.12.116.124 22564 220.68.231.48 80 T...</td>\n",
  1768. " <td>Attack</td>\n",
  1769. " <td>2</td>\n",
  1770. " </tr>\n",
  1771. " <tr>\n",
  1772. " <th>6517</th>\n",
  1773. " <td>[중앙대학교 165.194.31.20 49157 219.153.48.112 1188...</td>\n",
  1774. " <td>Malwr</td>\n",
  1775. " <td>2</td>\n",
  1776. " </tr>\n",
  1777. " <tr>\n",
  1778. " <th>1831</th>\n",
  1779. " <td>[상지대학교 158.69.38.240 62834 220.149.189.72 80 T...</td>\n",
  1780. " <td>Attack</td>\n",
  1781. " <td>2</td>\n",
  1782. " </tr>\n",
  1783. " <tr>\n",
  1784. " <th>8154</th>\n",
  1785. " <td>[고려대학교세종캠퍼스 218.76.223.50 46901 163.152.214.13...</td>\n",
  1786. " <td>Attack</td>\n",
  1787. " <td>1</td>\n",
  1788. " </tr>\n",
  1789. " <tr>\n",
  1790. " <th>...</th>\n",
  1791. " <td>...</td>\n",
  1792. " <td>...</td>\n",
  1793. " <td>...</td>\n",
  1794. " </tr>\n",
  1795. " <tr>\n",
  1796. " <th>4066</th>\n",
  1797. " <td>[서울대학교 122.51.16.226 60945 147.46.121.22 80 TC...</td>\n",
  1798. " <td>Attack</td>\n",
  1799. " <td>1</td>\n",
  1800. " </tr>\n",
  1801. " <tr>\n",
  1802. " <th>4067</th>\n",
  1803. " <td>[서울대학교 52.149.163.33 63408 147.47.106.230 80 T...</td>\n",
  1804. " <td>Attack</td>\n",
  1805. " <td>1</td>\n",
  1806. " </tr>\n",
  1807. " <tr>\n",
  1808. " <th>4068</th>\n",
  1809. " <td>[충남대학교 108.41.185.191 57224 168.188.40.65 80 T...</td>\n",
  1810. " <td>Attack</td>\n",
  1811. " <td>1</td>\n",
  1812. " </tr>\n",
  1813. " <tr>\n",
  1814. " <th>4069</th>\n",
  1815. " <td>[동덕여자대학교 203.30.236.64 62705 210.121.133.59 80...</td>\n",
  1816. " <td>Attack</td>\n",
  1817. " <td>1</td>\n",
  1818. " </tr>\n",
  1819. " <tr>\n",
  1820. " <th>9882</th>\n",
  1821. " <td>[숙명여자대학교 203.252.200.72 61489 78.47.123.172 45...</td>\n",
  1822. " <td>Malwr</td>\n",
  1823. " <td>1</td>\n",
  1824. " </tr>\n",
  1825. " </tbody>\n",
  1826. "</table>\n",
  1827. "<p>9824 rows × 3 columns</p>\n",
  1828. "</div>"
  1829. ],
  1830. "text/plain": [
  1831. " Cause Effect Frequency\n",
  1832. "393 [중앙대학교 165.194.8.12 49159 219.153.48.112 1188 ... Malwr 4\n",
  1833. "4198 [한세대학교 210.12.116.124 22564 220.68.231.48 80 T... Attack 2\n",
  1834. "6517 [중앙대학교 165.194.31.20 49157 219.153.48.112 1188... Malwr 2\n",
  1835. "1831 [상지대학교 158.69.38.240 62834 220.149.189.72 80 T... Attack 2\n",
  1836. "8154 [고려대학교세종캠퍼스 218.76.223.50 46901 163.152.214.13... Attack 1\n",
  1837. "... ... ... ...\n",
  1838. "4066 [서울대학교 122.51.16.226 60945 147.46.121.22 80 TC... Attack 1\n",
  1839. "4067 [서울대학교 52.149.163.33 63408 147.47.106.230 80 T... Attack 1\n",
  1840. "4068 [충남대학교 108.41.185.191 57224 168.188.40.65 80 T... Attack 1\n",
  1841. "4069 [동덕여자대학교 203.30.236.64 62705 210.121.133.59 80... Attack 1\n",
  1842. "9882 [숙명여자대학교 203.252.200.72 61489 78.47.123.172 45... Malwr 1\n",
  1843. "\n",
  1844. "[9824 rows x 3 columns]"
  1845. ]
  1846. },
  1847. "metadata": {},
  1848. "execution_count": 374
  1849. }
  1850. ],
  1851. "metadata": {}
  1852. },
  1853. {
  1854. "cell_type": "code",
  1855. "execution_count": null,
  1856. "source": [
  1857. "##################### NTM section End #####################"
  1858. ],
  1859. "outputs": [],
  1860. "metadata": {}
  1861. },
  1862. {
  1863. "cell_type": "code",
  1864. "execution_count": null,
  1865. "source": [
  1866. "##################### MTM section #####################"
  1867. ],
  1868. "outputs": [],
  1869. "metadata": {}
  1870. },
  1871. {
  1872. "cell_type": "code",
  1873. "execution_count": 375,
  1874. "source": [
  1875. "MTM_df=df[df['ACCD_FIND_MTD_CODE']=='2']\n",
  1876. "len(MTM_df)"
  1877. ],
  1878. "outputs": [
  1879. {
  1880. "output_type": "execute_result",
  1881. "data": {
  1882. "text/plain": [
  1883. "16"
  1884. ]
  1885. },
  1886. "metadata": {},
  1887. "execution_count": 375
  1888. }
  1889. ],
  1890. "metadata": {}
  1891. },
  1892. {
  1893. "cell_type": "code",
  1894. "execution_count": 376,
  1895. "source": [
  1896. "# Select the RISK_V2 column so the asset, risk, intent and black IP values can be extracted from it\n",
  1897. "RISK_V2_MTM=MTM_df['RISK_V2']\n",
  1898. "\n",
  1899. "RISK_V2_FILTERED_MTM=RISK_V2_MTM.dropna()\n",
  1900. "print(RISK_V2_MTM.size)\n",
  1901. "print(RISK_V2_FILTERED_MTM.size)\n"
  1902. ],
  1903. "outputs": [
  1904. {
  1905. "output_type": "stream",
  1906. "name": "stdout",
  1907. "text": [
  1908. "16\n",
  1909. "16\n"
  1910. ]
  1911. }
  1912. ],
  1913. "metadata": {}
  1914. },
  1915. {
  1916. "cell_type": "code",
  1917. "execution_count": 377,
  1918. "source": [
  1919. "def filter_assets_value_MTM(risk):\n",
  1920. " risks=[]\n",
  1921. " try:\n",
  1922. " for risk_key in risk:\n",
  1923. " if 'ASSETS_VAL_' in risk_key and risk[risk_key]:\n",
  1924. " risk_key_desc = 'RISK_V2.' + risk_key + '=' + get_asset_desc(risk_key)\n",
  1925. " risks.append(risk_key_desc)\n",
  1926. " except:\n",
  1927. " print(risk)\n",
  1928. " print(type(risk))\n",
  1929. " finally:\n",
  1930. " return risks"
  1931. ],
  1932. "outputs": [],
  1933. "metadata": {}
  1934. },
  1935. {
  1936. "cell_type": "code",
  1937. "execution_count": 378,
  1938. "source": [
  1939. "# modified\n",
  1940. "def get_asset_desc_MTM(asset_field):\n",
  1941. " if asset_field == 'ASSETS_VAL_1':\n",
  1942. " return '공인-전체IP대역(유선)'\n",
  1943. " elif asset_field == 'ASSETS_VAL_2':\n",
  1944. " return '공인-전체IP대역(무선)'\n",
  1945. " elif asset_field == 'ASSETS_VAL_3':\n",
  1946. " return '공인-WEB서버'\n",
  1947. " elif asset_field == 'ASSETS_VAL_4':\n",
  1948. " return '공인-내부응용서버'\n",
  1949. " elif asset_field == 'ASSETS_VAL_5':\n",
  1950. " return '공인-DB서버'\n",
  1951. " elif asset_field == 'ASSETS_VAL_6':\n",
  1952. " return '공인-패치서버'\n",
  1953. " elif asset_field == 'ASSETS_VAL_7':\n",
  1954. " return '공인-네트워크'\n",
  1955. " elif asset_field == 'ASSETS_VAL_8':\n",
  1956. " return '공인-보안'\n",
  1957. " elif asset_field == 'ASSETS_VAL_9':\n",
  1958. " return '공인-업무용PC'\n",
  1959. " elif asset_field == 'ASSETS_VAL_10':\n",
  1960. " return '공인-비업무용PC'\n",
  1961. " elif asset_field == 'ASSETS_VAL_11':\n",
  1962. " return '공인-기타'\n",
  1963. " elif asset_field == 'ASSETS_VAL_12':\n",
  1964. " return '사설-전체IP대역(유선)'\n",
  1965. " elif asset_field == 'ASSETS_VAL_13':\n",
  1966. " return '사설-전체IP대역(무선)'\n",
  1967. " elif asset_field == 'ASSETS_VAL_14':\n",
  1968. " return '사설-WEB서버'\n",
  1969. " elif asset_field == 'ASSETS_VAL_15':\n",
  1970. " return '사설-내부응용서버'\n",
  1971. " elif asset_field == 'ASSETS_VAL_16':\n",
  1972. " return '사설-DB서버'\n",
  1973. " elif asset_field == 'ASSETS_VAL_17':\n",
  1974. " return '사설-패치서버'\n",
  1975. " elif asset_field == 'ASSETS_VAL_18':\n",
  1976. " return '사설-네트워크'\n",
  1977. " elif asset_field == 'ASSETS_VAL_19':\n",
  1978. " return '사설-보안'\n",
  1979. " elif asset_field == 'ASSETS_VAL_20':\n",
  1980. " return '사설-업무용PC'\n",
  1981. " elif asset_field == 'ASSETS_VAL_21':\n",
  1982. " return '사설-비업무용PC'\n",
  1983. " elif asset_field == 'ASSETS_VAL_22':\n",
  1984. " return '사설-기타'\n",
  1985. " else:\n",
  1986. " return ''"
  1987. ],
  1988. "outputs": [],
  1989. "metadata": {}
  1990. },
  1991. {
  1992. "cell_type": "code",
  1993. "execution_count": 379,
  1994. "source": [
  1995. "# New assets column\n",
  1996. "MTM_df['ASSETS_VAL']=list(map(filter_assets_value_MTM, RISK_V2_FILTERED_MTM))\n",
  1997. "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].astype(str)\n",
  1998. "MTM_df[:1]"
  1999. ],
  2000. "outputs": [
  2001. {
  2002. "output_type": "stream",
  2003. "name": "stderr",
  2004. "text": [
  2005. "<ipython-input-379-706f111c14af>:2: SettingWithCopyWarning: \n",
  2006. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2007. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2008. "\n",
  2009. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2010. " MTM_df['ASSETS_VAL']=list(map(filter_assets_value_MTM, RISK_V2_FILTERED_MTM))\n",
  2011. "<ipython-input-379-706f111c14af>:3: SettingWithCopyWarning: \n",
  2012. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2013. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2014. "\n",
  2015. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2016. " MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].astype(str)\n"
  2017. ]
  2018. },
  2019. {
  2020. "output_type": "execute_result",
  2021. "data": {
  2022. "text/html": [
  2023. "<div>\n",
  2024. "<style scoped>\n",
  2025. " .dataframe tbody tr th:only-of-type {\n",
  2026. " vertical-align: middle;\n",
  2027. " }\n",
  2028. "\n",
  2029. " .dataframe tbody tr th {\n",
  2030. " vertical-align: top;\n",
  2031. " }\n",
  2032. "\n",
  2033. " .dataframe thead th {\n",
  2034. " text-align: right;\n",
  2035. " }\n",
  2036. "</style>\n",
  2037. "<table border=\"1\" class=\"dataframe\">\n",
  2038. " <thead>\n",
  2039. " <tr style=\"text-align: right;\">\n",
  2040. " <th></th>\n",
  2041. " <th>RISK_V2</th>\n",
  2042. " <th>INST_NM</th>\n",
  2043. " <th>DRULE_ATT_TYPE_CODE1</th>\n",
  2044. " <th>TW_ATT_IP</th>\n",
  2045. " <th>TW_ATT_PORT</th>\n",
  2046. " <th>TW_DMG_IP</th>\n",
  2047. " <th>TW_DMG_PORT</th>\n",
  2048. " <th>ACCD_DMG_PROTO_NM</th>\n",
  2049. " <th>TW_ATT_CT_NM</th>\n",
  2050. " <th>ACCD_FIND_MTD_CODE</th>\n",
  2051. " <th>DRULE_NM</th>\n",
  2052. " <th>ASSETS_VAL</th>\n",
  2053. " </tr>\n",
  2054. " </thead>\n",
  2055. " <tbody>\n",
  2056. " <tr>\n",
  2057. " <th>8</th>\n",
  2058. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  2059. " <td>남서울대학교</td>\n",
  2060. " <td>Attack</td>\n",
  2061. " <td>159.69.250.163</td>\n",
  2062. " <td>80</td>\n",
  2063. " <td>192.170.112.14</td>\n",
  2064. " <td>8225</td>\n",
  2065. " <td></td>\n",
  2066. " <td>독일</td>\n",
  2067. " <td>2</td>\n",
  2068. " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
  2069. " <td>[]</td>\n",
  2070. " </tr>\n",
  2071. " </tbody>\n",
  2072. "</table>\n",
  2073. "</div>"
  2074. ],
  2075. "text/plain": [
  2076. " RISK_V2 INST_NM \\\n",
  2077. "8 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 남서울대학교 \n",
  2078. "\n",
  2079. " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP \\\n",
  2080. "8 Attack 159.69.250.163 80 192.170.112.14 \n",
  2081. "\n",
  2082. " TW_DMG_PORT ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
  2083. "8 8225 독일 2 \n",
  2084. "\n",
  2085. " DRULE_NM ASSETS_VAL \n",
  2086. "8 Malwr_Infected_29_11_exe_collect_20042001ECSC [] "
  2087. ]
  2088. },
  2089. "metadata": {},
  2090. "execution_count": 379
  2091. }
  2092. ],
  2093. "metadata": {}
  2094. },
  2095. {
  2096. "cell_type": "code",
  2097. "execution_count": 381,
  2098. "source": [
  2099. "# modified\n",
  2100. "def filter_intent_MTM(intent):\n",
  2101. " intents=[]\n",
  2102. " for intent_key in intent:\n",
  2103. " if 'INTENT_VAL_' in intent_key and intent[intent_key]:\n",
  2104. " intent_key_desc = 'RISK_V2.' + intent_key + '=' + get_intent_desc(intent_key)\n",
  2105. " intents.append(intent_key_desc)\n",
  2106. " return intents"
  2107. ],
  2108. "outputs": [],
  2109. "metadata": {}
  2110. },
  2111. {
  2112. "cell_type": "code",
  2113. "execution_count": 382,
  2114. "source": [
  2115. "def get_intent_desc_MTM(intent_field):\n",
  2116. " if intent_field == 'INTENT_VAL_1':\n",
  2117. " return '파괴'\n",
  2118. " elif intent_field == 'INTENT_VAL_2':\n",
  2119. " return '유출'\n",
  2120. " elif intent_field == 'INTENT_VAL_3':\n",
  2121. " return '지연'\n",
  2122. " elif intent_field == 'INTENT_VAL_4':\n",
  2123. " return '잠복'\n",
  2124. " elif intent_field == 'INTENT_VAL_5':\n",
  2125. " return '단순침입'\n",
  2126. " elif intent_field == 'INTENT_VAL_6':\n",
  2127. " return 'MD5'\n",
  2128. " elif intent_field == 'INTENT_VAL_0':\n",
  2129. " return 'Default'\n",
  2130. " else:\n",
  2131. " return ''"
  2132. ],
  2133. "outputs": [],
  2134. "metadata": {}
  2135. },
  2136. {
  2137. "cell_type": "code",
  2138. "execution_count": 383,
  2139. "source": [
  2140. "# New column of intent value\n",
  2141. "MTM_df['INTENT_VAL']=list(map(filter_intent_MTM, RISK_V2_FILTERED_MTM))\n",
  2142. "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].astype(str)\n",
  2143. "MTM_df[:1]"
  2144. ],
  2145. "outputs": [
  2146. {
  2147. "output_type": "stream",
  2148. "name": "stderr",
  2149. "text": [
  2150. "<ipython-input-383-e32409088c44>:2: SettingWithCopyWarning: \n",
  2151. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2152. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2153. "\n",
  2154. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2155. " MTM_df['INTENT_VAL']=list(map(filter_intent_MTM, RISK_V2_FILTERED_MTM))\n",
  2156. "<ipython-input-383-e32409088c44>:3: SettingWithCopyWarning: \n",
  2157. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2158. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2159. "\n",
  2160. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2161. " MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].astype(str)\n"
  2162. ]
  2163. },
  2164. {
  2165. "output_type": "execute_result",
  2166. "data": {
  2167. "text/html": [
  2168. "<div>\n",
  2169. "<style scoped>\n",
  2170. " .dataframe tbody tr th:only-of-type {\n",
  2171. " vertical-align: middle;\n",
  2172. " }\n",
  2173. "\n",
  2174. " .dataframe tbody tr th {\n",
  2175. " vertical-align: top;\n",
  2176. " }\n",
  2177. "\n",
  2178. " .dataframe thead th {\n",
  2179. " text-align: right;\n",
  2180. " }\n",
  2181. "</style>\n",
  2182. "<table border=\"1\" class=\"dataframe\">\n",
  2183. " <thead>\n",
  2184. " <tr style=\"text-align: right;\">\n",
  2185. " <th></th>\n",
  2186. " <th>RISK_V2</th>\n",
  2187. " <th>INST_NM</th>\n",
  2188. " <th>DRULE_ATT_TYPE_CODE1</th>\n",
  2189. " <th>TW_ATT_IP</th>\n",
  2190. " <th>TW_ATT_PORT</th>\n",
  2191. " <th>TW_DMG_IP</th>\n",
  2192. " <th>TW_DMG_PORT</th>\n",
  2193. " <th>ACCD_DMG_PROTO_NM</th>\n",
  2194. " <th>TW_ATT_CT_NM</th>\n",
  2195. " <th>ACCD_FIND_MTD_CODE</th>\n",
  2196. " <th>DRULE_NM</th>\n",
  2197. " <th>ASSETS_VAL</th>\n",
  2198. " <th>INTENT_VAL</th>\n",
  2199. " </tr>\n",
  2200. " </thead>\n",
  2201. " <tbody>\n",
  2202. " <tr>\n",
  2203. " <th>8</th>\n",
  2204. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  2205. " <td>남서울대학교</td>\n",
  2206. " <td>Attack</td>\n",
  2207. " <td>159.69.250.163</td>\n",
  2208. " <td>80</td>\n",
  2209. " <td>192.170.112.14</td>\n",
  2210. " <td>8225</td>\n",
  2211. " <td></td>\n",
  2212. " <td>독일</td>\n",
  2213. " <td>2</td>\n",
  2214. " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
  2215. " <td>[]</td>\n",
  2216. " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
  2217. " </tr>\n",
  2218. " </tbody>\n",
  2219. "</table>\n",
  2220. "</div>"
  2221. ],
  2222. "text/plain": [
  2223. " RISK_V2 INST_NM \\\n",
  2224. "8 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 남서울대학교 \n",
  2225. "\n",
  2226. " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP \\\n",
  2227. "8 Attack 159.69.250.163 80 192.170.112.14 \n",
  2228. "\n",
  2229. " TW_DMG_PORT ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
  2230. "8 8225 독일 2 \n",
  2231. "\n",
  2232. " DRULE_NM ASSETS_VAL \\\n",
  2233. "8 Malwr_Infected_29_11_exe_collect_20042001ECSC [] \n",
  2234. "\n",
  2235. " INTENT_VAL \n",
  2236. "8 ['RISK_V2.INTENT_VAL_0=Default'] "
  2237. ]
  2238. },
  2239. "metadata": {},
  2240. "execution_count": 383
  2241. }
  2242. ],
  2243. "metadata": {}
  2244. },
  2245. {
  2246. "cell_type": "code",
  2247. "execution_count": 384,
  2248. "source": [
  2249. "# modified\n",
  2250. "def filter_source_MTM(source):\n",
  2251. " sources=[]\n",
  2252. " for source_key in source:\n",
  2253. " if 'SOURCE_VAL_' in source_key and source[source_key]:\n",
  2254. " source_key_desc='RISK_V2.' + source_key + '=' + get_source_desc(source_key)\n",
  2255. " sources.append(source_key_desc)\n",
  2256. " return sources"
  2257. ],
  2258. "outputs": [],
  2259. "metadata": {}
  2260. },
  2261. {
  2262. "cell_type": "code",
  2263. "execution_count": 385,
  2264. "source": [
  2265. "def get_source_desc_MTM(source_field):\n",
  2266. " if source_field=='SOURCE_VAL_1':\n",
  2267. " return '북한IP'\n",
  2268. " if source_field=='SOURCE_VAL_3':\n",
  2269. " return 'ECSC Black IP'\n",
  2270. " else:\n",
  2271. " return ''"
  2272. ],
  2273. "outputs": [],
  2274. "metadata": {}
  2275. },
  2276. {
  2277. "cell_type": "code",
  2278. "execution_count": 386,
  2279. "source": [
  2280. "# New column of SOURCE_VAL value\n",
  2281. "MTM_df['SOURCE_VAL']=list(map(filter_source_MTM, RISK_V2_FILTERED_MTM))\n",
  2282. "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].astype(str)\n",
  2283. "MTM_df[:5]"
  2284. ],
  2285. "outputs": [
  2286. {
  2287. "output_type": "stream",
  2288. "name": "stderr",
  2289. "text": [
  2290. "<ipython-input-386-f88f537aeb2d>:2: SettingWithCopyWarning: \n",
  2291. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2292. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2293. "\n",
  2294. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2295. " MTM_df['SOURCE_VAL']=list(map(filter_source_MTM, RISK_V2_FILTERED_MTM))\n",
  2296. "<ipython-input-386-f88f537aeb2d>:3: SettingWithCopyWarning: \n",
  2297. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2298. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2299. "\n",
  2300. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2301. " MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].astype(str)\n"
  2302. ]
  2303. },
  2304. {
  2305. "output_type": "execute_result",
  2306. "data": {
  2307. "text/html": [
  2308. "<div>\n",
  2309. "<style scoped>\n",
  2310. " .dataframe tbody tr th:only-of-type {\n",
  2311. " vertical-align: middle;\n",
  2312. " }\n",
  2313. "\n",
  2314. " .dataframe tbody tr th {\n",
  2315. " vertical-align: top;\n",
  2316. " }\n",
  2317. "\n",
  2318. " .dataframe thead th {\n",
  2319. " text-align: right;\n",
  2320. " }\n",
  2321. "</style>\n",
  2322. "<table border=\"1\" class=\"dataframe\">\n",
  2323. " <thead>\n",
  2324. " <tr style=\"text-align: right;\">\n",
  2325. " <th></th>\n",
  2326. " <th>RISK_V2</th>\n",
  2327. " <th>INST_NM</th>\n",
  2328. " <th>DRULE_ATT_TYPE_CODE1</th>\n",
  2329. " <th>TW_ATT_IP</th>\n",
  2330. " <th>TW_ATT_PORT</th>\n",
  2331. " <th>TW_DMG_IP</th>\n",
  2332. " <th>TW_DMG_PORT</th>\n",
  2333. " <th>ACCD_DMG_PROTO_NM</th>\n",
  2334. " <th>TW_ATT_CT_NM</th>\n",
  2335. " <th>ACCD_FIND_MTD_CODE</th>\n",
  2336. " <th>DRULE_NM</th>\n",
  2337. " <th>ASSETS_VAL</th>\n",
  2338. " <th>INTENT_VAL</th>\n",
  2339. " <th>SOURCE_VAL</th>\n",
  2340. " </tr>\n",
  2341. " </thead>\n",
  2342. " <tbody>\n",
  2343. " <tr>\n",
  2344. " <th>8</th>\n",
  2345. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  2346. " <td>남서울대학교</td>\n",
  2347. " <td>Attack</td>\n",
  2348. " <td>159.69.250.163</td>\n",
  2349. " <td>80</td>\n",
  2350. " <td>192.170.112.14</td>\n",
  2351. " <td>8225</td>\n",
  2352. " <td></td>\n",
  2353. " <td>독일</td>\n",
  2354. " <td>2</td>\n",
  2355. " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
  2356. " <td>[]</td>\n",
  2357. " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
  2358. " <td>[]</td>\n",
  2359. " </tr>\n",
  2360. " <tr>\n",
  2361. " <th>322</th>\n",
  2362. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  2363. " <td>전라북도교육청</td>\n",
  2364. " <td>Attack</td>\n",
  2365. " <td>115.95.20.116</td>\n",
  2366. " <td>80</td>\n",
  2367. " <td>211.251.39.65</td>\n",
  2368. " <td>2447</td>\n",
  2369. " <td>TCP</td>\n",
  2370. " <td>대한민국</td>\n",
  2371. " <td>2</td>\n",
  2372. " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
  2373. " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)']</td>\n",
  2374. " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
  2375. " <td>[]</td>\n",
  2376. " </tr>\n",
  2377. " <tr>\n",
  2378. " <th>1064</th>\n",
  2379. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  2380. " <td>전라북도교육청</td>\n",
  2381. " <td>Attack</td>\n",
  2382. " <td>211.210.30.28</td>\n",
  2383. " <td>80</td>\n",
  2384. " <td>211.251.133.40</td>\n",
  2385. " <td>4716</td>\n",
  2386. " <td>TCP</td>\n",
  2387. " <td>대한민국</td>\n",
  2388. " <td>2</td>\n",
  2389. " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
  2390. " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V...</td>\n",
  2391. " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
  2392. " <td>[]</td>\n",
  2393. " </tr>\n",
  2394. " <tr>\n",
  2395. " <th>1419</th>\n",
  2396. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  2397. " <td>전라북도교육청</td>\n",
  2398. " <td>Attack</td>\n",
  2399. " <td>211.210.30.28</td>\n",
  2400. " <td>80</td>\n",
  2401. " <td>211.251.122.33</td>\n",
  2402. " <td>4523</td>\n",
  2403. " <td>TCP</td>\n",
  2404. " <td>대한민국</td>\n",
  2405. " <td>2</td>\n",
  2406. " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
  2407. " <td>['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V...</td>\n",
  2408. " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
  2409. " <td>[]</td>\n",
  2410. " </tr>\n",
  2411. " <tr>\n",
  2412. " <th>2973</th>\n",
  2413. " <td>{'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE...</td>\n",
  2414. " <td>남서울대학교</td>\n",
  2415. " <td>Attack</td>\n",
  2416. " <td>202.176.5.136</td>\n",
  2417. " <td>8136</td>\n",
  2418. " <td>192.168.107.59</td>\n",
  2419. " <td>49207</td>\n",
  2420. " <td>TCP</td>\n",
  2421. " <td>말레이시아</td>\n",
  2422. " <td>2</td>\n",
  2423. " <td>Malwr_Infected_29_11_exe_collect_20042001ECSC</td>\n",
  2424. " <td>[]</td>\n",
  2425. " <td>['RISK_V2.INTENT_VAL_0=Default']</td>\n",
  2426. " <td>[]</td>\n",
  2427. " </tr>\n",
  2428. " </tbody>\n",
  2429. "</table>\n",
  2430. "</div>"
  2431. ],
  2432. "text/plain": [
  2433. " RISK_V2 INST_NM \\\n",
  2434. "8 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 남서울대학교 \n",
  2435. "322 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 전라북도교육청 \n",
  2436. "1064 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 전라북도교육청 \n",
  2437. "1419 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 전라북도교육청 \n",
  2438. "2973 {'WEEKNESS_VAL_4': 0, 'WEEKNESS_VAL_5': 0, 'WE... 남서울대학교 \n",
  2439. "\n",
  2440. " DRULE_ATT_TYPE_CODE1 TW_ATT_IP TW_ATT_PORT TW_DMG_IP \\\n",
  2441. "8 Attack 159.69.250.163 80 192.170.112.14 \n",
  2442. "322 Attack 115.95.20.116 80 211.251.39.65 \n",
  2443. "1064 Attack 211.210.30.28 80 211.251.133.40 \n",
  2444. "1419 Attack 211.210.30.28 80 211.251.122.33 \n",
  2445. "2973 Attack 202.176.5.136 8136 192.168.107.59 \n",
  2446. "\n",
  2447. " TW_DMG_PORT ACCD_DMG_PROTO_NM TW_ATT_CT_NM ACCD_FIND_MTD_CODE \\\n",
  2448. "8 8225 독일 2 \n",
  2449. "322 2447 TCP 대한민국 2 \n",
  2450. "1064 4716 TCP 대한민국 2 \n",
  2451. "1419 4523 TCP 대한민국 2 \n",
  2452. "2973 49207 TCP 말레이시아 2 \n",
  2453. "\n",
  2454. " DRULE_NM \\\n",
  2455. "8 Malwr_Infected_29_11_exe_collect_20042001ECSC \n",
  2456. "322 Malwr_Infected_29_11_exe_collect_20042001ECSC \n",
  2457. "1064 Malwr_Infected_29_11_exe_collect_20042001ECSC \n",
  2458. "1419 Malwr_Infected_29_11_exe_collect_20042001ECSC \n",
  2459. "2973 Malwr_Infected_29_11_exe_collect_20042001ECSC \n",
  2460. "\n",
  2461. " ASSETS_VAL \\\n",
  2462. "8 [] \n",
  2463. "322 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] \n",
  2464. "1064 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V... \n",
  2465. "1419 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V... \n",
  2466. "2973 [] \n",
  2467. "\n",
  2468. " INTENT_VAL SOURCE_VAL \n",
  2469. "8 ['RISK_V2.INTENT_VAL_0=Default'] [] \n",
  2470. "322 ['RISK_V2.INTENT_VAL_0=Default'] [] \n",
  2471. "1064 ['RISK_V2.INTENT_VAL_0=Default'] [] \n",
  2472. "1419 ['RISK_V2.INTENT_VAL_0=Default'] [] \n",
  2473. "2973 ['RISK_V2.INTENT_VAL_0=Default'] [] "
  2474. ]
  2475. },
  2476. "metadata": {},
  2477. "execution_count": 386
  2478. }
  2479. ],
  2480. "metadata": {}
  2481. },
  2482. {
  2483. "cell_type": "code",
  2484. "execution_count": 387,
  2485. "source": [
  2486. "MTM_df.drop(columns=['RISK_V2'], inplace=True)\n",
  2487. "MTM_df.columns"
  2488. ],
  2489. "outputs": [
  2490. {
  2491. "output_type": "stream",
  2492. "name": "stderr",
  2493. "text": [
  2494. "/Users/joohyunyoon/.pyenv/versions/anaconda3-2021.05/lib/python3.8/site-packages/pandas/core/frame.py:4308: SettingWithCopyWarning: \n",
  2495. "A value is trying to be set on a copy of a slice from a DataFrame\n",
  2496. "\n",
  2497. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2498. " return super().drop(\n"
  2499. ]
  2500. },
  2501. {
  2502. "output_type": "execute_result",
  2503. "data": {
  2504. "text/plain": [
  2505. "Index(['INST_NM', 'DRULE_ATT_TYPE_CODE1', 'TW_ATT_IP', 'TW_ATT_PORT',\n",
  2506. " 'TW_DMG_IP', 'TW_DMG_PORT', 'ACCD_DMG_PROTO_NM', 'TW_ATT_CT_NM',\n",
  2507. " 'ACCD_FIND_MTD_CODE', 'DRULE_NM', 'ASSETS_VAL', 'INTENT_VAL',\n",
  2508. " 'SOURCE_VAL'],\n",
  2509. " dtype='object')"
  2510. ]
  2511. },
  2512. "metadata": {},
  2513. "execution_count": 387
  2514. }
  2515. ],
  2516. "metadata": {}
  2517. },
  2518. {
  2519. "cell_type": "code",
  2520. "execution_count": 388,
  2521. "source": [
  2522. "MTM_df.isna().sum()"
  2523. ],
  2524. "outputs": [
  2525. {
  2526. "output_type": "execute_result",
  2527. "data": {
  2528. "text/plain": [
  2529. "INST_NM 0\n",
  2530. "DRULE_ATT_TYPE_CODE1 0\n",
  2531. "TW_ATT_IP 0\n",
  2532. "TW_ATT_PORT 0\n",
  2533. "TW_DMG_IP 0\n",
  2534. "TW_DMG_PORT 0\n",
  2535. "ACCD_DMG_PROTO_NM 0\n",
  2536. "TW_ATT_CT_NM 0\n",
  2537. "ACCD_FIND_MTD_CODE 0\n",
  2538. "DRULE_NM 0\n",
  2539. "ASSETS_VAL 0\n",
  2540. "INTENT_VAL 0\n",
  2541. "SOURCE_VAL 0\n",
  2542. "dtype: int64"
  2543. ]
  2544. },
  2545. "metadata": {},
  2546. "execution_count": 388
  2547. }
  2548. ],
  2549. "metadata": {}
  2550. },
  2551. {
  2552. "cell_type": "code",
  2553. "execution_count": 389,
  2554. "source": [
  2555. "# Replace NaN: '' for the name/type columns, 0 for the IP, port and *_VAL columns\n",
  2556. "MTM_df['ACCD_DMG_PROTO_NM']=MTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
  2557. "MTM_df['INST_NM']=MTM_df['INST_NM'].replace(np.nan,'')\n",
  2558. "MTM_df['DRULE_ATT_TYPE_CODE1']=MTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
  2559. "MTM_df['TW_ATT_IP']=MTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
  2560. "MTM_df['TW_ATT_PORT']=MTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
  2561. "MTM_df['TW_DMG_IP']=MTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
  2562. "MTM_df['TW_DMG_PORT']=MTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
  2563. "MTM_df['TW_ATT_CT_NM']=MTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
  2564. "MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
  2565. "MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].replace(np.nan,0)\n",
  2566. "MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
  2567. "MTM_df['DRULE_NM']=MTM_df['DRULE_NM'].replace(np.nan,'')"
  2568. ],
  2569. "outputs": [
  2570. {
  2571. "output_type": "stream",
  2572. "name": "stderr",
  2573. "text": [
  2574. "<ipython-input-389-02158490b065>:2: SettingWithCopyWarning: \n",
  2575. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2576. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2577. "\n",
  2578. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2579. " MTM_df['ACCD_DMG_PROTO_NM']=MTM_df['ACCD_DMG_PROTO_NM'].replace(np.nan,'')\n",
  2580. "<ipython-input-389-02158490b065>:3: SettingWithCopyWarning: \n",
  2581. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2582. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2583. "\n",
  2584. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2585. " MTM_df['INST_NM']=MTM_df['INST_NM'].replace(np.nan,'')\n",
  2586. "<ipython-input-389-02158490b065>:4: SettingWithCopyWarning: \n",
  2587. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2588. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2589. "\n",
  2590. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2591. " MTM_df['DRULE_ATT_TYPE_CODE1']=MTM_df['DRULE_ATT_TYPE_CODE1'].replace(np.nan,'')\n",
  2592. "<ipython-input-389-02158490b065>:5: SettingWithCopyWarning: \n",
  2593. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2594. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2595. "\n",
  2596. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2597. " MTM_df['TW_ATT_IP']=MTM_df['TW_ATT_IP'].replace(np.nan,0)\n",
  2598. "<ipython-input-389-02158490b065>:6: SettingWithCopyWarning: \n",
  2599. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2600. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2601. "\n",
  2602. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2603. " MTM_df['TW_ATT_PORT']=MTM_df['TW_ATT_PORT'].replace(np.nan,0)\n",
  2604. "<ipython-input-389-02158490b065>:7: SettingWithCopyWarning: \n",
  2605. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2606. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2607. "\n",
  2608. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2609. " MTM_df['TW_DMG_IP']=MTM_df['TW_DMG_IP'].replace(np.nan,0)\n",
  2610. "<ipython-input-389-02158490b065>:8: SettingWithCopyWarning: \n",
  2611. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2612. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2613. "\n",
  2614. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2615. " MTM_df['TW_DMG_PORT']=MTM_df['TW_DMG_PORT'].replace(np.nan,0)\n",
  2616. "<ipython-input-389-02158490b065>:9: SettingWithCopyWarning: \n",
  2617. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2618. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2619. "\n",
  2620. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2621. " MTM_df['TW_ATT_CT_NM']=MTM_df['TW_ATT_CT_NM'].replace(np.nan,'')\n",
  2622. "<ipython-input-389-02158490b065>:10: SettingWithCopyWarning: \n",
  2623. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2624. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2625. "\n",
  2626. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2627. " MTM_df['ASSETS_VAL']=MTM_df['ASSETS_VAL'].replace(np.nan,0)\n",
  2628. "<ipython-input-389-02158490b065>:11: SettingWithCopyWarning: \n",
  2629. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2630. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2631. "\n",
  2632. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2633. " MTM_df['INTENT_VAL']=MTM_df['INTENT_VAL'].replace(np.nan,0)\n",
  2634. "<ipython-input-389-02158490b065>:12: SettingWithCopyWarning: \n",
  2635. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2636. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2637. "\n",
  2638. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2639. " MTM_df['SOURCE_VAL']=MTM_df['SOURCE_VAL'].replace(np.nan,0)\n",
  2640. "<ipython-input-389-02158490b065>:13: SettingWithCopyWarning: \n",
  2641. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2642. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2643. "\n",
  2644. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2645. " MTM_df['DRULE_NM']=MTM_df['DRULE_NM'].replace(np.nan,'')\n"
  2646. ]
  2647. }
  2648. ],
  2649. "metadata": {}
  2650. },
  2651. {
  2652. "cell_type": "code",
  2653. "execution_count": 390,
  2654. "source": [
  2655. "# Check again that no NaN values remain\n",
  2656. "MTM_df.isna().sum()"
  2657. ],
  2658. "outputs": [
  2659. {
  2660. "output_type": "execute_result",
  2661. "data": {
  2662. "text/plain": [
  2663. "INST_NM 0\n",
  2664. "DRULE_ATT_TYPE_CODE1 0\n",
  2665. "TW_ATT_IP 0\n",
  2666. "TW_ATT_PORT 0\n",
  2667. "TW_DMG_IP 0\n",
  2668. "TW_DMG_PORT 0\n",
  2669. "ACCD_DMG_PROTO_NM 0\n",
  2670. "TW_ATT_CT_NM 0\n",
  2671. "ACCD_FIND_MTD_CODE 0\n",
  2672. "DRULE_NM 0\n",
  2673. "ASSETS_VAL 0\n",
  2674. "INTENT_VAL 0\n",
  2675. "SOURCE_VAL 0\n",
  2676. "dtype: int64"
  2677. ]
  2678. },
  2679. "metadata": {},
  2680. "execution_count": 390
  2681. }
  2682. ],
  2683. "metadata": {}
  2684. },
  2685. {
  2686. "cell_type": "code",
  2687. "execution_count": 391,
  2688. "source": [
  2689. "# # Merge all\n",
  2690. "\n",
  2691. "# # Join every column into one space-separated string per row\n",
  2692. "MTM_df['Combined']=MTM_df['INST_NM'].astype(str)+' '+MTM_df['TW_ATT_IP'].astype(str)+' '+MTM_df['TW_ATT_PORT'].astype(str)+' '+MTM_df['TW_DMG_IP'].astype(str)+' '+MTM_df['TW_DMG_PORT'].astype(str) +' '+MTM_df['ACCD_DMG_PROTO_NM'].astype(str)+' '+MTM_df['TW_ATT_CT_NM']+' '+MTM_df['ASSETS_VAL']+' '+MTM_df['INTENT_VAL']+' '+MTM_df['SOURCE_VAL']+' '+MTM_df['DRULE_ATT_TYPE_CODE1']+' '+MTM_df['DRULE_NM']\n",
  2693. "\n",
  2694. "MTM_com=MTM_df['Combined']\n",
  2695. "MTM_com[:10]\n"
  2696. ],
  2697. "outputs": [
  2698. {
  2699. "output_type": "stream",
  2700. "name": "stderr",
  2701. "text": [
  2702. "<ipython-input-391-644b4d0a1409>:4: SettingWithCopyWarning: \n",
  2703. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  2704. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  2705. "\n",
  2706. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  2707. " MTM_df['Combined']=MTM_df['INST_NM'].astype(str)+' '+MTM_df['TW_ATT_IP'].astype(str)+' '+MTM_df['TW_ATT_PORT'].astype(str)+' '+MTM_df['TW_DMG_IP'].astype(str)+' '+MTM_df['TW_DMG_PORT'].astype(str) +' '+MTM_df['ACCD_DMG_PROTO_NM'].astype(str)+' '+MTM_df['TW_ATT_CT_NM']+' '+MTM_df['ASSETS_VAL']+' '+MTM_df['INTENT_VAL']+' '+MTM_df['SOURCE_VAL']+' '+MTM_df['DRULE_ATT_TYPE_CODE1']+' '+MTM_df['DRULE_NM']\n"
  2708. ]
  2709. },
  2710. {
  2711. "output_type": "execute_result",
  2712. "data": {
  2713. "text/plain": [
  2714. "8 남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...\n",
  2715. "322 전라북도교육청 115.95.20.116 80 211.251.39.65 2447 TC...\n",
  2716. "1064 전라북도교육청 211.210.30.28 80 211.251.133.40 4716 T...\n",
  2717. "1419 전라북도교육청 211.210.30.28 80 211.251.122.33 4523 T...\n",
  2718. "2973 남서울대학교 202.176.5.136 8136 192.168.107.59 49207...\n",
  2719. "3584 경남대학교 209.250.247.60 80 203.253.180.71 2073 TC...\n",
  2720. "4270 전라북도교육청 211.210.30.28 80 211.251.117.65 5212 T...\n",
  2721. "4490 부산대학교 164.125.248.75 1098 195.158.31.58 2961 T...\n",
  2722. "5506 부산대학교 164.125.242.79 52896 46.249.119.133 4941...\n",
  2723. "5765 전라북도교육청 219.249.231.84 80 211.251.82.1 52721 T...\n",
  2724. "Name: Combined, dtype: object"
  2725. ]
  2726. },
  2727. "metadata": {},
  2728. "execution_count": 391
  2729. }
  2730. ],
  2731. "metadata": {}
  2732. },
  2733. {
  2734. "cell_type": "code",
  2735. "execution_count": 392,
  2736. "source": [
  2737. "# Wrap the combined Series in a DataFrame\n",
  2738. "MTM_to_df=pd.DataFrame(MTM_com)\n",
  2739. "MTM_to_df[:5]"
  2740. ],
  2741. "outputs": [
  2742. {
  2743. "output_type": "execute_result",
  2744. "data": {
  2745. "text/html": [
  2746. "<div>\n",
  2747. "<style scoped>\n",
  2748. " .dataframe tbody tr th:only-of-type {\n",
  2749. " vertical-align: middle;\n",
  2750. " }\n",
  2751. "\n",
  2752. " .dataframe tbody tr th {\n",
  2753. " vertical-align: top;\n",
  2754. " }\n",
  2755. "\n",
  2756. " .dataframe thead th {\n",
  2757. " text-align: right;\n",
  2758. " }\n",
  2759. "</style>\n",
  2760. "<table border=\"1\" class=\"dataframe\">\n",
  2761. " <thead>\n",
  2762. " <tr style=\"text-align: right;\">\n",
  2763. " <th></th>\n",
  2764. " <th>Combined</th>\n",
  2765. " </tr>\n",
  2766. " </thead>\n",
  2767. " <tbody>\n",
  2768. " <tr>\n",
  2769. " <th>8</th>\n",
  2770. " <td>남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
  2771. " </tr>\n",
  2772. " <tr>\n",
  2773. " <th>322</th>\n",
  2774. " <td>전라북도교육청 115.95.20.116 80 211.251.39.65 2447 TC...</td>\n",
  2775. " </tr>\n",
  2776. " <tr>\n",
  2777. " <th>1064</th>\n",
  2778. " <td>전라북도교육청 211.210.30.28 80 211.251.133.40 4716 T...</td>\n",
  2779. " </tr>\n",
  2780. " <tr>\n",
  2781. " <th>1419</th>\n",
  2782. " <td>전라북도교육청 211.210.30.28 80 211.251.122.33 4523 T...</td>\n",
  2783. " </tr>\n",
  2784. " <tr>\n",
  2785. " <th>2973</th>\n",
  2786. " <td>남서울대학교 202.176.5.136 8136 192.168.107.59 49207...</td>\n",
  2787. " </tr>\n",
  2788. " </tbody>\n",
  2789. "</table>\n",
  2790. "</div>"
  2791. ],
  2792. "text/plain": [
  2793. " Combined\n",
  2794. "8 남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...\n",
  2795. "322 전라북도교육청 115.95.20.116 80 211.251.39.65 2447 TC...\n",
  2796. "1064 전라북도교육청 211.210.30.28 80 211.251.133.40 4716 T...\n",
  2797. "1419 전라북도교육청 211.210.30.28 80 211.251.122.33 4523 T...\n",
  2798. "2973 남서울대학교 202.176.5.136 8136 192.168.107.59 49207..."
  2799. ]
  2800. },
  2801. "metadata": {},
  2802. "execution_count": 392
  2803. }
  2804. ],
  2805. "metadata": {}
  2806. },
  2807. {
  2808. "cell_type": "code",
  2809. "execution_count": 393,
  2810. "source": [
  2811. "# Convert to a nested list (one single-item list per row) so the algorithm can be applied\n",
  2812. "MTM_tolist=MTM_to_df.values.tolist()\n",
  2813. "MTM_tolist[:5]"
  2814. ],
  2815. "outputs": [
  2816. {
  2817. "output_type": "execute_result",
  2818. "data": {
  2819. "text/plain": [
  2820. "[[\"남서울대학교 159.69.250.163 80 192.170.112.14 8225 독일 [] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"],\n",
  2821. " [\"전라북도교육청 115.95.20.116 80 211.251.39.65 2447 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"],\n",
  2822. " [\"전라북도교육청 211.210.30.28 80 211.251.133.40 4716 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_11=공인-기타'] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"],\n",
  2823. " [\"전라북도교육청 211.210.30.28 80 211.251.122.33 4523 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_11=공인-기타'] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"],\n",
  2824. " [\"남서울대학교 202.176.5.136 8136 192.168.107.59 49207 TCP 말레이시아 [] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"]]"
  2825. ]
  2826. },
  2827. "metadata": {},
  2828. "execution_count": 393
  2829. }
  2830. ],
  2831. "metadata": {}
  2832. },
  2833. {
  2834. "cell_type": "code",
  2835. "execution_count": 394,
  2836. "source": [
  2837. "# Apply prefixspan\n",
  2838. "PrefixSpan_MTM = PrefixSpan(MTM_tolist)\n",
  2839. "\n",
  2840. "###### Interchangeable ######\n",
  2841. "# Keep every pattern whose frequency is at least 1\n",
  2842. "prefix_MTM=PrefixSpan_MTM.frequent(1)\n",
  2843. "prefix_MTM[:3]"
  2844. ],
  2845. "outputs": [
  2846. {
  2847. "output_type": "execute_result",
  2848. "data": {
  2849. "text/plain": [
  2850. "[(1,\n",
  2851. " [\"남서울대학교 159.69.250.163 80 192.170.112.14 8225 독일 [] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"]),\n",
  2852. " (1,\n",
  2853. " [\"전라북도교육청 115.95.20.116 80 211.251.39.65 2447 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)'] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"]),\n",
  2854. " (1,\n",
  2855. " [\"전라북도교육청 211.210.30.28 80 211.251.133.40 4716 TCP 대한민국 ['RISK_V2.ASSETS_VAL_1=공인-전체IP대역(유선)', 'RISK_V2.ASSETS_VAL_11=공인-기타'] ['RISK_V2.INTENT_VAL_0=Default'] [] Attack Malwr_Infected_29_11_exe_collect_20042001ECSC\"])]"
  2856. ]
  2857. },
  2858. "metadata": {},
  2859. "execution_count": 394
  2860. }
  2861. ],
  2862. "metadata": {}
  2863. },
  2864. {
  2865. "cell_type": "code",
  2866. "execution_count": 395,
  2867. "source": [
  2868. "# Put the result into a DataFrame\n",
  2869. "prefix_MTM_df=pd.DataFrame(prefix_MTM)\n",
  2870. "prefix_MTM_df[:5]"
  2871. ],
  2872. "outputs": [
  2873. {
  2874. "output_type": "execute_result",
  2875. "data": {
  2876. "text/html": [
  2877. "<div>\n",
  2878. "<style scoped>\n",
  2879. " .dataframe tbody tr th:only-of-type {\n",
  2880. " vertical-align: middle;\n",
  2881. " }\n",
  2882. "\n",
  2883. " .dataframe tbody tr th {\n",
  2884. " vertical-align: top;\n",
  2885. " }\n",
  2886. "\n",
  2887. " .dataframe thead th {\n",
  2888. " text-align: right;\n",
  2889. " }\n",
  2890. "</style>\n",
  2891. "<table border=\"1\" class=\"dataframe\">\n",
  2892. " <thead>\n",
  2893. " <tr style=\"text-align: right;\">\n",
  2894. " <th></th>\n",
  2895. " <th>0</th>\n",
  2896. " <th>1</th>\n",
  2897. " </tr>\n",
  2898. " </thead>\n",
  2899. " <tbody>\n",
  2900. " <tr>\n",
  2901. " <th>0</th>\n",
  2902. " <td>1</td>\n",
  2903. " <td>[남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
  2904. " </tr>\n",
  2905. " <tr>\n",
  2906. " <th>1</th>\n",
  2907. " <td>1</td>\n",
  2908. " <td>[전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...</td>\n",
  2909. " </tr>\n",
  2910. " <tr>\n",
  2911. " <th>2</th>\n",
  2912. " <td>1</td>\n",
  2913. " <td>[전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ...</td>\n",
  2914. " </tr>\n",
  2915. " <tr>\n",
  2916. " <th>3</th>\n",
  2917. " <td>1</td>\n",
  2918. " <td>[전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ...</td>\n",
  2919. " </tr>\n",
  2920. " <tr>\n",
  2921. " <th>4</th>\n",
  2922. " <td>1</td>\n",
  2923. " <td>[남서울대학교 202.176.5.136 8136 192.168.107.59 4920...</td>\n",
  2924. " </tr>\n",
  2925. " </tbody>\n",
  2926. "</table>\n",
  2927. "</div>"
  2928. ],
  2929. "text/plain": [
  2930. " 0 1\n",
  2931. "0 1 [남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...\n",
  2932. "1 1 [전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...\n",
  2933. "2 1 [전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ...\n",
  2934. "3 1 [전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ...\n",
  2935. "4 1 [남서울대학교 202.176.5.136 8136 192.168.107.59 4920..."
  2936. ]
  2937. },
  2938. "metadata": {},
  2939. "execution_count": 395
  2940. }
  2941. ],
  2942. "metadata": {}
  2943. },
  2944. {
  2945. "cell_type": "code",
  2946. "execution_count": 396,
  2947. "source": [
  2948. "# Rename the columns\n",
  2949. "prefix_MTM_df.rename(columns={0:'Frequency',1:'Cause'},inplace=True)\n",
  2950. "\n",
  2951. "# Add an empty column that will hold the Effect\n",
  2952. "prefix_MTM_df['Effect']=np.nan\n",
  2953. "\n",
  2954. "# Change the order of columns\n",
  2955. "prefix_MTM_df=prefix_MTM_df[['Cause','Effect','Frequency']]\n",
  2956. "prefix_MTM_df[:2]"
  2957. ],
  2958. "outputs": [
  2959. {
  2960. "output_type": "execute_result",
  2961. "data": {
  2962. "text/html": [
  2963. "<div>\n",
  2964. "<style scoped>\n",
  2965. " .dataframe tbody tr th:only-of-type {\n",
  2966. " vertical-align: middle;\n",
  2967. " }\n",
  2968. "\n",
  2969. " .dataframe tbody tr th {\n",
  2970. " vertical-align: top;\n",
  2971. " }\n",
  2972. "\n",
  2973. " .dataframe thead th {\n",
  2974. " text-align: right;\n",
  2975. " }\n",
  2976. "</style>\n",
  2977. "<table border=\"1\" class=\"dataframe\">\n",
  2978. " <thead>\n",
  2979. " <tr style=\"text-align: right;\">\n",
  2980. " <th></th>\n",
  2981. " <th>Cause</th>\n",
  2982. " <th>Effect</th>\n",
  2983. " <th>Frequency</th>\n",
  2984. " </tr>\n",
  2985. " </thead>\n",
  2986. " <tbody>\n",
  2987. " <tr>\n",
  2988. " <th>0</th>\n",
  2989. " <td>[남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
  2990. " <td>NaN</td>\n",
  2991. " <td>1</td>\n",
  2992. " </tr>\n",
  2993. " <tr>\n",
  2994. " <th>1</th>\n",
  2995. " <td>[전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...</td>\n",
  2996. " <td>NaN</td>\n",
  2997. " <td>1</td>\n",
  2998. " </tr>\n",
  2999. " </tbody>\n",
  3000. "</table>\n",
  3001. "</div>"
  3002. ],
  3003. "text/plain": [
  3004. " Cause Effect Frequency\n",
  3005. "0 [남서울대학교 159.69.250.163 80 192.170.112.14 8225 ... NaN 1\n",
  3006. "1 [전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T... NaN 1"
  3007. ]
  3008. },
  3009. "metadata": {},
  3010. "execution_count": 396
  3011. }
  3012. ],
  3013. "metadata": {}
  3014. },
  3015. {
  3016. "cell_type": "code",
  3017. "execution_count": 397,
  3018. "source": [
  3019. "# Return the first rule-type keyword (from drules, in order) found in the pattern's first item, else ''\n",
  3020. "def generate_cause_MTM(cell):\n",
  3021. "    drules=['Attack','DDOS','HACK','MAIL','Malwr','WEB']\n",
  3022. "    for drule in drules:\n",
  3023. "        if ' '+drule in cell[0]:\n",
  3024. "            return drule\n",
  3025. "    return ''\n",
  3026. "\n",
  3027. "# Map every Cause to its rule type with the MTM helper defined in this cell; the result is the Effect\n",
  3028. "effect_MTM=list(map(generate_cause_MTM, prefix_MTM_df.Cause))\n",
  3029. "\n",
  3030. "# Assign the rule type as the Effect\n",
  3031. "prefix_MTM_df['Effect']=effect_MTM\n",
  3032. "prefix_MTM_df.sort_values(by=['Frequency'],ascending=False)"
  3033. ],
  3034. "outputs": [
  3035. {
  3036. "output_type": "execute_result",
  3037. "data": {
  3038. "text/html": [
  3039. "<div>\n",
  3040. "<style scoped>\n",
  3041. " .dataframe tbody tr th:only-of-type {\n",
  3042. " vertical-align: middle;\n",
  3043. " }\n",
  3044. "\n",
  3045. " .dataframe tbody tr th {\n",
  3046. " vertical-align: top;\n",
  3047. " }\n",
  3048. "\n",
  3049. " .dataframe thead th {\n",
  3050. " text-align: right;\n",
  3051. " }\n",
  3052. "</style>\n",
  3053. "<table border=\"1\" class=\"dataframe\">\n",
  3054. " <thead>\n",
  3055. " <tr style=\"text-align: right;\">\n",
  3056. " <th></th>\n",
  3057. " <th>Cause</th>\n",
  3058. " <th>Effect</th>\n",
  3059. " <th>Frequency</th>\n",
  3060. " </tr>\n",
  3061. " </thead>\n",
  3062. " <tbody>\n",
  3063. " <tr>\n",
  3064. " <th>0</th>\n",
  3065. " <td>[남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
  3066. " <td>Attack</td>\n",
  3067. " <td>1</td>\n",
  3068. " </tr>\n",
  3069. " <tr>\n",
  3070. " <th>1</th>\n",
  3071. " <td>[전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...</td>\n",
  3072. " <td>Attack</td>\n",
  3073. " <td>1</td>\n",
  3074. " </tr>\n",
  3075. " <tr>\n",
  3076. " <th>2</th>\n",
  3077. " <td>[전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ...</td>\n",
  3078. " <td>Attack</td>\n",
  3079. " <td>1</td>\n",
  3080. " </tr>\n",
  3081. " <tr>\n",
  3082. " <th>3</th>\n",
  3083. " <td>[전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ...</td>\n",
  3084. " <td>Attack</td>\n",
  3085. " <td>1</td>\n",
  3086. " </tr>\n",
  3087. " <tr>\n",
  3088. " <th>4</th>\n",
  3089. " <td>[남서울대학교 202.176.5.136 8136 192.168.107.59 4920...</td>\n",
  3090. " <td>Attack</td>\n",
  3091. " <td>1</td>\n",
  3092. " </tr>\n",
  3093. " <tr>\n",
  3094. " <th>5</th>\n",
  3095. " <td>[경남대학교 209.250.247.60 80 203.253.180.71 2073 T...</td>\n",
  3096. " <td>Attack</td>\n",
  3097. " <td>1</td>\n",
  3098. " </tr>\n",
  3099. " <tr>\n",
  3100. " <th>6</th>\n",
  3101. " <td>[전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ...</td>\n",
  3102. " <td>Attack</td>\n",
  3103. " <td>1</td>\n",
  3104. " </tr>\n",
  3105. " <tr>\n",
  3106. " <th>7</th>\n",
  3107. " <td>[부산대학교 164.125.248.75 1098 195.158.31.58 2961 ...</td>\n",
  3108. " <td>Attack</td>\n",
  3109. " <td>1</td>\n",
  3110. " </tr>\n",
  3111. " <tr>\n",
  3112. " <th>8</th>\n",
  3113. " <td>[부산대학교 164.125.242.79 52896 46.249.119.133 494...</td>\n",
  3114. " <td>Attack</td>\n",
  3115. " <td>1</td>\n",
  3116. " </tr>\n",
  3117. " <tr>\n",
  3118. " <th>9</th>\n",
  3119. " <td>[전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ...</td>\n",
  3120. " <td>Attack</td>\n",
  3121. " <td>1</td>\n",
  3122. " </tr>\n",
  3123. " <tr>\n",
  3124. " <th>10</th>\n",
  3125. " <td>[전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T...</td>\n",
  3126. " <td>Attack</td>\n",
  3127. " <td>1</td>\n",
  3128. " </tr>\n",
  3129. " <tr>\n",
  3130. " <th>11</th>\n",
  3131. " <td>[남서울대학교 173.208.153.130 8130 220.68.191.80 498...</td>\n",
  3132. " <td>Attack</td>\n",
  3133. " <td>1</td>\n",
  3134. " </tr>\n",
  3135. " <tr>\n",
  3136. " <th>12</th>\n",
  3137. " <td>[경남대학교 211.231.104.12 80 203.253.179.17 55066 ...</td>\n",
  3138. " <td>Attack</td>\n",
  3139. " <td>1</td>\n",
  3140. " </tr>\n",
  3141. " <tr>\n",
  3142. " <th>13</th>\n",
  3143. " <td>[부산대학교 164.125.169.37 3697 45.80.184.171 80 TC...</td>\n",
  3144. " <td>Attack</td>\n",
  3145. " <td>1</td>\n",
  3146. " </tr>\n",
  3147. " <tr>\n",
  3148. " <th>14</th>\n",
  3149. " <td>[남서울대학교 172.83.155.170 8170 220.68.191.24 4919...</td>\n",
  3150. " <td>Attack</td>\n",
  3151. " <td>1</td>\n",
  3152. " </tr>\n",
  3153. " <tr>\n",
  3154. " <th>15</th>\n",
  3155. " <td>[부산대학교 164.125.244.196 1098 203.96.170.114 570...</td>\n",
  3156. " <td>Attack</td>\n",
  3157. " <td>1</td>\n",
  3158. " </tr>\n",
  3159. " </tbody>\n",
  3160. "</table>\n",
  3161. "</div>"
  3162. ],
  3163. "text/plain": [
  3164. " Cause Effect Frequency\n",
  3165. "0 [남서울대학교 159.69.250.163 80 192.170.112.14 8225 ... Attack 1\n",
  3166. "1 [전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T... Attack 1\n",
  3167. "2 [전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ... Attack 1\n",
  3168. "3 [전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ... Attack 1\n",
  3169. "4 [남서울대학교 202.176.5.136 8136 192.168.107.59 4920... Attack 1\n",
  3170. "5 [경남대학교 209.250.247.60 80 203.253.180.71 2073 T... Attack 1\n",
  3171. "6 [전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ... Attack 1\n",
  3172. "7 [부산대학교 164.125.248.75 1098 195.158.31.58 2961 ... Attack 1\n",
  3173. "8 [부산대학교 164.125.242.79 52896 46.249.119.133 494... Attack 1\n",
  3174. "9 [전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ... Attack 1\n",
  3175. "10 [전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T... Attack 1\n",
  3176. "11 [남서울대학교 173.208.153.130 8130 220.68.191.80 498... Attack 1\n",
  3177. "12 [경남대학교 211.231.104.12 80 203.253.179.17 55066 ... Attack 1\n",
  3178. "13 [부산대학교 164.125.169.37 3697 45.80.184.171 80 TC... Attack 1\n",
  3179. "14 [남서울대학교 172.83.155.170 8170 220.68.191.24 4919... Attack 1\n",
  3180. "15 [부산대학교 164.125.244.196 1098 203.96.170.114 570... Attack 1"
  3181. ]
  3182. },
  3183. "metadata": {},
  3184. "execution_count": 397
  3185. }
  3186. ],
  3187. "metadata": {}
  3188. },
  3189. {
  3190. "cell_type": "code",
  3191. "execution_count": 399,
  3192. "source": [
  3193. "# Attack Filter\n",
  3194. "def Attack_filter_MTM(ps):\n",
  3195. " return ' Attack' in ps[0]\n",
  3196. "\n",
  3197. "att_filter_MTM=prefix_MTM_df[list(map(Attack_filter_MTM, prefix_MTM_df.Cause))].fillna('Attack')\n",
  3198. "\n",
  3199. "# Malwr Filter\n",
  3200. "def Malwr_filter_MTM(ps):\n",
  3201. " return ' Malwr' in ps[0]\n",
  3202. "\n",
  3203. "mal_filter_MTM=prefix_MTM_df[list(map(Malwr_filter_MTM, prefix_MTM_df.Cause))].fillna('Malwr')\n",
  3204. "\n",
  3205. "# DDOS Filter\n",
  3206. "def DDOS_filter_MTM(ps):\n",
  3207. " return ' DDOS' in ps[0]\n",
  3208. "\n",
  3209. "dd_filter_MTM=prefix_MTM_df[list(map(DDOS_filter_MTM, prefix_MTM_df.Cause))].fillna('DDOS')\n",
  3210. "\n",
  3211. "# HACK Filter\n",
  3212. "def HACK_filter_MTM(ps):\n",
  3213. " return ' HACK' in ps[0]\n",
  3214. "\n",
  3215. "hack_filter_MTM=prefix_MTM_df[list(map(HACK_filter_MTM, prefix_MTM_df.Cause))].fillna('HACK')\n",
  3216. "\n",
  3217. "# MAIL Filter\n",
  3218. "def MAIL_filter_MTM(ps):\n",
  3219. " return ' MAIL' in ps[0]\n",
  3220. "\n",
  3221. "mail_filter_MTM=prefix_MTM_df[list(map(MAIL_filter_MTM, prefix_MTM_df.Cause))].fillna('MAIL')\n",
  3222. "\n",
  3223. "# WEB Filter\n",
  3224. "def WEB_filter_MTM(ps):\n",
  3225. " return ' WEB' in ps[0]\n",
  3226. "\n",
  3227. "prefix_MTM_df[:5]\n",
  3228. "web_filter_MTM=prefix_MTM_df[list(map(WEB_filter_MTM, prefix_MTM_df.Cause))].fillna('WEB')\n",
  3229. "\n",
  3230. "frames_MTM = [att_filter_MTM, mal_filter_MTM, dd_filter_MTM, hack_filter_MTM, mail_filter_MTM, web_filter_MTM]\n",
  3231. "result_MTM = pd.concat(frames_MTM)\n",
  3232. "result_MTM.sort_values(by=['Frequency'],ascending=False)"
  3233. ],
  3234. "outputs": [
  3235. {
  3236. "output_type": "execute_result",
  3237. "data": {
  3238. "text/html": [
  3239. "<div>\n",
  3240. "<style scoped>\n",
  3241. " .dataframe tbody tr th:only-of-type {\n",
  3242. " vertical-align: middle;\n",
  3243. " }\n",
  3244. "\n",
  3245. " .dataframe tbody tr th {\n",
  3246. " vertical-align: top;\n",
  3247. " }\n",
  3248. "\n",
  3249. " .dataframe thead th {\n",
  3250. " text-align: right;\n",
  3251. " }\n",
  3252. "</style>\n",
  3253. "<table border=\"1\" class=\"dataframe\">\n",
  3254. " <thead>\n",
  3255. " <tr style=\"text-align: right;\">\n",
  3256. " <th></th>\n",
  3257. " <th>Cause</th>\n",
  3258. " <th>Effect</th>\n",
  3259. " <th>Frequency</th>\n",
  3260. " </tr>\n",
  3261. " </thead>\n",
  3262. " <tbody>\n",
  3263. " <tr>\n",
  3264. " <th>0</th>\n",
  3265. " <td>[남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
  3266. " <td>Attack</td>\n",
  3267. " <td>1</td>\n",
  3268. " </tr>\n",
  3269. " <tr>\n",
  3270. " <th>1</th>\n",
  3271. " <td>[전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...</td>\n",
  3272. " <td>Attack</td>\n",
  3273. " <td>1</td>\n",
  3274. " </tr>\n",
  3275. " <tr>\n",
  3276. " <th>14</th>\n",
  3277. " <td>[남서울대학교 172.83.155.170 8170 220.68.191.24 4919...</td>\n",
  3278. " <td>Attack</td>\n",
  3279. " <td>1</td>\n",
  3280. " </tr>\n",
  3281. " <tr>\n",
  3282. " <th>13</th>\n",
  3283. " <td>[부산대학교 164.125.169.37 3697 45.80.184.171 80 TC...</td>\n",
  3284. " <td>Attack</td>\n",
  3285. " <td>1</td>\n",
  3286. " </tr>\n",
  3287. " <tr>\n",
  3288. " <th>12</th>\n",
  3289. " <td>[경남대학교 211.231.104.12 80 203.253.179.17 55066 ...</td>\n",
  3290. " <td>Attack</td>\n",
  3291. " <td>1</td>\n",
  3292. " </tr>\n",
  3293. " <tr>\n",
  3294. " <th>11</th>\n",
  3295. " <td>[남서울대학교 173.208.153.130 8130 220.68.191.80 498...</td>\n",
  3296. " <td>Attack</td>\n",
  3297. " <td>1</td>\n",
  3298. " </tr>\n",
  3299. " <tr>\n",
  3300. " <th>10</th>\n",
  3301. " <td>[전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T...</td>\n",
  3302. " <td>Attack</td>\n",
  3303. " <td>1</td>\n",
  3304. " </tr>\n",
  3305. " <tr>\n",
  3306. " <th>9</th>\n",
  3307. " <td>[전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ...</td>\n",
  3308. " <td>Attack</td>\n",
  3309. " <td>1</td>\n",
  3310. " </tr>\n",
  3311. " <tr>\n",
  3312. " <th>8</th>\n",
  3313. " <td>[부산대학교 164.125.242.79 52896 46.249.119.133 494...</td>\n",
  3314. " <td>Attack</td>\n",
  3315. " <td>1</td>\n",
  3316. " </tr>\n",
  3317. " <tr>\n",
  3318. " <th>7</th>\n",
  3319. " <td>[부산대학교 164.125.248.75 1098 195.158.31.58 2961 ...</td>\n",
  3320. " <td>Attack</td>\n",
  3321. " <td>1</td>\n",
  3322. " </tr>\n",
  3323. " <tr>\n",
  3324. " <th>6</th>\n",
  3325. " <td>[전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ...</td>\n",
  3326. " <td>Attack</td>\n",
  3327. " <td>1</td>\n",
  3328. " </tr>\n",
  3329. " <tr>\n",
  3330. " <th>5</th>\n",
  3331. " <td>[경남대학교 209.250.247.60 80 203.253.180.71 2073 T...</td>\n",
  3332. " <td>Attack</td>\n",
  3333. " <td>1</td>\n",
  3334. " </tr>\n",
  3335. " <tr>\n",
  3336. " <th>4</th>\n",
  3337. " <td>[남서울대학교 202.176.5.136 8136 192.168.107.59 4920...</td>\n",
  3338. " <td>Attack</td>\n",
  3339. " <td>1</td>\n",
  3340. " </tr>\n",
  3341. " <tr>\n",
  3342. " <th>3</th>\n",
  3343. " <td>[전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ...</td>\n",
  3344. " <td>Attack</td>\n",
  3345. " <td>1</td>\n",
  3346. " </tr>\n",
  3347. " <tr>\n",
  3348. " <th>2</th>\n",
  3349. " <td>[전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ...</td>\n",
  3350. " <td>Attack</td>\n",
  3351. " <td>1</td>\n",
  3352. " </tr>\n",
  3353. " <tr>\n",
  3354. " <th>1</th>\n",
  3355. " <td>[전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T...</td>\n",
  3356. " <td>Attack</td>\n",
  3357. " <td>1</td>\n",
  3358. " </tr>\n",
  3359. " <tr>\n",
  3360. " <th>0</th>\n",
  3361. " <td>[남서울대학교 159.69.250.163 80 192.170.112.14 8225 ...</td>\n",
  3362. " <td>Attack</td>\n",
  3363. " <td>1</td>\n",
  3364. " </tr>\n",
  3365. " <tr>\n",
  3366. " <th>15</th>\n",
  3367. " <td>[부산대학교 164.125.244.196 1098 203.96.170.114 570...</td>\n",
  3368. " <td>Attack</td>\n",
  3369. " <td>1</td>\n",
  3370. " </tr>\n",
  3371. " <tr>\n",
  3372. " <th>14</th>\n",
  3373. " <td>[남서울대학교 172.83.155.170 8170 220.68.191.24 4919...</td>\n",
  3374. " <td>Attack</td>\n",
  3375. " <td>1</td>\n",
  3376. " </tr>\n",
  3377. " <tr>\n",
  3378. " <th>13</th>\n",
  3379. " <td>[부산대학교 164.125.169.37 3697 45.80.184.171 80 TC...</td>\n",
  3380. " <td>Attack</td>\n",
  3381. " <td>1</td>\n",
  3382. " </tr>\n",
  3383. " <tr>\n",
  3384. " <th>12</th>\n",
  3385. " <td>[경남대학교 211.231.104.12 80 203.253.179.17 55066 ...</td>\n",
  3386. " <td>Attack</td>\n",
  3387. " <td>1</td>\n",
  3388. " </tr>\n",
  3389. " <tr>\n",
  3390. " <th>11</th>\n",
  3391. " <td>[남서울대학교 173.208.153.130 8130 220.68.191.80 498...</td>\n",
  3392. " <td>Attack</td>\n",
  3393. " <td>1</td>\n",
  3394. " </tr>\n",
  3395. " <tr>\n",
  3396. " <th>10</th>\n",
  3397. " <td>[전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T...</td>\n",
  3398. " <td>Attack</td>\n",
  3399. " <td>1</td>\n",
  3400. " </tr>\n",
  3401. " <tr>\n",
  3402. " <th>9</th>\n",
  3403. " <td>[전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ...</td>\n",
  3404. " <td>Attack</td>\n",
  3405. " <td>1</td>\n",
  3406. " </tr>\n",
  3407. " <tr>\n",
  3408. " <th>8</th>\n",
  3409. " <td>[부산대학교 164.125.242.79 52896 46.249.119.133 494...</td>\n",
  3410. " <td>Attack</td>\n",
  3411. " <td>1</td>\n",
  3412. " </tr>\n",
  3413. " <tr>\n",
  3414. " <th>7</th>\n",
  3415. " <td>[부산대학교 164.125.248.75 1098 195.158.31.58 2961 ...</td>\n",
  3416. " <td>Attack</td>\n",
  3417. " <td>1</td>\n",
  3418. " </tr>\n",
  3419. " <tr>\n",
  3420. " <th>6</th>\n",
  3421. " <td>[전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ...</td>\n",
  3422. " <td>Attack</td>\n",
  3423. " <td>1</td>\n",
  3424. " </tr>\n",
  3425. " <tr>\n",
  3426. " <th>5</th>\n",
  3427. " <td>[경남대학교 209.250.247.60 80 203.253.180.71 2073 T...</td>\n",
  3428. " <td>Attack</td>\n",
  3429. " <td>1</td>\n",
  3430. " </tr>\n",
  3431. " <tr>\n",
  3432. " <th>4</th>\n",
  3433. " <td>[남서울대학교 202.176.5.136 8136 192.168.107.59 4920...</td>\n",
  3434. " <td>Attack</td>\n",
  3435. " <td>1</td>\n",
  3436. " </tr>\n",
  3437. " <tr>\n",
  3438. " <th>3</th>\n",
  3439. " <td>[전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ...</td>\n",
  3440. " <td>Attack</td>\n",
  3441. " <td>1</td>\n",
  3442. " </tr>\n",
  3443. " <tr>\n",
  3444. " <th>2</th>\n",
  3445. " <td>[전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ...</td>\n",
  3446. " <td>Attack</td>\n",
  3447. " <td>1</td>\n",
  3448. " </tr>\n",
  3449. " <tr>\n",
  3450. " <th>15</th>\n",
  3451. " <td>[부산대학교 164.125.244.196 1098 203.96.170.114 570...</td>\n",
  3452. " <td>Attack</td>\n",
  3453. " <td>1</td>\n",
  3454. " </tr>\n",
  3455. " </tbody>\n",
  3456. "</table>\n",
  3457. "</div>"
  3458. ],
  3459. "text/plain": [
  3460. " Cause Effect Frequency\n",
  3461. "0 [남서울대학교 159.69.250.163 80 192.170.112.14 8225 ... Attack 1\n",
  3462. "1 [전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T... Attack 1\n",
  3463. "14 [남서울대학교 172.83.155.170 8170 220.68.191.24 4919... Attack 1\n",
  3464. "13 [부산대학교 164.125.169.37 3697 45.80.184.171 80 TC... Attack 1\n",
  3465. "12 [경남대학교 211.231.104.12 80 203.253.179.17 55066 ... Attack 1\n",
  3466. "11 [남서울대학교 173.208.153.130 8130 220.68.191.80 498... Attack 1\n",
  3467. "10 [전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T... Attack 1\n",
  3468. "9 [전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ... Attack 1\n",
  3469. "8 [부산대학교 164.125.242.79 52896 46.249.119.133 494... Attack 1\n",
  3470. "7 [부산대학교 164.125.248.75 1098 195.158.31.58 2961 ... Attack 1\n",
  3471. "6 [전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ... Attack 1\n",
  3472. "5 [경남대학교 209.250.247.60 80 203.253.180.71 2073 T... Attack 1\n",
  3473. "4 [남서울대학교 202.176.5.136 8136 192.168.107.59 4920... Attack 1\n",
  3474. "3 [전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ... Attack 1\n",
  3475. "2 [전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ... Attack 1\n",
  3476. "1 [전라북도교육청 115.95.20.116 80 211.251.39.65 2447 T... Attack 1\n",
  3477. "0 [남서울대학교 159.69.250.163 80 192.170.112.14 8225 ... Attack 1\n",
  3478. "15 [부산대학교 164.125.244.196 1098 203.96.170.114 570... Attack 1\n",
  3479. "14 [남서울대학교 172.83.155.170 8170 220.68.191.24 4919... Attack 1\n",
  3480. "13 [부산대학교 164.125.169.37 3697 45.80.184.171 80 TC... Attack 1\n",
  3481. "12 [경남대학교 211.231.104.12 80 203.253.179.17 55066 ... Attack 1\n",
  3482. "11 [남서울대학교 173.208.153.130 8130 220.68.191.80 498... Attack 1\n",
  3483. "10 [전라북도교육청 37.1.197.70 80 211.251.78.210 49737 T... Attack 1\n",
  3484. "9 [전라북도교육청 219.249.231.84 80 211.251.82.1 52721 ... Attack 1\n",
  3485. "8 [부산대학교 164.125.242.79 52896 46.249.119.133 494... Attack 1\n",
  3486. "7 [부산대학교 164.125.248.75 1098 195.158.31.58 2961 ... Attack 1\n",
  3487. "6 [전라북도교육청 211.210.30.28 80 211.251.117.65 5212 ... Attack 1\n",
  3488. "5 [경남대학교 209.250.247.60 80 203.253.180.71 2073 T... Attack 1\n",
  3489. "4 [남서울대학교 202.176.5.136 8136 192.168.107.59 4920... Attack 1\n",
  3490. "3 [전라북도교육청 211.210.30.28 80 211.251.122.33 4523 ... Attack 1\n",
  3491. "2 [전라북도교육청 211.210.30.28 80 211.251.133.40 4716 ... Attack 1\n",
  3492. "15 [부산대학교 164.125.244.196 1098 203.96.170.114 570... Attack 1"
  3493. ]
  3494. },
  3495. "metadata": {},
  3496. "execution_count": 399
  3497. }
  3498. ],
  3499. "metadata": {}
  3500. },
  3501. {
  3502. "cell_type": "code",
  3503. "execution_count": null,
  3504. "source": [],
  3505. "outputs": [],
  3506. "metadata": {}
  3507. }
  3508. ],
  3509. "metadata": {
  3510. "orig_nbformat": 4,
  3511. "language_info": {
  3512. "name": "python",
  3513. "version": "3.8.8",
  3514. "mimetype": "text/x-python",
  3515. "codemirror_mode": {
  3516. "name": "ipython",
  3517. "version": 3
  3518. },
  3519. "pygments_lexer": "ipython3",
  3520. "nbconvert_exporter": "python",
  3521. "file_extension": ".py"
  3522. },
  3523. "kernelspec": {
  3524. "name": "python3",
  3525. "display_name": "Python 3.8.8 64-bit ('anaconda3-2021.05': pyenv)"
  3526. },
  3527. "interpreter": {
  3528. "hash": "f4c95b739b6e12099bf04dd3c3302c87ef63de308852c06a666878cc26abb6cc"
  3529. }
  3530. },
  3531. "nbformat": 4,
  3532. "nbformat_minor": 2
  3533. }