1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
|
import numpy as np import pandas as pd import csv import time
# Label strings in first-seen order; handleLabel returns a label's position in
# this list as its integer code. It is bound here (rather than only under the
# __main__ guard) so the helpers also work when this module is imported; a bare
# `global` statement at module scope does not create the name.
label_list = []

# Every raw integer read from columns 4 and 5 so far; handlenorm takes its
# max/min from this list.
list_big = []
def preHandel_data():
    """Convert the raw comma-separated data file into a numeric CSV file.

    Reads ``kddcup.data_10_percent_corrected`` row by row and writes each row
    to ``kddcup.data_10_percent_corrected.csv`` with these columns replaced:

    * column 1  -> ``handleProtocol(row)``
    * column 2  -> ``handleService(row)``
    * column 3  -> ``handleFlag(row)``
    * columns 4 and 5 -> ``handlenorm(int(value))``
    * column 41 -> ``handleLabel(row)``

    All other columns are copied through unchanged.

    Side effects: appends the integer values of columns 4 and 5 of every row
    to the module-level ``list_big`` and may grow ``label_list`` through
    ``handleLabel``.

    Raises:
        OSError: if either file cannot be opened.
        ValueError: if column 4 or 5 of a row is not an integer literal.
        IndexError: if a non-empty row has fewer than 42 columns.
    """
    source_file = 'kddcup.data_10_percent_corrected'
    handled_file = 'kddcup.data_10_percent_corrected.csv'

    # Both files are managed by the same `with`, so the output handle is
    # closed even when a row fails to parse.
    with open(source_file, 'r') as data_source, \
            open(handled_file, 'w', newline='') as data_file:
        csv_reader = csv.reader(data_source)
        csv_writer = csv.writer(data_file)

        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to convert.
                continue

            col4 = int(row[4])
            col5 = int(row[5])
            list_big.append(col4)
            list_big.append(col5)

            # A plain list is used instead of a NumPy string array: assigning
            # into a fixed-width string array truncates the new value to the
            # width of the longest original field, which cut digits off the
            # normalised floats.
            out_row = list(row)
            out_row[1] = handleProtocol(row)
            out_row[2] = handleService(row)
            out_row[3] = handleFlag(row)
            # NOTE(review): handlenorm scales against the max/min of the
            # values collected *so far*, so early rows are scaled with
            # different extremes than later ones, and it rescans list_big on
            # every call. Confirm whether a global min/max pass was intended.
            out_row[4] = handlenorm(col4)
            out_row[5] = handlenorm(col5)
            out_row[41] = handleLabel(row)

            # The category helpers return None for unrecognised values;
            # csv.writer emits None as an empty field.
            csv_writer.writerow(out_row)
def find_index(x, y):
    """Return the positions of every element of ``y`` that equals ``x``.

    Args:
        x: Value to look for.
        y: Sequence to search.

    Returns:
        A list of matching indices in ascending order; empty when ``x`` does
        not occur in ``y``.
    """
    positions = []
    for position, candidate in enumerate(y):
        if candidate == x:
            positions.append(position)
    return positions
# Protocol name -> integer code (the name's position in the original ordering).
# Built once at import time instead of on every call.
_PROTOCOL_CODES = {
    name: code for code, name in enumerate(('tcp', 'udp', 'icmp'))
}


def handleProtocol(input):
    """Map the protocol name in column 1 of a row to its integer code.

    Args:
        input: A row (indexable sequence); only ``input[1]`` is read. The
            parameter name shadows the builtin but is kept for compatibility
            with existing callers.

    Returns:
        0 for ``'tcp'``, 1 for ``'udp'``, 2 for ``'icmp'``; ``None`` when the
        value is not one of these.
    """
    # dict.get returns None for unknown names, matching the previous
    # fall-through behaviour.
    return _PROTOCOL_CODES.get(input[1])
# Service name -> integer code (the name's position in the original ordering).
# Built once at import time instead of rebuilding a 70-entry list per row.
_SERVICE_CODES = {
    name: code
    for code, name in enumerate((
        'aol', 'auth', 'bgp', 'courier', 'csnet_ns', 'ctf', 'daytime',
        'discard', 'domain', 'domain_u', 'echo', 'eco_i', 'ecr_i', 'efs',
        'exec', 'finger', 'ftp', 'ftp_data', 'gopher', 'harvest',
        'hostnames', 'http', 'http_2784', 'http_443', 'http_8001', 'imap4',
        'IRC', 'iso_tsap', 'klogin', 'kshell', 'ldap', 'link', 'login',
        'mtp', 'name', 'netbios_dgm', 'netbios_ns', 'netbios_ssn',
        'netstat', 'nnsp', 'nntp', 'ntp_u', 'other', 'pm_dump', 'pop_2',
        'pop_3', 'printer', 'private', 'red_i', 'remote_job', 'rje',
        'shell', 'smtp', 'sql_net', 'ssh', 'sunrpc', 'supdup', 'systat',
        'telnet', 'tftp_u', 'tim_i', 'time', 'urh_i', 'urp_i', 'uucp',
        'uucp_path', 'vmnet', 'whois', 'X11', 'Z39_50',
    ))
}


def handleService(input):
    """Map the service name in column 2 of a row to its integer code.

    Args:
        input: A row (indexable sequence); only ``input[2]`` is read. The
            parameter name shadows the builtin but is kept for compatibility
            with existing callers.

    Returns:
        The service's code in the range 0..69 (``'aol'`` is 0, ``'Z39_50'``
        is 69); ``None`` when the name is not in the table. Matching is
        case-sensitive.
    """
    # dict.get returns None for unknown names, matching the previous
    # fall-through behaviour.
    return _SERVICE_CODES.get(input[2])
# Flag name -> integer code (the name's position in the original ordering).
# Built once at import time instead of on every call.
_FLAG_CODES = {
    name: code
    for code, name in enumerate((
        'OTH', 'REJ', 'RSTO', 'RSTOS0', 'RSTR',
        'S0', 'S1', 'S2', 'S3', 'SF', 'SH',
    ))
}


def handleFlag(input):
    """Map the flag name in column 3 of a row to its integer code.

    Args:
        input: A row (indexable sequence); only ``input[3]`` is read. The
            parameter name shadows the builtin but is kept for compatibility
            with existing callers.

    Returns:
        The flag's code in the range 0..10 (``'OTH'`` is 0, ``'SH'`` is 10);
        ``None`` when the name is not in the table.
    """
    # dict.get returns None for unknown names, matching the previous
    # fall-through behaviour.
    return _FLAG_CODES.get(input[3])
def handleLabel(input):
    """Map the label in column 41 of a row to an integer code.

    Codes are assigned in first-seen order using the module-level
    ``label_list``: a label already in the list gets its existing position;
    a new label is appended and gets the new last position.

    Args:
        input: A row (indexable sequence); only ``input[41]`` is read. The
            parameter name shadows the builtin but is kept for compatibility
            with existing callers.

    Returns:
        The zero-based position of the label in ``label_list``.

    Side effects:
        Appends to ``label_list`` when the label has not been seen before.
    """
    label = input[41]
    # label_list is only mutated, never rebound, so no `global` statement is
    # required here.
    try:
        return label_list.index(label)
    except ValueError:
        # First occurrence: register it; its code is the new last index.
        label_list.append(label)
        return len(label_list) - 1
def handlenorm(input):
    """Scale a value against the extremes currently held in ``list_big``.

    Computes ``255 / max(list_big) * (input - min(list_big))``.

    Args:
        input: Numeric value to scale. The parameter name shadows the builtin
            but is kept for compatibility with existing callers.

    Returns:
        The scaled value as a float. When the largest collected value is 0
        the scale factor is undefined, so 0.0 is returned instead of raising
        ``ZeroDivisionError``.

    Raises:
        ValueError: if ``list_big`` is empty (raised by ``max``).

    Note:
        The extremes are recomputed from ``list_big`` on every call, so the
        result depends on what has been appended to that list at call time.
    """
    max_data = max(list_big)
    if max_data == 0:
        # Guard the division below: e.g. the first rows of a file may carry
        # only zeros in the collected columns.
        return 0.0
    min_data = min(list_big)
    return 255 / max_data * (input - min_data)
if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # supported way to measure elapsed time.
    start_time = time.perf_counter()

    # Start the run with an empty label table; handleLabel fills it as new
    # labels are encountered. (A `global` statement is unnecessary at module
    # scope, so a plain assignment is used.)
    label_list = []

    preHandel_data()

    end_time = time.perf_counter()
    print("Running time:", (end_time - start_time))
|