#!/usr/bin/python # Max Dornseif 2003 # --md@hudora.de """Parser for wiscan data as created by popular WiLDing/Wardriving tools. Call parseWiScan(iterator) and get back tow dictionaries: one with Networks keyed by BSSID and another with unparsable lines, keyed by problem description. e.g. {'00:00:cb:07:82:e2': [{'bssid': '00:00:cb:07:82:e2', 'channel': None, 'flags': None, 'typ': 'BBS', 'beacon': None, 'ew': 'E', 'ns': 'N', 'lat': 50.958970999999998, 'lon': 6.9265819999999998, 'name': None, 'noise': 1, 'signal': 1, 'snr': 1, 'ssid': 'CCP-WEST', 'timeh': '19', 'timem': '06', 'times': '08', 'tz': 'GMT', 'timestamp': 1069524368, 'unknown': 'unknown'}, ... ], '00:01:24:f4:61:06': [{ ... }] } {'no regex match': ['N 9000000000\tE 18000000000\t( Tensegrity)\tBBS\t ( 00:80:c8:ac:60:e4 )\t16:20:40 (GMT)\t[ 1 1 1 ]\n', 'N 9000000000\tE 18000000000\t( CCP-WEST)\tBBS\t ( 00:00:cb:07:82:e2 )\t19:00:15 (GMT)\t[ 1 1 1 ]\n']} Tested formats: Netstumbler Windows: # $Creator: Network Stumbler Version 0.3.30 # $Format: wi-scan with extensions # Latitude Longitude ( SSID ) Type ( BSSID ) Time (GMT) [ SNR Sig Noise ] # ( Name ) Flags Channelbits # $DateGMT: 2003-11-22 N 9000000000 E 18000000000 ( tmobile) BBS ( 00:0d:bd:a7:13:78 ) 15:09:58 (GMT) [ 1 1 1 ] # ( ) 0001 0040 100 kismet XML2wiscan (missing space after SSID) # $Creator: Network Stumbler Version 0.3.30 # $Format: wi-scan # Latitude Longitude ( SSID ) Type ( BSSID ) Time (GMT) [ SNR Sig Noise ] # $DateGMT: 2003-11-22 N 50.9641515 E 6.9345830 ( didapdd1) BBS ( 00:a0:f8:3a:58:3c ) 19:15:08 (GMT) [ 1 1 1 ] KisMAC # $Creator: KisMAC NS export version 0.1 # $Format: wi-scan with extensions # Latitude Longitude ( SSID ) Type ( BSSID ) Time (GMT) [ SNR Sig Noise ] # ( Name ) Flags Channelbits BcnIntvl # $DateGMT: 2003-09-29 N 0.000000 E 0.000000 ( default ) BSS ( 00:40:05:52:D7:09 ) 14:24:58 (GMT) [ 10 10 0 ] # ( D-Link ) 0011 0000 0 Netstumbler Windows: # $Creator: Network Stumbler Version 0.3.22 # $Format: wi-scan summary with extensions # Latitude Longitude ( SSID ) Type ( BSSID ) Time (GMT) [ SNR Sig Noise ] # ( Name ) Flags Channelbits BcnIntvl # $DateGMT: 2001-11-16 N 0.0000000 E 0.0000000 ( WaveLAN Network ) BBS ( 00:60:1d:03:47:06 ) 23:39:19 (GMT) [ 25 7 1 46 ] # ( Lucent_Bonn ) 0001 0008 100 """ import re import datetime # the regular expression used for parsing # Latitude Longitude ( SSID ) Type ( BSSID ) Time (GMT) [ SNR Sig Noise ] # ( Name ) Flags Channelbits BcnIntvl # N 0.0000000 E 0.0000000 ( Petra ) BBS ( 00:60:b3:79:23:ea ) 18:21:06 (GMT) [ 30 72 42 ] # ( ) 0011 0040 0 wiscan_re = re.compile('(?P[NS])\s+(?P\d+\.\d+)\s+(?P[EW])\s+(?P\d+.\d+)\s+\( (?P.*?) ?\)\s+(?P\S+)\s+\( (?P.*?) \)\s+(?P\d\d):(?P\d\d):(?P\d\d) \((?P.*?)\)\s+\[ (?P-?\d+) (?P\d+) (?P\d+) \](\s+#\s+\( (?P.*?) \)\s+(?P\S+)\s+(?P\S+)\s+(?P\S+))?') def parseWiScan(lines, timeshift=0): """ Read wi-scan formated data line by line and return a dict with parsed data. 'lines' should be an iteratable object, eg a list or the iterator returned by file.readlines(). 'timeshift' can be used to add a certain amount of hours if timezone data in the file is wrong. Example: >>> parseWiScan(['N 0.0000000 E 0.0000000 ( WaveLAN Network ) BBS ( 00:60:1d:03:47:06 ) 23:39:19 (GMT) [ 25 71 46 ] # ( Lucent_Bonn ) 0001 0008 100']) ({'00:60:1d:03:47:06': [{'noise': 25, 'tz': 'GMT', 'ssid': 'WaveLAN Network', 'bssid': '00:60:1d:03:47:06', 'timestamp': 81559, 'signal': 71, 'lon': 0.0, 'times': '19', 'wlanopen': 'wlanopen', 'timem': '39', 'beacon': 100, 'flags': '0001', 'snr': '25', 'lat': 0.0, 'timeh': '23', 'ew': 'E', 'ns': 'N', 'typ': 'BBS', 'channel': '0008', 'name': 'Lucent_Bonn'}]}, {'no position': ['N 0.0000000 E 0.0000000 ( WaveLAN Network ) BBS ( 00:60:1d:03:47:06 ) 23:39:19 (GMT) [ 25 71 46 ] # ( Lucent_Bonn ) 0001 0008 100']}) """ problems = {} bssids = {} date = '1970-01-01' for l in lines: if l.startswith('#'): if l.startswith('# $DateGMT'): date = l.split(': ')[1] continue m = wiscan_re.match(l) if not m: problems.setdefault('no regex match', []).append(l) continue base = m.groupdict() base['lat'] = float(base['lat'].strip()) base['lon'] = float(base['lon'].strip()) if base['ns'] == 'S': base['lat'] = base['lat'] * -1 if base['ew'] == 'W': base['lon'] = base['lon'] * -1 year, month, day = date.strip().split('-') base['timestamp'] = int(datetime.datetime(int(year), int(month), int(day), int(base['timeh']), int(base['timem']), int(base['times'])).strftime('%s')) # correct GMT-mixup base['timestamp'] = base['timestamp'] + (60 * 60 * timeshift) if float(base['lat']) == 0 and float(base['lon']) == 0: problems.setdefault('no position', []).append(l) #base['lon'], base['lat'] = interpolate(base['lat'], base['lon'], base['timestamp']) base['noise'] = int(base['noise']) base['signal'] = int(base['signal']) base['noise'] = int(base['snr']) if base.get('beacon', None): base['beacon'] = int(base['beacon']) typ = 'unknown' if base['ssid'] == '': typ = 'wlanclosed' if base.get('flags', None): if int(base['flags'], 16) & 0x0010 == 0x0010: typ = 'wlanwep' else: typ = 'wlanopen' # 0001 ESS ('Infrastructure') # 0002 IBSS ('Ad-Hoc') # 0004 CF-Pollable # 0008 CF-Poll Request # 0010 Privacy ('WEP') # 0020 Short Preamble # 0040 PBCC # 0080 Channel Agility base[typ] = typ bssids.setdefault(base['bssid'], []).append(base) return bssids, problems def _test(): import doctest return doctest.testmod() if __name__ == '__main__': _test() #import sys, pprint #wlans, problems = parseWiScan(sys.stdin.readlines()) #pprint.pprint(problems) #pprint.pprint(wlans)