download street to postcode relations from geofabrik (check Bandwidth and Connection Rate Limits) and save to mysql
sql:
create table osm_roads( id integer, name char( 255 ), category char( 255 ), postal_code char( 255 ));
shell:
wget -e robots=off -nd -r -l1 -np -A *.bz2 http://download.geofabrik.de/openstreetmap/europe/germany/ wget -O - https://notomorrow.de/_export/code/notess/osm_roads?codeblock=0 > osm_roads.py wget -O - https://notomorrow.de/_export/code/notes/osm_roads?codeblock=1 > osm_roads.sql mysql < osm_roads.sql for i in *.bz2; do bunzip2 $i && python osm_roads.py ${i%.bz2} && rm ${i%.bz2}; done;
todo: save cleaned street values, like ccity for cities
import sys import MySQLdb from pprint import pprint #pp = pprint.PrettyPrinter(indent=4) idx_replace = [ ' ', '-', '.', ',', '(', ')' ] def clean( s ): s = s.lower( ) for c in idx_replace: s = s.replace( c, '' ) return s def indexstr( l ): return '|'.join( map( clean, l )) def save( act, data ): #pprint( { 'save': data }) if 'addr:city' in data: data['city'] = data['addr:city'] if 'addr:country' in data: data['country'] = data['addr:country'] if 'addr:postcode' in data: data['postcode'] = data['addr:postcode'] if 'addr:street' in data: data['street'] = data['addr:street'] if 'highway' in data: data['category'] = data['highway'] if not 'steet' in data and 'name' in data: data['street'] = data['name'] if not 'postcode' in data and 'postal_code' in data: data['postcode'] = data['postal_code'] if not 'category' in data and 'building' in data: data['category'] = 'building' if not 'street' in data: return False if not 'postcode' in data or len( data['postcode'] ) != 5: return False if not 'category' in data: data['category'] = 'addr' if not 'city' in data: data['city'] = '' data['way_id'] = data['way_id'] found = { } idx = '' ## postcode from geo_db if data['city'] == '': c1.execute ( 'select name, sortname from geo_place where postcode=%s',( data['postcode'] )) found = c1.fetchone( ) if found is not None: pprint({ 'city found': found }) data['city'] = found[0] data['ccity'] = found[1] ## check existing c1.execute ( 'select id, city from osm_roads3 where street=%s and category=%s and postcode=%s and city=%s', ( data['street'], data['category'], data['postcode'], data['city'] )) found = c1.fetchone( ) if found is not None: pprint({ 'road found': found }) return True; ## insert roads if not 'ccity' in data: data['ccity'] = clean( data['city'] ) elif data['ccity'] != '': data['ccity'] = clean( data['ccity'] ) c2.execute ("""insert into osm_roads3( way_id, street, category, postcode, city, ccity ) VALUES( %s, %s, %s, %s, %s, %s )""", ( data['way_id'], data['street'], data['category'], data['postcode'], data['city'], data['ccity'] )) print "ins: %s" % data['way_id'] if data['city'] != '': ## update plz c1.execute ( 'select id from osm_plz where postcode=%s and city=%s', ( data['postcode'], data['city'] )) if c1.fetchone( ) is None: print "plz: %s - %s" % ( data['postcode'], data['city'] ) c2.execute ("""insert into osm_plz( postcode, city ) VALUES( %s, %s )""", ( data['postcode'], data['city'] )) ## update index idx = indexstr([ data['postcode'], data['city'], data['street'] ]) print "idx: %s" % idx c2.execute ("""insert into osm_idx( id, idx ) VALUES( %s, %s )""", ( data['way_id'], idx )) from lxml import etree def read_dump( infile ): data = {} f = open( infile ,'r' ) for ev, el in etree.iterparse( infile, events=( 'start', 'end' ) ): tag = el.tag if ev == 'start': if tag == 'way': k = data['way_id'] = el.attrib.get( 'id' ); elif 'way_id' in data: if tag == 'tag': a = el.attrib data[a.get( 'k' )] = a.get( 'v' ).encode( 'utf8' ) else: if tag == 'way': save( tag, data ); data = {} el.clear( ) try: conn = MySQLdb.connect (host = "localhost",user = "xxx",passwd = "xxx",db = "xxx") c1 = conn.cursor( ) c2 = conn.cursor () except MySQLdb.Error, e: print "Error %d: %s" % (e.args[0], e.args[1]) sys.exit (1) print "readfile %s" % sys.argv[1] read_dump( sys.argv[1] ) print "commit" conn.commit( ) c1.close( ) c2.close( ) conn.close( ) sys.exit( )
CREATE DATABASE roads CHARACTER SET utf8; CONNECT roads; CREATE TABLE `osm_roads3` ( `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT, `way_id` CHAR(32) DEFAULT NULL, `category` CHAR(32) DEFAULT NULL, `postcode` CHAR(5) DEFAULT NULL, `street` CHAR(124) DEFAULT NULL, `city` CHAR(124) DEFAULT NULL, `idx` CHAR(255) DEFAULT NULL, `ccity` CHAR(255) DEFAULT NULL, UNIQUE KEY `id` (`id`) ); CREATE TABLE `osm_idx` ( `id` BIGINT(20) DEFAULT NULL, `idx` CHAR(255) DEFAULT NULL ); CREATE TABLE `osm_plz` ( `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT, `postcode` CHAR(5) DEFAULT NULL, `city` CHAR(255) DEFAULT NULL, UNIQUE KEY `id` (`id`) ); -- prefill with postcodes CREATE TABLE `geo_place` ( `id` INT(11) DEFAULT NULL, `postcode` CHAR(255) DEFAULT NULL, `category` CHAR(255) DEFAULT NULL, `name` CHAR(255) DEFAULT NULL, `sortname` CHAR(255) DEFAULT NULL );