OpenStreetMap Road Data

download street to postcode relations from geofabrik (check Bandwidth and Connection Rate Limits) and save to mysql

sql:

create table osm_roads( id integer, name char( 255 ), category char( 255 ), postal_code char( 255 ));

shell:

wget -e robots=off -nd -r -l1 -np -A *.bz2 http://download.geofabrik.de/openstreetmap/europe/germany/
wget -O - https://notomorrow.de/_export/code/notess/osm_roads?codeblock=0 > osm_roads.py
wget -O - https://notomorrow.de/_export/code/notes/osm_roads?codeblock=1 > osm_roads.sql
mysql < osm_roads.sql
for i in *.bz2; do bunzip2 $i && python osm_roads.py ${i%.bz2} && rm ${i%.bz2}; done;

todo: save cleaned street values, like ccity for cities

osm_roads.py
import sys
import MySQLdb
 
from pprint import pprint
#pp = pprint.PrettyPrinter(indent=4)
 
idx_replace = [ ' ', '-', '.', ',', '(', ')' ]
def clean( s ):
    s = s.lower( )
    for c in idx_replace: s = s.replace( c, '' )
    return s
 
def indexstr( l ): return '|'.join( map( clean, l ))
 
def save( act, data ):
 
    #pprint( { 'save': data })
 
 
    if 'addr:city' in data:     data['city']     = data['addr:city']
    if 'addr:country' in data:  data['country']  = data['addr:country']
    if 'addr:postcode' in data: data['postcode'] = data['addr:postcode']
    if 'addr:street' in data:   data['street']   = data['addr:street']
 
    if 'highway' in data:
        data['category'] = data['highway']
        if not 'steet' in data and 'name' in data:
            data['street'] = data['name']
 
    if not 'postcode' in data and 'postal_code' in data:        data['postcode'] = data['postal_code']
    if not 'category' in data and 'building' in data:   data['category'] = 'building'
 
    if not 'street' in data: return False
    if not 'postcode' in data or len( data['postcode'] ) != 5: return False
 
    if not 'category' in data:  data['category'] = 'addr'
    if not 'city' in data:      data['city'] = ''
    data['way_id'] = data['way_id']
 
    found = { }
    idx = ''
 
    ## postcode from geo_db
    if data['city'] == '':
        c1.execute ( 'select name, sortname from geo_place where postcode=%s',( data['postcode'] ))
        found = c1.fetchone( )
        if found is not None:
            pprint({ 'city found': found })
            data['city'] = found[0]
            data['ccity'] = found[1]
 
    ## check existing
    c1.execute ( 'select id, city from osm_roads3 where street=%s and category=%s and postcode=%s and city=%s',
        ( data['street'], data['category'], data['postcode'], data['city'] ))
    found = c1.fetchone( )
    if found is not None:
        pprint({ 'road found': found })
        return True;
 
    ## insert roads
    if not 'ccity' in data: data['ccity'] = clean( data['city'] )
    elif data['ccity'] != '': data['ccity'] = clean( data['ccity'] )
 
    c2.execute ("""insert into osm_roads3( way_id, street, category, postcode, city, ccity )
                                VALUES( %s, %s, %s, %s, %s, %s )""",
        ( data['way_id'], data['street'], data['category'], data['postcode'], data['city'], data['ccity'] ))
    print "ins: %s" % data['way_id']
 
   if data['city'] != '':
        ## update plz
        c1.execute ( 'select id from osm_plz where postcode=%s and city=%s',
            ( data['postcode'], data['city'] ))
        if c1.fetchone( ) is None:
            print "plz: %s - %s" % ( data['postcode'], data['city'] )
            c2.execute ("""insert into osm_plz( postcode, city ) VALUES( %s, %s )""",
            ( data['postcode'], data['city'] ))
 
        ## update index
        idx = indexstr([ data['postcode'], data['city'], data['street'] ])
        print "idx: %s" % idx
        c2.execute ("""insert into osm_idx( id, idx ) VALUES( %s, %s )""",
                ( data['way_id'], idx ))
 
from lxml import etree
def read_dump( infile ):
    data = {}
    f = open( infile ,'r' )
    for ev, el in etree.iterparse( infile, events=( 'start', 'end' ) ):
        tag = el.tag
        if ev == 'start':
            if tag == 'way':
                k = data['way_id'] = el.attrib.get( 'id' );
            elif 'way_id' in data:
                if tag == 'tag':
                    a = el.attrib
                    data[a.get( 'k' )] = a.get( 'v' ).encode( 'utf8' )
        else:
            if tag == 'way':
                save( tag, data );
                data = {}
        el.clear( )
 
try:
    conn = MySQLdb.connect (host = "localhost",user = "xxx",passwd = "xxx",db = "xxx")
    c1 = conn.cursor( )
    c2 = conn.cursor ()
except MySQLdb.Error, e:
    print "Error %d: %s" % (e.args[0], e.args[1])
    sys.exit (1)
 
print "readfile %s" % sys.argv[1]
read_dump( sys.argv[1] )
print "commit"
conn.commit( )
c1.close( )
c2.close( )
conn.close( )
sys.exit( )
osm_roads3.sql
CREATE DATABASE roads CHARACTER SET utf8;
CONNECT roads;
CREATE TABLE `osm_roads3` (
  `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,
  `way_id` CHAR(32) DEFAULT NULL,
  `category` CHAR(32) DEFAULT NULL,
  `postcode` CHAR(5) DEFAULT NULL,
  `street` CHAR(124) DEFAULT NULL,
  `city` CHAR(124) DEFAULT NULL,
  `idx` CHAR(255) DEFAULT NULL,
  `ccity` CHAR(255) DEFAULT NULL,
  UNIQUE KEY `id` (`id`)
);
CREATE TABLE `osm_idx` (
  `id` BIGINT(20) DEFAULT NULL,
  `idx` CHAR(255) DEFAULT NULL
);
CREATE TABLE `osm_plz` (
  `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,
  `postcode` CHAR(5) DEFAULT NULL,
  `city` CHAR(255) DEFAULT NULL,
  UNIQUE KEY `id` (`id`)
);
-- prefill with postcodes
CREATE TABLE `geo_place` (
  `id` INT(11) DEFAULT NULL,
  `postcode` CHAR(255) DEFAULT NULL,
  `category` CHAR(255) DEFAULT NULL,
  `name` CHAR(255) DEFAULT NULL,
  `sortname` CHAR(255) DEFAULT NULL
);