import sys import MySQLdb from pprint import pprint #pp = pprint.PrettyPrinter(indent=4) idx_replace = [ ' ', '-', '.', ',', '(', ')' ] def clean( s ): s = s.lower( ) for c in idx_replace: s = s.replace( c, '' ) return s def indexstr( l ): return '|'.join( map( clean, l )) def save( act, data ): #pprint( { 'save': data }) if 'addr:city' in data: data['city'] = data['addr:city'] if 'addr:country' in data: data['country'] = data['addr:country'] if 'addr:postcode' in data: data['postcode'] = data['addr:postcode'] if 'addr:street' in data: data['street'] = data['addr:street'] if 'highway' in data: data['category'] = data['highway'] if not 'steet' in data and 'name' in data: data['street'] = data['name'] if not 'postcode' in data and 'postal_code' in data: data['postcode'] = data['postal_code'] if not 'category' in data and 'building' in data: data['category'] = 'building' if not 'street' in data: return False if not 'postcode' in data or len( data['postcode'] ) != 5: return False if not 'category' in data: data['category'] = 'addr' if not 'city' in data: data['city'] = '' data['way_id'] = data['way_id'] found = { } idx = '' ## postcode from geo_db if data['city'] == '': c1.execute ( 'select name, sortname from geo_place where postcode=%s',( data['postcode'] )) found = c1.fetchone( ) if found is not None: pprint({ 'city found': found }) data['city'] = found[0] data['ccity'] = found[1] ## check existing c1.execute ( 'select id, city from osm_roads3 where street=%s and category=%s and postcode=%s and city=%s', ( data['street'], data['category'], data['postcode'], data['city'] )) found = c1.fetchone( ) if found is not None: pprint({ 'road found': found }) return True; ## insert roads if not 'ccity' in data: data['ccity'] = clean( data['city'] ) elif data['ccity'] != '': data['ccity'] = clean( data['ccity'] ) c2.execute ("""insert into osm_roads3( way_id, street, category, postcode, city, ccity ) VALUES( %s, %s, %s, %s, %s, %s )""", ( data['way_id'], data['street'], data['category'], data['postcode'], data['city'], data['ccity'] )) print "ins: %s" % data['way_id'] if data['city'] != '': ## update plz c1.execute ( 'select id from osm_plz where postcode=%s and city=%s', ( data['postcode'], data['city'] )) if c1.fetchone( ) is None: print "plz: %s - %s" % ( data['postcode'], data['city'] ) c2.execute ("""insert into osm_plz( postcode, city ) VALUES( %s, %s )""", ( data['postcode'], data['city'] )) ## update index idx = indexstr([ data['postcode'], data['city'], data['street'] ]) print "idx: %s" % idx c2.execute ("""insert into osm_idx( id, idx ) VALUES( %s, %s )""", ( data['way_id'], idx )) from lxml import etree def read_dump( infile ): data = {} f = open( infile ,'r' ) for ev, el in etree.iterparse( infile, events=( 'start', 'end' ) ): tag = el.tag if ev == 'start': if tag == 'way': k = data['way_id'] = el.attrib.get( 'id' ); elif 'way_id' in data: if tag == 'tag': a = el.attrib data[a.get( 'k' )] = a.get( 'v' ).encode( 'utf8' ) else: if tag == 'way': save( tag, data ); data = {} el.clear( ) try: conn = MySQLdb.connect (host = "localhost",user = "xxx",passwd = "xxx",db = "xxx") c1 = conn.cursor( ) c2 = conn.cursor () except MySQLdb.Error, e: print "Error %d: %s" % (e.args[0], e.args[1]) sys.exit (1) print "readfile %s" % sys.argv[1] read_dump( sys.argv[1] ) print "commit" conn.commit( ) c1.close( ) c2.close( ) conn.close( ) sys.exit( )