jmacura 2 gadi atpakaļ
revīzija
6beff9fc84
2 mainītis faili ar 146 papildinājumiem un 0 dzēšanām
  1. 2 0
      .gitignore
  2. 144 0
      csv-sparql2geojs.py

+ 2 - 0
.gitignore

@@ -0,0 +1,2 @@
+*.geojson
+*.csv

+ 144 - 0
csv-sparql2geojs.py

@@ -0,0 +1,144 @@
+# @version 0.2.0
+
+from geojson import Point, LineString, Polygon, MultiPolygon, Feature, FeatureCollection
+from geojson import dumps as geodumps
+from csv import reader as csvreader
+#import json
+from pprint import pprint # For pretty-printing JSON
+#import sys
+#import urllib.request, urllib.parse, urllib.error
+
+
+# Inputs
+csvFileName = 'all-pois-italy.csv'
+
# Function to convert SPARQL query results to GeoJSON FeatureCollection
def sparql2geojs(csv):
    """Convert SPARQL result rows into a GeoJSON FeatureCollection.

    Parameters:
        csv: iterable of rows (e.g. a csv.reader). The first row is treated
            as the header and skipped. In every other row, column 0 is used
            as the feature id; the remaining columns are consumed by
            createGeoJSONFeature / updateGeoJSONFeature.

    Returns:
        FeatureCollection with one feature per distinct id, in order of
        first appearance.

    The SPARQL endpoint returns one POI more than once if it has more than
    one property, so rows sharing an id are merged into a single feature.
    """
    rowIterator = iter(csv)
    next(rowIterator, None)  # skip the header row (no-op on empty input)

    features = []
    # id -> feature index: constant-time lookup instead of rescanning the
    # whole feature list for every row.
    featuresById = {}
    rows = 0
    for row in rowIterator:
        if not row:
            # csv.reader yields [] for blank lines; nothing to convert
            continue
        featureId = row[0]
        feature = featuresById.get(featureId)
        if feature is not None:
            updateGeoJSONFeature(feature, row)
        else:
            feature = createGeoJSONFeature(row)
            # Never store a missing feature: a None entry would be
            # serialized as null inside the "features" array.
            if feature is not None:
                features.append(feature)
                featuresById[featureId] = feature
        rows += 1
        if rows % 1000 == 0:
            print(rows, ' lines processed')
    return FeatureCollection(features)
+
# Supplementary function to convert one result to a GeoJSON feature
def createGeoJSONFeature(row):
    """Build a GeoJSON Feature from a single result row.

    Parameters:
        row: sequence where row[0] is the feature id (its URI, according to
            the original author's note), row[1] / row[2] are one property
            name and its value, and row[3] is the geometry as a WKT literal.

    Returns:
        Feature whose geometry is whatever parseWKT produced for row[3].
        Every geometry type parseWKT can parse is accepted here; no branch
        returns None.
    """
    featureId = row[0]
    # each result from the SPARQL query comes with just one property-value pair
    props = {row[1]: row[2]}
    # parseWKT already returns a ready geometry object, so it is used as-is
    # for all geometry types instead of being rebuilt per type.
    geom = parseWKT(row[3])
    return Feature(geometry=geom, id=featureId, properties=props)
+
# Supplementary function to update one GeoJSON feature if it already exists
def updateGeoJSONFeature(feature, result):
    """Merge the property carried by one result row into an existing feature.

    Parameters:
        feature: mapping with a 'properties' dict; modified in place.
        result: sequence where result[1] is the property name and result[2]
            its value.

    Returns:
        None.

    A property seen for the first time is stored as a plain value. When the
    same property arrives again with a different value, the values are
    collected into a list (in order of arrival) so that none is lost; an
    exact repeat of an already stored value is ignored.
    """
    properties = feature['properties']
    key, value = result[1], result[2]

    # Case 1: property does not exist in feature's properties
    if key not in properties:
        properties[key] = value
        return

    # Case 2: property already exists, hence it may have more than 1 value
    existing = properties[key]
    if isinstance(existing, list):
        if value not in existing:
            existing.append(value)
    elif existing != value:
        properties[key] = [existing, value]
+
# Supplementary function to parse WKT Literal.
# It is further extended by specific functions for specific geometry types
def parseWKT(wktLiteral):
    """Parse a WKT literal into a GeoJSON geometry object.

    Supported types are POINT, LINESTRING, POLYGON and MULTIPOLYGON. The
    type keyword is matched case-insensitively, and whitespace around the
    literal or between the keyword and the opening parenthesis is tolerated.

    Parameters:
        wktLiteral: WKT text such as "POINT(14.4 50.1)".

    Returns:
        Point, LineString, Polygon or MultiPolygon. For any other type a
        warning is printed and an empty Point (no coordinates) is returned.
    """
    # Trim first so the "[1:-1]" slices below really remove the outermost
    # pair of parentheses and not a trailing blank or newline.
    text = wktLiteral.strip()
    tag, paren, rest = text.partition("(")
    geomType = tag.strip().upper()
    body = paren + rest  # everything from the first "(" onwards

    if geomType == "POINT":
        wktCoords = body.replace("(", "").replace(")", "").strip()
        return Point(parseWKTPoint(wktCoords))
    if geomType == "LINESTRING":
        return LineString(parseWKTLineString(body))
    if geomType == "POLYGON":
        return Polygon(parseWKTPolygon(body[1:-1]))
    if geomType == "MULTIPOLYGON":
        return MultiPolygon(parseWKTMultiPolygon(body[1:-1]))

    # TODO: other geometry types support
    print("Unsupported geometry type {}! Will produce empty point object without coordinates!".format(tag))
    return Point([])
+
# Supplementary function to parse WKT Point geometry to its GeoJSON equivalent.
def parseWKTPoint(wktLiteral):
    """Parse the coordinate text of one WKT point, e.g. "14.4 50.1".

    Parameters:
        wktLiteral: whitespace-separated numbers without parentheses.

    Returns:
        Tuple of the first two numbers as floats, in the order given.
        Any further numbers are ignored.

    Raises:
        IndexError: fewer than two numbers are present.
        ValueError: a token is not a valid float.
    """
    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing blanks, so "  14.4   50.1 " parses as well.
    lonLat = wktLiteral.split()
    return (float(lonLat[0]), float(lonLat[1]))
+
# Supplementary function to parse WKT LineString geometry to its GeoJSON equivalent.
def parseWKTLineString(wktLiteral):
    """Parse a comma-separated WKT point sequence into a list of coordinates.

    All parentheses in *wktLiteral* are discarded, the remainder is split on
    commas, and each piece is trimmed and handed to parseWKTPoint.

    Returns:
        List with one parseWKTPoint result per comma-separated piece.
    """
    withoutParens = wktLiteral.replace("(", "").replace(")", "")
    return [parseWKTPoint(piece.strip()) for piece in withoutParens.split(",")]
+
# Supplementary function to parse WKT Polygon geometry to its GeoJSON equivalent.
def parseWKTPolygon(wktLiteral):
    """Parse the ring list of a WKT polygon into a list of rings.

    Parameters:
        wktLiteral: ring list such as "(0 0, 1 0, 1 1, 0 0), (…)". Extra or
            unbalanced parentheses around the rings are tolerated.

    Returns:
        List of rings; each ring is the list returned by parseWKTLineString.
    """
    polygon = []
    # Every ring ends with ")". Cutting there, rather than on the exact text
    # ",(", keeps rings apart even when the separator is written "), (".
    for chunk in wktLiteral.split(")"):
        # Drop the separator and opening parentheses left at the chunk edges.
        ring = chunk.strip(" \t\r\n,(")
        if ring:
            polygon.append(parseWKTLineString(ring))
    return polygon
+
# Supplementary function to parse WKT MultiPolygon geometry to its GeoJSON equivalent.
def parseWKTMultiPolygon(wktLiteral):
    """Parse the polygon list of a WKT multipolygon into a list of polygons.

    Parameters:
        wktLiteral: polygon list such as "((0 0, 1 0, 0 0)), ((5 5, 6 5, 5 5))",
            i.e. the multipolygon text without its outermost parentheses.

    Returns:
        List of polygons; each polygon is the list returned by
        parseWKTPolygon for the text inside that polygon's parentheses.
    """
    multipolygon = []
    depth = 0
    start = None  # index of the "(" that opened the current polygon
    # Polygons are located by parenthesis depth rather than by splitting on
    # the exact text ",((", so whitespace between polygons cannot glue two
    # polygons together.
    for position, char in enumerate(wktLiteral):
        if char == "(":
            if depth == 0:
                start = position
            depth += 1
        elif char == ")" and depth > 0:
            depth -= 1
            if depth == 0:
                multipolygon.append(parseWKTPolygon(wktLiteral[start + 1:position]))
                start = None
    if start is not None:
        # Unbalanced input: still parse what was opened but never closed.
        multipolygon.append(parseWKTPolygon(wktLiteral[start + 1:]))
    return multipolygon
+
+
# Executional part
import os.path  # needed only here, to derive the output file name

# newline='' is what the csv module expects, so that line breaks embedded
# in quoted fields are handled by the reader itself.
with open(csvFileName, encoding='utf-8', newline='') as csvfile:
    filereader = csvreader(csvfile)
    print('File read, parsing ...')
    # Parsing must happen while the file is still open: the reader is lazy.
    geojs = sparql2geojs(filereader)
print(geojs.is_valid)
print(geojs.errors())

# Save the output next to the input, replacing only the final extension
# (dots elsewhere in the path or file name are left alone).
outFileName = os.path.splitext(csvFileName)[0]
with open(outFileName + ".geojson", 'w', encoding='utf-8') as out:
    out.write(geodumps(geojs))