csv-sparql2geojs.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. # @version 0.2.0
  2. from geojson import Point, LineString, Polygon, MultiPolygon, Feature, FeatureCollection
  3. from geojson import dumps as geodumps
  4. from csv import reader as csvreader
  5. #import json
  6. from pprint import pprint # For pretty-printing JSON
  7. #import sys
  8. #import urllib.request, urllib.parse, urllib.error
  9. # Inputs
  10. csvFileName = 'all-pois-italy.csv'
  11. # Function to convert SPARQL query results to GeoJSON FeatureCollection
  def sparql2geojs(csv):
      """Convert SPARQL query results (CSV rows) to a GeoJSON FeatureCollection.

      The first row is treated as a header and skipped.  Each following row
      is indexed as ``row[0]`` = feature id, ``row[1]``/``row[2]`` = one
      property name/value pair and ``row[3]`` = WKT geometry literal.

      The SPARQL endpoint returns one POI more than once if it has more than
      one property, so rows sharing an id are merged into a single feature.

      Args:
          csv: iterable of rows (e.g. a ``csv.reader``), header row first.

      Returns:
          A ``FeatureCollection`` holding one entry per distinct feature id.
      """
      records = iter(csv)
      next(records, None)  # drop the header row; tolerate completely empty input

      features = []
      # id -> feature index, so finding an already-created feature is a dict
      # lookup instead of a scan over every feature collected so far.
      features_by_id = {}
      rows = 0
      for row in records:
          if not row:
              # csv.reader yields [] for blank lines; there is nothing to convert
              continue
          feature_id = row[0]
          feature = features_by_id.get(feature_id)
          if feature is not None:
              updateGeoJSONFeature(feature, row)
          else:
              feature = createGeoJSONFeature(row)
              features.append(feature)
              # Only real features are indexed; a None result stays
              # unmatched, exactly as with the previous linear search.
              if feature is not None:
                  features_by_id[feature_id] = feature
          rows += 1
          if rows % 1000 == 0:
              print(rows, ' lines processed')
      return FeatureCollection(features)
  33. # Supplementary function to convert one result to a GeoJSON feature
  def createGeoJSONFeature(row):
      """Build a GeoJSON Feature from one SPARQL result row.

      Each result row carries exactly one property/value pair, so the new
      feature starts with a single property; further rows for the same
      feature are merged in by ``updateGeoJSONFeature``.

      Args:
          row: sequence where ``row[0]`` is the feature URI (used as the
              feature id), ``row[1]`` the property name, ``row[2]`` the
              property value and ``row[3]`` the WKT geometry literal.

      Returns:
          A ``Feature`` whose geometry is whatever ``parseWKT`` produced for
          ``row[3]``.
      """
      feature_id = row[0]  # ID of the feature is its URI
      props = {row[1]: row[2]}
      # parseWKT already returns a finished geometry object for every type it
      # handles, so it is used directly.  The former per-type branches
      # referenced an undefined ``coords`` for MULTIPOLYGON and returned None
      # for LineString/Polygon geometries.
      geom = parseWKT(row[3])
      return Feature(geometry=geom, id=feature_id, properties=props)
  46. # Supplementary function to update one GeoJSON feature if it already exists
  def updateGeoJSONFeature(feature, result):
      """Merge the property carried by one result row into an existing feature.

      Args:
          feature: mapping with a ``'properties'`` dict (a GeoJSON feature).
          result: sequence where ``result[1]`` is the property name and
              ``result[2]`` its value.

      A property that is not present yet is added.  If the property already
      exists (i.e. it has more than one value) the first value is kept and
      the additional one is ignored.
      """
      # setdefault stores the value only when the key is still absent
      feature['properties'].setdefault(result[1], result[2])
  53. # Supplementary function to parse WKT Literal.
  54. # It is further extended by specific functions for specific geometry types
  def parseWKT(wktLiteral):
      """Parse a WKT literal into the matching GeoJSON geometry object.

      Supported geometry types are POINT, LINESTRING, POLYGON and
      MULTIPOLYGON; the coordinate text is handed to the type-specific
      ``parseWKT...`` helper.  The geometry keyword is matched
      case-insensitively and surrounding whitespace is ignored.

      Args:
          wktLiteral: WKT text such as ``"POINT(12.5 41.9)"``.

      Returns:
          A ``Point``, ``LineString``, ``Polygon`` or ``MultiPolygon``.  For
          any other geometry type a warning is printed and an empty ``Point``
          (no coordinates) is returned.
      """
      literal = wktLiteral.strip()
      # Everything before the first "(" is the geometry keyword, possibly
      # followed by a blank (e.g. "POLYGON ").
      prefix = literal.split("(")[0]
      kind = prefix.strip().upper()
      body = literal[len(prefix):]

      if kind == "POINT":
          coords = body.replace("(", "").replace(")", "").strip()
          return Point(parseWKTPoint(coords))
      if kind == "LINESTRING":
          return LineString(parseWKTLineString(body))
      if kind == "POLYGON":
          # drop the outermost pair of parentheses, keep the ring groups
          return Polygon(parseWKTPolygon(body[1:-1]))
      if kind == "MULTIPOLYGON":
          # drop the outermost pair of parentheses, keep the polygon groups
          return MultiPolygon(parseWKTMultiPolygon(body[1:-1]))

      # TODO: other geometry types support
      print("Unsupported geometry type {}! Will produce empty point object without coordinates!".format(prefix))
      return Point([])
  93. # Supplementary function to parse WKT Point geometry to its GeoJSON equivalent.
  def parseWKTPoint(wktLiteral):
      """Parse the coordinate text of one WKT point into a GeoJSON position.

      Args:
          wktLiteral: whitespace-separated coordinates without parentheses,
              e.g. ``"12.5 41.9"``.  Only the first two values are used.

      Returns:
          A ``(float, float)`` tuple of the first and second coordinate.

      Raises:
          IndexError: if fewer than two values are present.
          ValueError: if a value is not a valid float.
      """
      # split() without an argument collapses repeated blanks and ignores
      # leading/trailing whitespace, which split(" ") turned into empty
      # tokens that float() cannot parse.
      lonLat = wktLiteral.split()
      return (float(lonLat[0]), float(lonLat[1]))
  97. # Supplementary function to parse WKT LineString geometry to its GeoJSON equivalent.
  def parseWKTLineString(wktLiteral):
      """Parse WKT line-string coordinate text into a list of positions.

      All parentheses are removed, the remainder is split on commas and each
      piece is converted with ``parseWKTPoint`` after trimming whitespace.

      Args:
          wktLiteral: comma-separated coordinate pairs, with or without
              surrounding parentheses, e.g. ``"(0 0,1 1,2 2)"``.

      Returns:
          A list with one ``parseWKTPoint`` result per coordinate pair.
      """
      without_parens = wktLiteral.translate(str.maketrans("", "", "()"))
      return [parseWKTPoint(pair.strip()) for pair in without_parens.split(",")]
  104. # Supplementary function to parse WKT Polygon geometry to its GeoJSON equivalent.
  def parseWKTPolygon(wktLiteral):
      """Parse WKT polygon coordinate text into a list of rings.

      Args:
          wktLiteral: one or more parenthesised rings separated by commas,
              e.g. ``"(0 0,1 0,1 1,0 0),(0.2 0.2,0.4 0.2,0.4 0.4,0.2 0.2)"``.
              Whitespace around the separating comma is allowed.

      Returns:
          A list of rings, each ring being the list of positions returned by
          ``parseWKTLineString``.  Empty input yields an empty list.
      """
      # Every ring ends with ")", so splitting there isolates the rings no
      # matter how the separator is spaced ("),(" as well as "), (").  The
      # former split on ",(" merged all rings into one for the spaced form.
      pieces = (piece.strip(" \t\r\n,()") for piece in wktLiteral.split(")"))
      return [parseWKTLineString(piece) for piece in pieces if piece]
  111. # Supplementary function to parse WKT MultiPolygon geometry to its GeoJSON equivalent.
  def parseWKTMultiPolygon(wktLiteral):
      """Parse WKT multi-polygon coordinate text into a list of polygons.

      Args:
          wktLiteral: one or more double-parenthesised polygons separated by
              commas, e.g. ``"((0 0,1 0,1 1,0 0)),((2 2,3 2,3 3,2 2))"``.
              Whitespace around the separating comma is allowed.

      Returns:
          A list of polygons, each being the list of rings returned by
          ``parseWKTPolygon``.  Empty input yields an empty list.
      """
      multipolygon = []
      # A polygon is closed by "))" (end of its last ring plus end of the
      # polygon), so splitting there works for "),(" and "), (" separators
      # alike.  The former split on ",((" merged polygons for the spaced form.
      for chunk in wktLiteral.split("))"):
          chunk = chunk.strip(" \t\r\n,")
          if chunk:
              # restore the closing parentheses consumed by the split
              multipolygon.append(parseWKTPolygon(chunk + "))"))
      return multipolygon
  118. # Executional part
  # Read the CSV file, convert it to GeoJSON, report validity and save the
  # result next to the input as "<input name without extension>.geojson".
  from os.path import splitext

  # newline='' is what the csv module expects so that line breaks embedded in
  # quoted fields are handled by the reader itself.
  with open(csvFileName, encoding='utf-8', newline='') as csvfile:
      filereader = csvreader(csvfile)
      print('File read, parsing ...')
      # the reader is lazy, so the conversion has to happen while the file is open
      geojs = sparql2geojs(filereader)

  print(geojs.is_valid)
  print(geojs.errors())

  # splitext removes only the final extension, so names or directories that
  # contain additional dots (e.g. "./data/pois.v2.csv") keep their full stem.
  outFileName = splitext(csvFileName)[0]
  with open(outFileName + ".geojson", 'w', encoding='utf-8') as out:
      out.write(geodumps(geojs))