Selaa lähdekoodia

Optimize computation time

The memory usage is a trade-off here
jmacura 2 vuotta sitten
vanhempi
commit
837475b7ba
1 muutettua tiedostoa jossa 13 lisäystä ja 12 poistoa
  1. 13 12
      csv-sparql2geojs.py

+ 13 - 12
csv-sparql2geojs.py

@@ -16,24 +16,24 @@ csvFileName = 'all-pois-italy.csv'
 def sparql2geojs(csv):
     rows = 0
     header = False
-    features = []
+    features = {}
     for row in csv:
-        if not header:
+        if header is False:
             header = True
             continue
         #pprint(result)
         featureId = row[0]
-        feature = next((f for f in features if f and f['id'] == featureId), None)
+        feature = features.get(featureId)
         # SPARQL endpoint returns one POI more than once if it has more than one property
-        if feature is not None:
-            updateGeoJSONFeature(feature, row)
-        else:
+        if feature is None:
             feature = createGeoJSONFeature(row)
-            features.append(feature)
+            features[featureId] = feature
+        else:
+            updateGeoJSONFeature(feature, row)
         rows += 1
         if rows % 1000 == 0:
             print(rows, ' lines processed')
-    return FeatureCollection(features)
+    return FeatureCollection(list(features.values()))
 
 # Supplementary function to convert one result to a GeoJSON feature
 def createGeoJSONFeature(row):
@@ -42,12 +42,12 @@ def createGeoJSONFeature(row):
     id = row[0] # ID of the feature is its URI
     geom = parseWKT(row[3])
     #pprint(geom.get('type'))
-    if geom.get('type').upper() == "POINT":
+    #if geom.get('type').upper() == "POINT":
         #pprint(Feature(geometry = Point(coords), id = id, properties = props))
-        return Feature(geometry = geom, id = id, properties = props)
-    elif geom.get('type').upper() == "MULTIPOLYGON":
+    return Feature(geometry = geom, id = id, properties = props)
+    #elif geom.get('type').upper() == "MULTIPOLYGON":
         #pprint(Feature(geometry = MultiPolygon(coords), id = id, properties = props))
-        return Feature(geometry = MultiPolygon(coords), id = id, properties = props)
+        #return Feature(geometry = MultiPolygon(coords), id = id, properties = props)
 
 # Supplementary function to update one GeoJSON feature if it already exists
 def updateGeoJSONFeature(feature, result):
@@ -139,6 +139,7 @@ print(geojs.is_valid)
 print(geojs.errors())
 
 # Save the output into a file (comment out the following lines to skip saving)
+print("Saving GeoJSON file ...")
 outFileName = csvFileName.split(".")[0]
 with open(outFileName + ".geojson", 'w') as out:
     out.write(geodumps(geojs))