Selaa lähdekoodia

Add py script for duplicates search

jmacura 4 vuotta sitten
vanhempi
commit
63a217c2ba
1 muutettua tiedostoa jossa 55 lisäystä ja 0 poistoa
  1. 55 0
      exploreData.py

+ 55 - 0
exploreData.py

@@ -0,0 +1,55 @@
+#from csv import reader
+#import json
+from pprint import pprint
+from SPARQLWrapper import SPARQLWrapper, JSON
+#import ssl
+#import sys
+import time
+#import urllib.request, urllib.parse, urllib.error
+import rdflib
+from rdflib.plugins import sparql
+
+def getPOIsWithEqualNameAndTypeWithinDistance(distance):
+    fc_sparql = SPARQLWrapper("https://www.foodie-cloud.org/sparql")
+    q = """
+        PREFIX unit: <http://www.opengis.net/def/uom/OGC/1.0/>
+        SELECT DISTINCT ?Resource1 ?Resource2 ?distance
+        FROM <http://www.sdi4apps.eu/poi/czech>
+        WHERE {{
+            ?Resource1 a ?type1 .
+            ?Resource1 rdfs:label ?label1 .
+            ?Resource1 ogcgs:asWKT ?geo1 .
+            ?Resource2 a ?type2 .
+            ?Resource2 rdfs:label ?label2 .
+            ?Resource2 ogcgs:asWKT ?geo2 .
+            FILTER (lcase(str(?label1)) = lcase(str(?label2) )) .
+            FILTER ( ?Resource1 != ?Resource2 && ?Resource1 < ?Resource2)
+            FILTER (NOT EXISTS {{?x a ?Resource1}} && NOT EXISTS {{?y a ?Resource2}}) .
+            BIND (ogcgsf:distance ( ?geo1 , ?geo2 , unit:meter ) as ?distance) .
+            FILTER ( ?distance < {0})
+        }}
+        LIMIT 100
+        OFFSET 0
+        """.format(distance)
+    print(q)
+    fc_sparql.setQuery(q)
+    fc_sparql.setReturnFormat(JSON)
+    results = fc_sparql.query().convert()
+    #for result in results["results"]["bindings"]:
+        #print(result)
+    return results["results"]["bindings"]
+
+#g = rdflib.Graph()
+print("= LOOKING FOR DUPLICATE POIS =")
+distance = input("Distance to search within: ")
+print("Searching for duplicate POIs within {0} m ...".format(distance))
+t_1 = time.time()
+result = getPOIsWithEqualNameAndTypeWithinDistance(distance)
+print("Finished in {}".format(time.time()-t_1))
+pprint(result)
+#g.parse("ontOSM&qids.owl", format="turtle")
+#print("Input RDF graph read with {} statements!\n".format(len(g)))
+
+#with open("ontOSM_beta.owl", 'w', encoding="utf-8") as fh:
+#    fh.write(g.serialize(format="turtle").decode())
+#print("File \"ontOSM_beta.owl\" successfully created")