#!/bin/bash
# wikidata_v2.sh
#
# For every (country, object type) pair listed in the two semicolon-separated
# input files, query the WDQ API for the matching Wikidata items, download the
# RDF of each item, transform it with Saxon/XSLT and collect the results in one
# RDF file per pair. The item count of every pair is appended to a statistics
# CSV file.
#
# Must be started from the directory that contains saxon9he.jar, because the
# jar, the XSLT stylesheet and the input/statistics files are addressed with
# relative paths.
#
# NOTE(review): the query endpoint (wdq.wmflabs.org) may no longer be
# available — confirm before relying on this script.

set -u

# Files with the countries and the object types.
countries="Data/countries_sample.csv"
objects="Data/objects_sample.csv"
# Statistics output: one "name;osm1.osm2;count" row per processed pair.
file_stat="../Statistics/wikidata.csv"
# Scratch directory; every file in it whose name contains a dot is deleted
# after each processed pair.
tmp_dir="/home/cerba/Temp"
# Directory receiving the resulting RDF files.
out_dir="/media/cerba/Data/Data/SPOI/RDF_output"

#######################################
# Extract the reported number of items from a WDQ JSON response.
# Arguments: $1 - raw JSON text
# Outputs:   the text between the first 'items":' and the following comma,
#            with whitespace removed (not validated here)
#######################################
extract_item_count() {
  local rest=${1#*'items":'}
  rest=${rest%%,*}
  printf '%s\n' "${rest//[[:space:]]/}"
}

#######################################
# Extract the comma-separated item ids from a WDQ JSON response.
# Arguments: $1 - raw JSON text
# Outputs:   contents of the bracketed list that follows the last colon,
#            with whitespace removed
#######################################
extract_item_ids() {
  local rest=${1##*:}
  rest=${rest%\]*}
  rest=${rest##*\[}
  printf '%s\n' "${rest//[[:space:]]/}"
}

#######################################
# Download and transform all Wikidata items of one country/object-type pair.
# Globals:   tmp_dir, out_dir, file_stat (read)
# Arguments: $1 zkratka - country abbreviation (used in file names)
#            $2 jmeno   - country name (used in messages and statistics)
#            $3 gn, $4 db - passed through to the stylesheet
#            $5 wd      - country Wikidata identifier (text after last "Q" used)
#            $6 waze, $7 osm1, $8 osm2 - passed through to the stylesheet
#            $9 owd     - object-type Wikidata identifier
# Returns:   0 on success or when there are no items, 1 when the query failed
#######################################
process_pair() {
  local zkratka=$1 jmeno=$2 gn=$3 db=$4 wd=$5
  local waze=$6 osm1=$7 osm2=$8 owd=$9
  local coun=${wd##*Q}
  local obj=${owd##*Q}

  echo "Stahovat: $jmeno: $coun - $osm2"

  # Combination of the country and the object type.
  local file_in="https://wdq.wmflabs.org/api?q=claim[31:${obj}]%20AND%20claim[17:${coun}]"
  local file_tmp1="${tmp_dir}/${zkratka}_${osm2}.csv"
  local file_out="${out_dir}/WD_${zkratka}_${osm2}.rdf"
  local query_json="${tmp_dir}/temp.json"
  local item_rdf="${tmp_dir}/temp.rdf"
  local item_xml="${tmp_dir}/temp.xml"

  # Download and unpack the query result.
  echo "Stahování a úprava dotazu"
  if ! wget -O "$query_json" "$file_in"; then
    printf 'wikidata: query failed, pair skipped: %s\n' "$file_in" >&2
    return 1
  fi

  local temp temp1
  temp=$(<"$query_json")
  temp1=$(extract_item_count "$temp")
  echo "Počet položek: $temp1"

  # A non-numeric count means the response was not the expected JSON; going
  # on would only download and transform garbage.
  if [[ ! "$temp1" =~ ^[0-9]+$ ]]; then
    printf 'wikidata: unexpected response, pair skipped: %s\n' "$file_in" >&2
    return 1
  fi

  printf '%s;%s.%s;%s\n' "$jmeno" "$osm1" "$osm2" "$temp1" >> "$file_stat"

  if [[ "$temp1" == 0 ]]; then
    return 0
  fi

  # One id per line: shown on stdout and stored for the loop below. The list
  # file is overwritten so that leftovers of an aborted run are not re-read.
  extract_item_ids "$temp" | tr ',' '\n' | tee "$file_tmp1"

  {
    printf '%s\n' '<?xml version="1.0" encoding="utf-8"?>'
    cat <<'EOF'
<rdf:RDF xmlns:geos="http://www.opengis.net/ont/geosparql#"
xmlns:otm="http://opentransportmap.info/rdf#"
xmlns:owl="http://www.w3.org/2002/07/owl#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:poi="http://www.openvoc.eu/poi#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
EOF
  } > "$file_out"

  # Read the individual query results. The list is read from descriptor 5 so
  # that commands in the loop body cannot consume it through stdin.
  local id down h1
  while IFS= read -r -u 5 id || [[ -n "$id" ]]; do
    id=${id//[[:space:]]/}
    [[ -n "$id" ]] || continue

    down="https://www.wikidata.org/entity/Q${id}.rdf"
    echo "Soubor pro stahování: $down"
    echo "----------------------------------------------------"
    if ! wget -O "$item_rdf" "$down"; then
      printf 'wikidata: download failed, item skipped: %s\n' "$down" >&2
      continue
    fi

    echo "Transformuji: $down"
    # On failure the item is skipped; otherwise the temp.xml of the previous
    # item would be appended to the output a second time.
    if ! java -Xms2560m -Xmx24560m -jar saxon9he.jar \
      -s:"$item_rdf" -xsl:../XSLT/transform_wikidata.xsl -o:"$item_xml" \
      gn="$gn" db="$db" waze="$waze" osm1="$osm1" osm2="$osm2"; then
      printf 'wikidata: transformation failed, item skipped: %s\n' "$down" >&2
      continue
    fi

    # Quoted on purpose: keeps the transformed XML intact (no word splitting,
    # no glob expansion). Each item is followed by an empty line.
    h1=$(<"$item_xml")
    printf '%s\n\n' "$h1" >> "$file_out"
  done 5< "$file_tmp1"

  echo '</rdf:RDF>' >> "$file_out"
}

#######################################
# Iterate over all countries and, for each of them, over all object types.
# Globals:   countries, objects, file_stat, tmp_dir (read)
# Returns:   0 when the lists were processed, 1 when an input file cannot be
#            read or the statistics file cannot be created
#######################################
main() {
  local zkratka jmeno gn db wd
  local waze osm1 osm2 owd
  local input

  for input in "$countries" "$objects"; do
    if [[ ! -r "$input" ]]; then
      printf 'wikidata: cannot read input file: %s\n' "$input" >&2
      return 1
    fi
  done

  # Start a new statistics file.
  # NOTE(review): this leaves an empty first line in the CSV, exactly as the
  # previous version of the script did — confirm whether that is wanted.
  if ! echo > "$file_stat"; then
    printf 'wikidata: cannot write statistics file: %s\n' "$file_stat" >&2
    return 1
  fi

  # Separate descriptors (3 and 4) keep both lists safe from commands that
  # read stdin; the "|| [[ -n ... ]]" part also accepts a last row without a
  # trailing newline.
  while IFS=';' read -r -u 3 zkratka jmeno gn db wd || [[ -n "$zkratka" ]]; do
    while IFS=';' read -r -u 4 waze osm1 osm2 owd || [[ -n "$waze" ]]; do
      process_pair "$zkratka" "$jmeno" "$gn" "$db" "$wd" \
        "$waze" "$osm1" "$osm2" "$owd"

      # Empty the scratch directory; ":?" aborts instead of expanding to
      # "/*.*" should tmp_dir ever be empty.
      rm -rf -- "${tmp_dir:?}"/*.*
    done 4< "$objects"
  done 3< "$countries"

  return 0
}

main