对GADM数据shp文件结构的一些分析

这里要说的不是shp文件,而是gadm对shp文件的压缩打包操作。

首先看几个目录结构信息,分析标准的文件逻辑是如何的,便于进行批量数据处理。

在标准目录使用tree /F快速搞定并输出。

├─ABW_adm_shp
│      ABW_adm0.cpg
│      ABW_adm0.csv
│      ABW_adm0.dbf
│      ABW_adm0.prj
│      ABW_adm0.shp
│      ABW_adm0.shx
│      license.txt
│      
├─AFG_adm_shp
│      AFG_adm0.cpg
│      AFG_adm0.csv
│      AFG_adm0.dbf
│      AFG_adm0.prj
│      AFG_adm0.shp
│      AFG_adm0.shx
│      AFG_adm1.cpg
│      AFG_adm1.csv
│      AFG_adm1.dbf
│      AFG_adm1.prj
│      AFG_adm1.shp
│      AFG_adm1.shx
│      AFG_adm2.cpg
│      AFG_adm2.csv
│      AFG_adm2.dbf
│      AFG_adm2.prj
│      AFG_adm2.shp
│      AFG_adm2.shx
│      license.txt
│      
├─AGO_adm_shp
│      AGO_adm0.cpg
│      AGO_adm0.csv
│      AGO_adm0.dbf
│      AGO_adm0.prj
│      AGO_adm0.shp
│      AGO_adm0.shx
│      AGO_adm1.cpg
│      AGO_adm1.csv
│      AGO_adm1.dbf
│      AGO_adm1.prj
│      AGO_adm1.shp
│      AGO_adm1.shx
│      AGO_adm2.cpg
│      AGO_adm2.csv
│      AGO_adm2.dbf
│      AGO_adm2.prj
│      AGO_adm2.shp
│      AGO_adm2.shx
│      AGO_adm3.cpg
│      AGO_adm3.csv
│      AGO_adm3.dbf
│      AGO_adm3.prj
│      AGO_adm3.shp
│      AGO_adm3.shx
│      license.txt

从上面的文件可以看出,全部文件名均以ABW、AFG这样的三位字母代码开头(基本为国家/地区的代码),后面的adm等级分为0、1、2、3、4、5。上面只列举到3级,遍历所有数据最高可得5级,当然也不排除后续出现6级行政区域的可能性。

接下来就是对shp文件进行解析,看看会出现什么内容。处理shp文件的方式有很多种,我直接使用PostGIS的shp2pgsql将其转换为SQL文件,再导入到PostgreSQL。好处是简单方便,没有多余的数据;缺点是没有可视化的内容,当然我也不需要这些可视化内容。

<?php
/**
 * Convert every shapefile found one level below a root directory into a SQL
 * file with shp2pgsql, then load that SQL file into PostgreSQL with psql.
 *
 * Usage: php covert.php <root-dir>
 *
 * <root-dir> is expected to contain one sub-directory per country
 * (e.g. ABW_adm_shp), each holding the *.shp files to import. The generated
 * <name>.sql file is written next to its source <name>.shp file.
 *
 * The paths of the shp2pgsql and psql executables come from the constants
 * EXE_shp2pgsql and EXE_psql, which init.php is expected to define.
 *
 * User: loveyu
 * Date: 2018/2/13
 * Time: 18:55
 * @more see https://github.com/loveyu/BlogCodeSegment/blob/master/GEO/gadm/shp2pgsql/covert.php
 */

require_once __DIR__."/../../../init.php";
if(!isset($argv[1]) || !is_dir($argv[1])) {
    die("Error or empty dir.");
}

// glob() returns false on error; fall back to an empty list so foreach is safe.
$list = glob("{$argv[1]}/*") ?: [];
$shp2pgsql_exe_file = EXE_shp2pgsql;
$psql_exe = EXE_psql;

foreach($list as $item) {
    echo $item, "\n";
    $shp_list = glob("{$item}/*.shp") ?: [];
    foreach($shp_list as $value) {
        $filename = pathinfo($value, PATHINFO_FILENAME);
        $dir_name = pathinfo($value, PATHINFO_DIRNAME);
        $output = $dir_name.DIRECTORY_SEPARATOR.$filename.".sql";

        // -s 4326 tags the geometries with SRID 4326 (WGS84).
        // Every path goes through escapeshellarg() so that spaces or shell
        // metacharacters in a file name cannot break or alter the command.
        $cmd = sprintf(
            '%s -I -s 4326 %s > %s',
            escapeshellarg($shp2pgsql_exe_file),
            escapeshellarg($value),
            escapeshellarg($output)
        );
        echo $cmd, "\n";
        system($cmd, $exit_code);
        if($exit_code !== 0) {
            // Do not feed a partial or empty SQL file to psql.
            fwrite(STDERR, "shp2pgsql failed ({$exit_code}) for {$value}, import skipped.\n");
            continue;
        }

        // NOTE(review): the database name "gamd_db" may be a typo for
        // "gadm_db" - confirm against the actual database before changing it.
        $import_cmd = sprintf(
            '%s -U postgres -d gamd_db -f %s',
            escapeshellarg($psql_exe),
            escapeshellarg($output)
        );
        echo $import_cmd, "\n";
        system($import_cmd, $exit_code);
        if($exit_code !== 0) {
            fwrite(STDERR, "psql import failed ({$exit_code}) for {$output}.\n");
        }
    }
}

然后直接得到如下数据结构,下面附上全部不同等级的表结构。以法国为例,法国的数据包含adm0~adm5共六张表(最深到第5级行政区),各字段的数据类型一目了然。

-- Level-0 (country) table for France.
-- Requires the sequence fra_adm0_gid_seq and the "public"."geometry" type
-- to exist before this statement runs.
CREATE TABLE "public"."fra_adm0" (
    -- Surrogate key, filled from the table's own sequence.
    "gid" INTEGER NOT NULL DEFAULT NEXTVAL('fra_adm0_gid_seq'::regclass),
    "id_0" BIGINT,
    "iso" VARCHAR(3) COLLATE "default",

    -- Name columns.
    "name_engli" VARCHAR(50) COLLATE "default",
    "name_iso" VARCHAR(54) COLLATE "default",
    "name_fao" VARCHAR(50) COLLATE "default",
    "name_local" VARCHAR(54) COLLATE "default",
    "name_obsol" VARCHAR(150) COLLATE "default",
    "name_varia" VARCHAR(160) COLLATE "default",
    "name_nonla" VARCHAR(50) COLLATE "default",
    "name_frenc" VARCHAR(50) COLLATE "default",
    "name_spani" VARCHAR(50) COLLATE "default",
    "name_russi" VARCHAR(50) COLLATE "default",
    "name_arabi" VARCHAR(50) COLLATE "default",
    "name_chine" VARCHAR(50) COLLATE "default",

    "waspartof" VARCHAR(100) COLLATE "default",
    "contains" VARCHAR(50) COLLATE "default",
    "sovereign" VARCHAR(40) COLLATE "default",
    "iso2" VARCHAR(4) COLLATE "default",
    "www" VARCHAR(2) COLLATE "default",
    "fips" VARCHAR(6) COLLATE "default",
    "ison" NUMERIC,
    "validfr" VARCHAR(12) COLLATE "default",
    "validto" VARCHAR(10) COLLATE "default",
    "pop2000" NUMERIC,
    "sqkm" NUMERIC,
    "popsqkm" NUMERIC,
    "unregion1" VARCHAR(254) COLLATE "default",
    "unregion2" VARCHAR(254) COLLATE "default",
    "developing" NUMERIC,
    "cis" NUMERIC,
    "transition" NUMERIC,
    "oecd" NUMERIC,
    "wbregion" VARCHAR(254) COLLATE "default",
    "wbincome" VARCHAR(254) COLLATE "default",
    "wbdebt" VARCHAR(254) COLLATE "default",
    "wbother" VARCHAR(254) COLLATE "default",

    -- Numeric columns; presumably membership flags for international
    -- groupings (TODO confirm against the GADM documentation).
    "ceeac" NUMERIC,
    "cemac" NUMERIC,
    "ceplg" NUMERIC,
    "comesa" NUMERIC,
    "eac" NUMERIC,
    "ecowas" NUMERIC,
    "igad" NUMERIC,
    "ioc" NUMERIC,
    "mru" NUMERIC,
    "sacu" NUMERIC,
    "uemoa" NUMERIC,
    "uma" NUMERIC,
    "palop" NUMERIC,
    "parta" NUMERIC,
    "cacm" NUMERIC,
    "eurasec" NUMERIC,
    "agadir" NUMERIC,
    "saarc" NUMERIC,
    "asean" NUMERIC,
    "nafta" NUMERIC,
    "gcc" NUMERIC,
    "csn" NUMERIC,
    "caricom" NUMERIC,
    "eu" NUMERIC,
    "can" NUMERIC,
    "acp" NUMERIC,
    "landlocked" NUMERIC,
    "aosis" NUMERIC,
    "sids" NUMERIC,
    "islands" NUMERIC,
    "ldc" NUMERIC,

    "geom" "public"."geometry",
    CONSTRAINT "fra_adm0_pkey" PRIMARY KEY ("gid")
)
WITH (OIDS = FALSE);

-- Level-1 administrative areas of France.
-- Requires the sequence fra_adm1_gid_seq and the "public"."geometry" type
-- to exist before this statement runs.
CREATE TABLE "public"."fra_adm1" (
    -- Surrogate key, filled from the table's own sequence.
    "gid" INTEGER NOT NULL DEFAULT NEXTVAL('fra_adm1_gid_seq'::regclass),

    -- Level-0 columns repeated on every row.
    "id_0" BIGINT,
    "iso" VARCHAR(3) COLLATE "default",
    "name_0" VARCHAR(75) COLLATE "default",

    -- Columns for this level.
    "id_1" BIGINT,
    "name_1" VARCHAR(75) COLLATE "default",
    "hasc_1" VARCHAR(15) COLLATE "default",
    "ccn_1" BIGINT,
    "cca_1" VARCHAR(254) COLLATE "default",
    "type_1" VARCHAR(50) COLLATE "default",
    "engtype_1" VARCHAR(50) COLLATE "default",
    "nl_name_1" VARCHAR(50) COLLATE "default",
    "varname_1" VARCHAR(150) COLLATE "default",

    "geom" "public"."geometry",
    CONSTRAINT "fra_adm1_pkey" PRIMARY KEY ("gid")
)
WITH (OIDS = FALSE);

-- Level-2 administrative areas of France.
-- Requires the sequence fra_adm2_gid_seq and the "public"."geometry" type
-- to exist before this statement runs.
CREATE TABLE "public"."fra_adm2" (
    -- Surrogate key, filled from the table's own sequence.
    "gid" INTEGER NOT NULL DEFAULT NEXTVAL('fra_adm2_gid_seq'::regclass),

    -- Columns of the upper levels (0 and 1) repeated on every row.
    "id_0" BIGINT,
    "iso" VARCHAR(3) COLLATE "default",
    "name_0" VARCHAR(75) COLLATE "default",
    "id_1" BIGINT,
    "name_1" VARCHAR(75) COLLATE "default",

    -- Columns for this level.
    "id_2" BIGINT,
    "name_2" VARCHAR(75) COLLATE "default",
    "hasc_2" VARCHAR(15) COLLATE "default",
    "ccn_2" BIGINT,
    "cca_2" VARCHAR(254) COLLATE "default",
    "type_2" VARCHAR(50) COLLATE "default",
    "engtype_2" VARCHAR(50) COLLATE "default",
    "nl_name_2" VARCHAR(75) COLLATE "default",
    "varname_2" VARCHAR(150) COLLATE "default",

    "geom" "public"."geometry",
    CONSTRAINT "fra_adm2_pkey" PRIMARY KEY ("gid")
)
WITH (OIDS = FALSE);

-- Level-3 administrative areas of France.
-- Requires the sequence fra_adm3_gid_seq and the "public"."geometry" type
-- to exist before this statement runs.
CREATE TABLE "public"."fra_adm3" (
    -- Surrogate key, filled from the table's own sequence.
    "gid" INTEGER NOT NULL DEFAULT NEXTVAL('fra_adm3_gid_seq'::regclass),

    -- Columns of the upper levels (0 to 2) repeated on every row.
    "id_0" BIGINT,
    "iso" VARCHAR(3) COLLATE "default",
    "name_0" VARCHAR(75) COLLATE "default",
    "id_1" BIGINT,
    "name_1" VARCHAR(75) COLLATE "default",
    "id_2" BIGINT,
    "name_2" VARCHAR(75) COLLATE "default",

    -- Columns for this level.
    "id_3" BIGINT,
    "name_3" VARCHAR(75) COLLATE "default",
    "ccn_3" BIGINT,
    "cca_3" VARCHAR(15) COLLATE "default",
    "type_3" VARCHAR(50) COLLATE "default",
    "engtype_3" VARCHAR(50) COLLATE "default",
    "nl_name_3" VARCHAR(75) COLLATE "default",
    "varname_3" VARCHAR(100) COLLATE "default",

    "geom" "public"."geometry",
    CONSTRAINT "fra_adm3_pkey" PRIMARY KEY ("gid")
)
WITH (OIDS = FALSE);

-- Level-4 administrative areas of France.
-- Requires the sequence fra_adm4_gid_seq and the "public"."geometry" type
-- to exist before this statement runs.
CREATE TABLE "public"."fra_adm4" (
    -- Surrogate key, filled from the table's own sequence.
    "gid" INTEGER NOT NULL DEFAULT NEXTVAL('fra_adm4_gid_seq'::regclass),

    -- Columns of the upper levels (0 to 3) repeated on every row.
    "id_0" BIGINT,
    "iso" VARCHAR(3) COLLATE "default",
    "name_0" VARCHAR(75) COLLATE "default",
    "id_1" BIGINT,
    "name_1" VARCHAR(75) COLLATE "default",
    "id_2" BIGINT,
    "name_2" VARCHAR(75) COLLATE "default",
    "id_3" BIGINT,
    "name_3" VARCHAR(75) COLLATE "default",

    -- Columns for this level.
    "id_4" BIGINT,
    "name_4" VARCHAR(100) COLLATE "default",
    "varname_4" VARCHAR(100) COLLATE "default",
    "ccn_4" BIGINT,
    "cca_4" VARCHAR(20) COLLATE "default",
    "type_4" VARCHAR(35) COLLATE "default",
    "engtype_4" VARCHAR(35) COLLATE "default",

    "geom" "public"."geometry",
    CONSTRAINT "fra_adm4_pkey" PRIMARY KEY ("gid")
)
WITH (OIDS = FALSE);

-- Level-5 administrative areas of France.
-- Requires the sequence fra_adm5_gid_seq and the "public"."geometry" type
-- to exist before this statement runs.
CREATE TABLE "public"."fra_adm5" (
    -- Surrogate key, filled from the table's own sequence.
    "gid" INTEGER NOT NULL DEFAULT NEXTVAL('fra_adm5_gid_seq'::regclass),

    -- Columns of the upper levels (0 to 4) repeated on every row.
    "id_0" BIGINT,
    "iso" VARCHAR(3) COLLATE "default",
    "name_0" VARCHAR(75) COLLATE "default",
    "id_1" BIGINT,
    "name_1" VARCHAR(75) COLLATE "default",
    "id_2" BIGINT,
    "name_2" VARCHAR(75) COLLATE "default",
    "id_3" BIGINT,
    "name_3" VARCHAR(75) COLLATE "default",
    "id_4" BIGINT,
    "name_4" VARCHAR(100) COLLATE "default",

    -- Columns for this level.
    "id_5" BIGINT,
    "name_5" VARCHAR(75) COLLATE "default",
    "ccn_5" BIGINT,
    "cca_5" VARCHAR(25) COLLATE "default",
    "type_5" VARCHAR(25) COLLATE "default",
    "engtype_5" VARCHAR(25) COLLATE "default",

    "geom" "public"."geometry",
    CONSTRAINT "fra_adm5_pkey" PRIMARY KEY ("gid")
)
WITH (OIDS = FALSE);

这个数据结构已经相当明显了:adm0基本为国家的基础信息数据,而adm1~adm5均为下级行政区,并且下一级的数据包含上一级的数据ID,相对来说结构已经非常直观。主要问题是默认的层级关系相对分散,不同国家的层级关系并不一致,有些结构上的问题可能并没有那么明确。

此时已经导入了728张数据表,接下来就需要对这些数据表进行整理,毕竟这么多表的存在不便于搜索等工作,而且看了一些地区,貌似没有城市,比如美国的数据只支持到县,不太科学的样子,所以数据上的挖掘还有待进一步的处理和发现。

当前还没有任何评论

写下你最简单的想法