Introduction
This tutorial uses the sf package to perform geoprocessing, and ggplot to visualize the data.
Additionally, we practice downloading open-source data from the internet.
Adjust Settings, Load Packages
knitr::opts_chunk$set(echo = TRUE)
for(p in c("ggplot2","sf","dplyr","plyr","raster")){library(p,character.only = T)}
directory=".../Layers_Canada/CanadaPopulationDensity/"
theme_set(theme_bw()+
theme(
panel.background = element_blank(),axis.ticks = element_blank(),
legend.background = element_blank(),panel.grid = element_line(colour = 0)
))
Download Open-Source data from open.canada.ca
setwd(directory)
if(!("CAdata.zip" %in% list.files())){
#https://open.canada.ca/data/en/dataset/5be03a46-8504-40a7-a96c-af195bae0428
download.file(url = "http://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/files-fichiers/gecu000e11a_e.zip",
dest=paste0(directory,"CAdata.zip"))
}
if(!("CAcensusdata" %in% list.files())){unzip(zipfile = "CAdata.zip",exdir = "CAcensusdata")}
#https://open.canada.ca/data/en/dataset/ece81c43-aa4e-41ef-86c2-3835eb5aa95c
download.file(url = "http://www12.statcan.gc.ca/census-recensement/2016/dp-pd/hlt-fst/pd-pl/Tables/CompFile.cfm?Lang=Eng&T=701&OFT=FULLCSV",dest=paste0(directory,"CAcensusdata/CensusData.csv"))
if(!("Borders.zip" %in% list.files())){
#http://thematicmapping.org/downloads/world_borders.php
download.file(url = "http://thematicmapping.org/downloads/TM_WORLD_BORDERS-0.3.zip",dest=paste0(directory,"Borders.zip"))
}
setwd(setwd(directory))
if(!("WorldBorders" %in% list.files())){unzip(zipfile = "Borders.zip",exdir = "WorldBorders")}
Inspect the Canadian Census data from 2016.
We see that population within census divisions is log-normally distributed.
We can also confirm that the population density calculations within each census division match the population and area values.
The population of Canada in 2016 was 35.2 million people.
census=read.csv(paste0(directory,"CAcensusdata/CensusData.csv"))
ggplot(data=census)+
geom_histogram(aes(x=Population..2016))+
scale_x_log10()
# Confirm that Population Density calculations match areas
ggplot(data=census)+
geom_point(aes(y=Population..2016,x=Land.area.in.square.kilometres..2016*Population.density.per.square.kilometre..2016))+
geom_abline(intercept=0,slope=1)
print(paste(round(sum(census$Population..2016,na.rm=TRUE)*10^-6,1),"million people"))
## [1] "35.2 million people"
Inspect GIS Data
proj="+proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs "
CAshp=st_read(paste0(directory,"CAcensusdata/gcd_000e11a_e.shp"))
## Reading layer `gcd_000e11a_e' from data source `...\Layers_Canada\CanadaPopulationDensity\CAcensusdata\gcd_000e11a_e.shp' using driver `ESRI Shapefile'
## Simple feature collection with 293 features and 5 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -141.0181 ymin: 41.71096 xmax: -52.61941 ymax: 83.1355
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
CAshp=st_transform(CAshp,proj)
CAshp=dplyr::left_join(CAshp,census,by=c("CDUID"="Geographic.code"))
ggplot()+
geom_sf(data=CAshp)+
ggtitle("Canada Census Divisions")
borders=st_read(paste0(directory,"WorldBorders/TM_WORLD_BORDERS-0.3.shp"))
## Reading layer `TM_WORLD_BORDERS-0.3' from data source `...\Layers_Canada\CanadaPopulationDensity\WorldBorders\TM_WORLD_BORDERS-0.3.shp' using driver `ESRI Shapefile'
## Simple feature collection with 246 features and 11 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -180 ymin: -90 xmax: 180 ymax: 83.6236
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
USA=borders[borders$NAME=="United States",c("NAME","geometry")]
USA=st_transform(USA,proj)
st_crs(USA)$units
## [1] "m"
USA_buf=st_buffer(USA,dist=160934) # 100 miles = 160934 meters
ggplot()+
geom_sf(data=USA,fill="gray",col="transparent")+
geom_sf(data=USA_buf,fill="transparent",col=2)+
ggtitle("US Border and 100 mile buffer")
Inspect Population Density Data
We can see that the region within 100 miles of the US border is highly populated, but there are cities like Calgary, Edmonton, and Saskatoon that are north of this boundary.
ggplot()+
geom_sf(data=CAshp,aes(fill=log10(Population.density.per.square.kilometre..2016)),col="transparent")+
geom_sf(data=USA_buf,col=2,fill="transparent")+
coord_sf(xlim = extent(CAshp)[1:2],ylim = extent(CAshp)[3:4])+
ggtitle("Canadian Census Divisions and Population Density")+
scale_fill_gradient(name="Pop_dens",low="darkblue",high="red",na.value="darkblue",
limits=c(-1.2,3.5),breaks=c(-1:3),labels=(10^c(-1:3)))
Use st_intersection to extract census tracts
We use st_intersection to intersect the buffered US borer polygon and Canadian census tracts. We assume population is evenly distributed within census divisions, allowing us to adjust the size of intersected tracts for their new clipped size. Inspecting their new calculated areas, we see that many tracts are completely within the 100 mile buffer, but others have a much lower area.
Our final calculation finds that 27.3 million people live within 100 miles of the US border, or 77.7% of the population.
intersect=st_intersection(CAshp,USA_buf)
plot=ggplot()+
geom_sf(data=CAshp,fill="gray",col="transparent")+
geom_sf(data=intersect,aes(fill=log10(Population.density.per.square.kilometre..2016)),col="transparent")+
ggtitle("Population of Canada within 100 miles of the US border")+
scale_fill_gradient(name=expression(paste("Persons km",NA^-2)),
low="darkblue",high="red",na.value="darkblue",
limits=c(-1.2,3.5),breaks=c(-1:3),labels=(10^c(-1:3)))+
theme(legend.position = c(0.85,0.8),axis.ticks = element_blank(),
axis.text = element_blank(),axis.title = element_blank())
ggsave(plot,filename = paste0(directory,"BufferPopDens.png"),width = 5,height=4,dpi=300,units="in")
plot
intersect=intersect%>%mutate(area_m2 = st_area(.) %>% as.numeric())
intersect$area_km2=intersect$area_m2*(1000^-2)
ggplot(data=intersect)+
geom_point(aes(y=area_km2,x=Land.area.in.square.kilometres..2016))+
scale_y_log10()+scale_x_log10()+
geom_abline(intercept=0,slope=1)
intersect$pop_2016_intersect=intersect$area_km2*intersect$Population.density.per.square.kilometre..2016
paste0(round(sum(intersect$pop_2016_intersect)*10^-6,1)," million people")
## [1] "27.3 million people"
print(paste0(round(100*sum(intersect$pop_2016_intersect,na.rm=TRUE)/sum(census$Population..2016,na.rm=TRUE),1),"% of population within 100 miles of the US border"))
## [1] "77.7% of population within 100 miles of the US border"
What about Canada's population in relation to some US city? Seattle?
lon=seq(-140,-45,2)
df <- data.frame(lon=c(lon,rev(lon)),lat=c(rep(47.6,length(lon)),rep(30,length(lon))))
df <- rbind(df, df[1,])
seattle <- st_sf(st_sfc(st_polygon(list(as.matrix(df)))), crs = 4326)
seattle=st_transform(seattle,proj)
intersect=st_intersection(seattle,CAshp)
intersect=intersect%>%mutate(area_m2 = st_area(.) %>% as.numeric())
intersect$area_km2=intersect$area_m2*(1000^-2)
intersect$pop_2016_intersect=intersect$area_km2*intersect$Population.density.per.square.kilometre..2016
ggplot()+
geom_sf(data=CAshp,fill="gray",col="transparent")+
geom_sf(data=intersect,aes(fill=log10(Population.density.per.square.kilometre..2016)),col="transparent")+
geom_sf(data=seattle,col=2,fill="transparent")+
coord_sf(xlim = extent(CAshp)[1:2],ylim = extent(CAshp)[3:4])+
ggtitle("Population of Canada south of Seattle, Washington")+
scale_fill_gradient(name=expression(paste("Persons km",NA^-2)),
low="darkblue",high="red",na.value="darkblue",
limits=c(-1.2,3.5),breaks=c(-1:3),labels=(10^c(-1:3)))+
theme(legend.position = c(0.85,0.8),axis.ticks = element_blank(),
axis.text = element_blank(),axis.title = element_blank())
paste0(round(sum(intersect$pop_2016_intersect)*10^-6,1)," million people")
## [1] "23.3 million people"
print(paste0(round(100*sum(intersect$pop_2016_intersect,na.rm=TRUE)/sum(census$Population..2016,na.rm=TRUE),1),"% of population south of Seattle, Washington State"))
## [1] "66.2% of population south of Seattle, Washington State"