2019-06-15

千葉県市区町村の高齢化率

sf パッケージを用いたコロプレス・マップのプロット。

今回は千葉県市区町村の高齢化率を例にとる。

まずは、千葉県行政区分のシェイプファイルが必要だが、これは ESRI ジャパンが無償配布している「全国市区町村界データ」から、千葉県の部分を取り出して利用する。

入手先は、

ダウンロードした zip ファイルを解凍、Rに読み込んで、千葉県のデータのみを抽出する。なお、末尾（60番目）のオブジェクトは「所属未定地」なので削除しておく。

library(dplyr)

jp <- sf::st_read("japan_ver81.shp")
jp %>% filter(KEN %in% "千葉県") -> chiba
chiba <- chiba[-60,]

次に、人口の最新データを千葉県の公式サイトから入手する。

ここにある「第1表　男女別、年齢（3区分）別人口-県・市区町村・11地域（エクセル：19KB）」に65歳以上の人口及び、その割合が市区町村別に掲載されている。

このエクセル・ファイルをRに直接読み込んでもいいのだが、処理が煩雑になるので、必要な部分だけ CSVファイルに変換したものを作成して gist にアップロードしてある。

chiba_pop_20180401.csv

これをダウンロードしRに読み込んで、シェイプファイルとマージする。

pop <- read.csv("chiba_pop_20180401.csv")
chiba2 <- left_join(chiba, pop, by = c("SIKUCHOSON" = "sikuchoson"))

もとの人口データで、コラム名「r65」が高齢化率を表している。
これを cut_interval 関数で5％刻みの階級に分類して、それぞれ異なる色で塗り分ける。

library(ggplot2)
library(ggthemes)

ggplot() +
geom_sf(data = chiba2,
                aes(fill = cut_interval(r65, length = 5)),
                alpha = 0.8, colour = 'grey20', size = 0.1) +
scale_fill_brewer(palette = "YlOrRd",
                             name = "Residents of \nage 65 and older(%)") +
theme_bw() +
theme(axis.line = element_blank(),
            axis.text.x = element_blank(),
           axis.text.y = element_blank(),
           axis.ticks = element_blank(),
           panel.border = element_blank()) +
theme(legend.position = c(0.95, 0.22))

f:id:fusion0202:20190615193809p:plain

2019-06-13

太平洋ひとりぼっち

ggplot2 R-GIS

これの第２章に、アカウミガメの移動軌跡の例が載っている。

発信機をつけて人工衛星で追跡したら、メキシコから日本まで、太平洋横断の旅が観察された。その経過をプロットするというもの。

データファイルとRスクリプトは上記サイトからダウンロードできる（Data sets and script の “2”）。

###################################################
### code chunk number 52: cm.Rnw:1002-1004
###################################################
turtle_df <- read.csv("seamap105_mod.csv")
summary(turtle_df)

###################################################
### code chunk number 54: cm.Rnw:1021-1027
###################################################
timestamp <- as.POSIXlt(strptime(as.character(turtle_df$obs_date), "%m/%d/%Y %H:%M:%S"), "GMT")
turtle_df1 <- data.frame(turtle_df, timestamp=timestamp)
turtle_df1$lon <- ifelse(turtle_df1$lon < 0, turtle_df1$lon+360, turtle_df1$lon)
turtle_sp <- turtle_df1[order(turtle_df1$timestamp),]
coordinates(turtle_sp) <- c("lon", "lat")
proj4string(turtle_sp) <- CRS("+proj=longlat +ellps=WGS84")

###################################################
### code chunk number 55: cm.Rnw:1031-1034
###################################################
library(maptools)
gshhs.c.b <- system.file("share/gshhs_c.b", package="maptools")
pac <- Rgshhs(gshhs.c.b, level=1, xlim=c(130,250), ylim=c(15,60), verbose=FALSE)

###################################################
### code chunk number 56: cm.Rnw:1039-1051
###################################################
plot(pac$SP, axes=TRUE, col="khaki2", xaxs="i", yaxs="i")
plot(turtle_sp, add=TRUE)
m_rle <- rle(months(turtle_sp$timestamp))
clen <- cumsum(m_rle$lengths[-length(m_rle$lengths)])-1
crds <- coordinates(turtle_sp)
text(crds[clen,], labels=m_rle$values[-1], pos=3, offset=1.5, srt=45)

f:id:fusion0202:20190613023423p:plain

同じような図を、ggplot2 で描いてみた。

library(ggplot2)
library(ggthemes)

turtle_df <- read.csv("seamap105_mod.csv")
timestamp <- as.POSIXlt(strptime(as.character(turtle_df$obs_date),
                                             "%m/%d/%Y %H:%M:%S"), "GMT")
turtle_df1 <- data.frame(turtle_df, timestamp = timestamp)
turtle_df1$lon <- ifelse(turtle_df1$lon < 0, turtle_df1$lon + 360, turtle_df1$lon)
turtle_sp <- turtle_df1[order(turtle_df1$timestamp),]
m_rle <- rle(months(turtle_sp$timestamp))
clen <- cumsum(m_rle$lengths[-length(m_rle$lengths)]) - 1
turtle_sp_pts <- turtle_sp[clen,]
turtle_sp_pts <- cbind(turtle_sp_pts, label = m_rle$values[-1])

world <- map_data('world2')

p <- ggplot() + geom_map(data = world, map = world,
                                  aes(x = long, y = lat, group = group, map_id =r egion),

                                  fill = "#ffffbb", colour = "#7f7f7f", size = 0.5)
p <- p + xlim(115, 270) + ylim(0, 60)
p <- p + geom_line(data = turtle_sp, aes(x = lon, y = lat),
                                 colour = "darkblue", alpha=0.4)
p <- p + geom_point(data = turtle_sp_pts, aes(x = lon, y = lat),
                                  colour = "darkblue", size = 3)
p <- p + geom_text(data = turtle_sp_pts,

                                 aes(x = lon + 1, y = lat + 1, label = label), angle = 45,

                                 colour = "darkred", hjust = -0.1, nudge_x = -0.1)
p <- p + coord_equal() + theme_map()
print(p)

f:id:fusion0202:20190613024809p:plain

なお、このアカウミガメのデータは、だいぶ以前に Nature で紹介された研究からのものと思われる。

2019-06-10

関東甲信の梅雨／期間と降水量の関係

以前の記事「関東甲信・梅雨入り日の推移」の続き。

気象庁のウェブページにある表：

f:id:fusion0202:20190610191223p:plain

これをスクレイピングして、各年の梅雨の期間（日数）を求め、これと降水量の平年比との関係を調べる。

library(rvest)
library(dplyr)
library(stringr)
library(ggplot2)

dt <- read_html("https://www.data.jma.go.jp/fcd/yoho/baiu/kako_baiu09.html")

dt %>% html_nodes(xpath = "//table") %>% html_text() %>%
str_replace_all(pattern = "－", replacement = ",") %>%
str_replace_all(pattern = "ごろ", replacement = ",") %>%
str_replace_all(pattern = "年", replacement = "-") %>%
str_replace_all(pattern = "月", replacement = "-") %>%
str_replace_all(pattern = "日", replacement = "") %>%
str_split("\n", simplify = T) -> tab

tab2 <- str_split(tab[2,], ",", simplify = T)
tab3 <- tab2[6:(nrow(tab2)-2),]

ここまで実行すると、以下のような文字列のテーブルができる。

> head(tab3)
       [,1]                 [,2]       [,3]
[1,] "1951-6-15" "7-18" "121 "
[2,] "1952-6-14" "7-16" "151 "
[3,] "1953-6- 1"   "7-24" "149 "
[4,] "1954-6- 6"   "7-25" "135 "
[5,] "1955-6-13" "7- 9" "60 "
[6,] "1956-6- 9"   "7-26" "81 "

以下、梅雨入りの日と梅雨明けの日の差分を求め、年号および降水量とともにデータフレームを作る。

tab3[,1] %>% str_sub(start = 1, end = 4) -> Year
Duration <- as.Date(paste0(Year, "-", tab3[,2])) - as.Date(tab3[,1])
duration <- as.numeric(Duration)
rainfall <- as.numeric(tab3[,3])
year <- as.numeric(Year)

df <- data.frame(year, duration, rainfall)

梅雨の日数の推移のプロット：

g <- ggplot(df,aes(x = year, y = duration))
g <- g + geom_line(color = "darkblue", size = 0.5, alpha = 0.4)
g <- g + geom_point(color = "darkblue")
g <- g + theme(axis.title = element_text(size = 14))
g <- g + ylim(0,80)
g <- g + ylab("Duration (days)")
g <- g + ggtitle("梅雨の期間（日）")
g <- g + theme(plot.title = element_text(size=18))
print(g)

f:id:fusion0202:20190610192605p:plain

降水量平年比の推移のプロット：

g <- ggplot(df,aes(x = year, y = rainfall))
g <- g + geom_line(color = "darkblue", size = 0.5, alpha = 0.4)
g <- g + geom_point(color = "darkblue")
g <- g + theme(axis.title = element_text(size = 14))
g <- g + ylim(0,160)
g <- g + ylab("Rainfall (average ratio; %)")
g <- g + ggtitle("降水量平年比（％）")
g <- g + theme(plot.title = element_text(size = 18))
print(g)

f:id:fusion0202:20190610192924p:plain

最後に期間と降水量の散布図：

g <- ggplot(df,aes(x = duration, y = rainfall))
g <- g + geom_point(color = "darkblue")
g <- g + theme(axis.title = element_text(size = 14))
g <- g + xlim(20,80) + ylim(40,160)
g <- g + xlab("Duration (day)") + ylab("Rainfall (average ratio; %)")
g <- g + ggtitle("梅雨の期間と降水量平年比")
g <- g + theme(plot.title = element_text(size = 18))
g <- g + stat_smooth(method = "lm", se = T, color = "grey60")
print(g)

f:id:fusion0202:20190610193131p:plain

ほんの少し、正の相関があるように見えるが、

> cor.test(df$rainfall, df$duration, use = "complete.obs")

             Pearson's product-moment correlation

data:    df$rainfall and df$duration
t = 1.459, df = 65, p-value = 0.1494
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.06491531   0.40112109
sample estimates:
       cor
0.17807

p-value = 0.1494 なので、有意ではない。

2019-05-31

積み上げ折れ線グラフ

ggplot2 R

例として、国立がんセンターの「がんに関する統計データ」の子宮頸がん死亡データを使う。

ganjoho.jp

ここからエクセルファイルをダウンロードしRに読み込む。

エクセルデータを読み込むためのパッケージには各種あるが、ここでは gdata を用いている。

続いて、子宮頸部のコード（＝17）を確認して、該当データを抽出する。

なお、データは５歳刻みの年齢階級に分割されているが、この並びを反転させておくのが、グラフをプロットするためのポイント。

library(gdata)
library(ggthemes)
library(ggplot2)
library(reshape2)

fn <- "cancer_mortality(1958-2017).xls"
url <- paste0("https://ganjoho.jp/data/reg_stat/statistics/dl/", fn)
download.file(url, destfile = fn)

d <- read.xls(fn, sheet = 3)
cr <- d[d$コード == 17,]
year <- cr$死亡年
num <- cr[25:8]

colnames(num) <- rev(c( "0-4",      "5-9",     "10-14", "15-19",
                                         "20-24", "25-29", "30-34", "35-39", "40-44",
                                           "45-49", "50-54", "55-59", "60-64", "65-69",
                                           "70-74", "75-79", "80-84", "85-"))

積み上げ折れ線グラフは geom_area で簡単にプロットできる。

df <- melt(cbind(year, num), id.var = "year")
colnames(df) <- c("Year", "Age", "Number")

cols = colorRampPalette(c("#0068b7", "white", "#f39800"))

p <- ggplot(df, aes(x = Year, y = Number))
p <- p + geom_area(aes(group = Age, fill = Age))
p <- p + scale_fill_manual(values = cols(18))
p <- p + theme_economist()
p <- p + theme(axis.title = element_text(size = 14))
p <- p + ggtitle("子宮頸がん死亡数（1958〜2017）")
print(p)

f:id:fusion0202:20190531082710p:plain

2019-05-30

関東甲信・梅雨入り日の推移

気象庁のウェブサイトに「昭和26年（1951年）以降の梅雨入りと梅雨明け（確定値）：関東甲信」のページがある。

ここに掲載されている表（table）を例に、ウェブスクレイピングを行ってみた（それに続く処理は参考である）。

library(rvest)
library(dplyr)

dt <- read_html("https://www.data.jma.go.jp/fcd/yoho/baiu/kako_baiu09.html")

dt %>% html_nodes(xpath = "//table") %>% html_text() -> tab
tab2 <- unlist(strsplit(tab [ [ 2 ] ], "\n"))

> head(tab2)
[1] "関東甲信"
[2] "年"
[3] "入り"
[4] "明け"
[5] "梅雨の時期の降水量の平年比（地域平均値）(%)"
[6] "1951年6月15日ごろ7月18日ごろ121 "
> tail(tab2)
[1] "2014年6月 5日ごろ7月21日ごろ116 "
[2] "2015年6月 3日ごろ7月10日ごろ128 "
[3] "2016年6月 5日ごろ7月29日ごろ74 "
[4] "2017年6月 7日ごろ7月 6日ごろ71 "
[5] "2018年6月 6日ごろ6月29日ごろ92 "
[6] "平年6月 8日ごろ7月21日ごろ　 "

このように、ヘッダー部分が５個、その後に各年のデータ、そして最後に平年データという具合に、表の要素が文字列として取得できる。

以下は、これらの文字列から梅雨入り日を抽出し、回帰直線とともにプロットするスクリプト。

library(ggplot2)
library(ggthemes)

date <- chartr("月", "-", substr(tab2[6:(length(tab2) - 1)], 6, 9))
Date <- as.Date(paste0("2000-", date))
year <- 1951:(1950 + length(Date))
df <- data.frame(year, Date)

g <- ggplot(data = df, aes(x = year, y = Date))
g <- g + geom_line(color = "darkblue", size = 0.5, alpha = 0.4)
g <- g + geom_point(color = "darkblue")
g <- g + stat_smooth(method = "lm", se = F, color = "grey60")
g <- g + theme_economist()
g <- g + theme(axis.title = element_text(size = 15))
print(g)

f:id:fusion0202:20190530130033p:plain

2019-05-25

太陽黒点数の推移プロット

国立天文台のサイト：

　　http://solarwww.mtk.nao.ac.jp/mitaka_solar1/data03/sunspots/number

から、太陽黒点の月平均個数を取得して、折れ線グラフを描く。

もうだいぶ前に作ったものなので、稚拙なスクリプトではあるが、いちおう動作する。

library(ggplot2)
library(ggthemes)

last.year <- as.numeric(substr(Sys.time(), 1, 4))

url <- "http://solarwww.mtk.nao.ac.jp/mitaka_solar1/data03/sunspots/number/rz"
df <- data.frame(NULL, NULL, NULL)

for(year in 1929:last.year) {
   url2 <- paste(url, as.character(year), ".txt", sep = "")
   rz <- file(url2, "r")
   while(length(str <- readLines(con = rz, 1)) != 0) {
      if(length(grep("Mon", str)) == 1) {
        vec <- unlist(strsplit(str, " "))
        vec <- vec[vec != ""]
        df <- rbind(df, as.numeric(tail(vec, n = 3)))
     }
   }
}

seq <- 0:(nrow(df) - 1)
yy <- as.character(seq %/% 12 + 1929)
mm <- as.character(seq %% 12 + 1)
date <- as.Date(paste(yy, "-", mm, "-01", sep = ""))
sunspots.df <- cbind(date, df)
names(sunspots.df) <- c("Date", "North", "South", "Total")

str <- substr(tail(sunspots.df$Date, 1), 1, 7)
substring(str, 5, 5) <- "/"
title <- paste("太陽黒点相対数の推移（月平均; 1929/01 ～ ", str, "）", sep ="")

g <- ggplot(sunspots.df, aes(x = Date, y = Total))
g <- g + geom_line(color = "darkblue", size = 0.1)
g <- g + geom_point(size = 0.5)
g <- g + xlab("year") + ylab("Number of Sun Spots")
g <- g + ggtitle(title)
g <- g + theme_economist()
g <- g + theme(axis.title=element_text(size=14))
print(g)