関東甲信の梅雨/期間と降水量の関係

以前の記事「関東甲信・梅雨入り日の推移」の続き。

気象庁のウェブページにある表:

f:id:fusion0202:20190610191223p:plain

これをスクレイピングして、各年の梅雨の期間(日数)を求め、これと降水量の平年比との関係を調べる。 

library(rvest)
library(dplyr)
library(stringr)
library(ggplot2)

# Download the JMA page that lists past rainy-season records.
dt <- read_html("https://www.data.jma.go.jp/fcd/yoho/baiu/kako_baiu09.html")

# Flatten every <table> on the page to text and rewrite the Japanese date
# notation so that each record becomes a comma-separated line such as
# "1951-6-15,7-18,121":
#   "-"    -> ","  done first, before hyphens are introduced as date separators
#   "ごろ" -> ","  ("around"); it trails each date, so it also delimits fields
#   "年" / "月" -> "-" and "日" -> ""  (year / month / day markers)
# Splitting on newlines with simplify = TRUE yields a character matrix with
# one row per table and one column per text line.
tab <- dt %>%
  html_nodes(xpath = "//table") %>%
  html_text() %>%
  str_replace_all(pattern = "-", replacement = ",") %>%
  str_replace_all(pattern = "ごろ", replacement = ",") %>%
  str_replace_all(pattern = "年", replacement = "-") %>%
  str_replace_all(pattern = "月", replacement = "-") %>%
  str_replace_all(pattern = "日", replacement = "") %>%
  str_split("\n", simplify = TRUE)

# Row 2 is the text of the second <table> matched; split its lines into fields.
tab2 <- str_split(tab[2, ], ",", simplify = TRUE)

# Keep only the yearly records by dropping the first 5 and the last 2 lines.
# NOTE: these offsets are tied to the page layout at the time of writing
# (presumably header and footer lines) -- re-check them if the page changes.
tab3 <- tab2[6:(nrow(tab2) - 2), ]

 ここまで実行すると、以下のような文字列のテーブルができる。

> head(tab3)
       [,1]                 [,2]       [,3]
[1,]  "1951-6-15"  "7-18"  "121 "
[2,]  "1952-6-14"  "7-16"  "151 "
[3,]  "1953-6- 1"   "7-24"  "149 "
[4,]  "1954-6- 6"   "7-25"  "135 "
[5,]  "1955-6-13"  "7- 9"   "60 "
[6,]  "1956-6- 9"   "7-26"  "81 "

 

 以下、梅雨入りの日と梅雨明けの日の差分(梅雨の日数)を求め、年(西暦)および降水量の平年比とともにデータフレームを作る。

# Year of each record: the first four characters of the onset date ("YYYY-M-D").
Year <- str_sub(tab3[, 1], start = 1, end = 4)

# Column 2 holds only "M-D", so the year is prefixed before parsing.
# Subtracting two Dates gives the season length as a difftime in days.
Duration <- as.Date(paste0(Year, "-", tab3[, 2])) - as.Date(tab3[, 1])

# Plain numeric vectors for plotting and for the correlation test.
year <- as.numeric(Year)
duration <- as.numeric(Duration)
rainfall <- as.numeric(tab3[, 3])

df <- data.frame(year = year, duration = duration, rainfall = rainfall)

 

 梅雨の日数の推移のプロット:

# Rainy-season length (days) by year: a faint line joining the yearly points,
# with the y-axis fixed to 0-80.
g <- ggplot(df, aes(x = year, y = duration)) +
  geom_line(color = "darkblue", size = 0.5, alpha = 0.4) +
  geom_point(color = "darkblue") +
  ylim(0, 80) +
  ylab("Duration (days)") +
  ggtitle("梅雨の期間(日)") +
  theme(
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 18)
  )
print(g)

f:id:fusion0202:20190610192605p:plain

 

 降水量平年比の推移のプロット:

# Rainfall as a percentage of the normal, by year: same styling as the
# duration plot, with the y-axis fixed to 0-160.
g <- ggplot(df, aes(x = year, y = rainfall)) +
  geom_line(color = "darkblue", size = 0.5, alpha = 0.4) +
  geom_point(color = "darkblue") +
  ylim(0, 160) +
  ylab("Rainfall (average ratio; %)") +
  ggtitle("降水量平年比(%)") +
  theme(
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 18)
  )
print(g)

f:id:fusion0202:20190610192924p:plain


 最後に期間と降水量の散布図:

# Scatter plot of season length against rainfall ratio, with a linear
# regression line and its confidence band.
g <- ggplot(df, aes(x = duration, y = rainfall))
g <- g + geom_point(color = "darkblue")
g <- g + theme(axis.title = element_text(size = 14))
# xlim()/ylim() set scale limits, so any observation outside 20-80 days or
# 40-160 % is dropped before the regression below is fitted.
g <- g + xlim(20, 80) + ylim(40, 160)
g <- g + xlab("Duration (day)") + ylab("Rainfall (average ratio; %)")
g <- g + ggtitle("梅雨の期間と降水量平年比")
g <- g + theme(plot.title = element_text(size = 18))
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
g <- g + stat_smooth(method = "lm", se = TRUE, color = "grey60")
print(g)

f:id:fusion0202:20190610193131p:plain

ほんの少し、正の相関があるように見えるが、

> # cor.test() has no `use` argument (that one belongs to cor()); it already
> # restricts the test to complete pairs, so the argument is simply omitted.
> cor.test(df$rainfall, df$duration)

 

             Pearson's product-moment correlation

 

data:    df$rainfall  and  df$duration
t = 1.459,  df = 65,  p-value = 0.1494
alternative hypothesis:  true correlation is not equal to 0
95 percent confidence interval:
-0.06491531   0.40112109
sample estimates:
       cor
0.17807

p-value = 0.1494 は有意水準 5% を上回り、95% 信頼区間も 0 を含むため、この相関は統計的に有意とは言えない。