関東甲信の梅雨／期間と降水量の関係

気象庁のウェブページにある表：

f:id:fusion0202:20190610191223p:plain

これをスクレイピングして、各年の梅雨の期間（日数）を求め、これと降水量の平年比との関係を調べる。

library(rvest)
library(dplyr)
library(stringr)
library(ggplot2)

# Download the JMA page that lists past rainy-season (baiu) records.
dt <- read_html("https://www.data.jma.go.jp/fcd/yoho/baiu/kako_baiu09.html")

# Take the text of every <table> node and rewrite the Japanese date notation
# into machine-friendly fields:
#   "－" and "ごろ" become "," (field separators),
#   "年" and "月" become "-" (date separators), "日" is removed.
# Each table's text is then split into lines; with simplify = TRUE the result
# is a character matrix with one row per table.
tab <- dt %>%
  html_nodes(xpath = "//table") %>%
  html_text() %>%
  str_replace_all(pattern = "－", replacement = ",") %>%
  str_replace_all(pattern = "ごろ", replacement = ",") %>%
  str_replace_all(pattern = "年", replacement = "-") %>%
  str_replace_all(pattern = "月", replacement = "-") %>%
  str_replace_all(pattern = "日", replacement = "") %>%
  str_split("\n", simplify = TRUE)

# Row 2 holds the lines of the second <table> match; split each line on ","
# to obtain one matrix row per line and one column per field.
tab2 <- str_split(tab[2, ], ",", simplify = TRUE)

# Keep rows 6 .. (nrow - 2) only.
# NOTE(review): presumably the skipped leading/trailing rows are headers and
# footers of the page's table -- confirm if the page layout changes.
tab3 <- tab2[6:(nrow(tab2) - 2), ]

ここまで実行すると、以下のような文字列のテーブルができる。

> head(tab3)
       [,1]                 [,2]       [,3]
[1,] "1951-6-15" "7-18" "121 "
[2,] "1952-6-14" "7-16" "151 "
[3,] "1953-6- 1"   "7-24" "149 "
[4,] "1954-6- 6"   "7-25" "135 "
[5,] "1955-6-13" "7- 9" "60 "
[6,] "1956-6- 9"   "7-26" "81 "

以下、梅雨入りの日と梅雨明けの日の差分を求め、年号および降水量とともにデータフレームを作る。

# Build a per-year data frame from the character matrix `tab3`, whose columns
# are: [,1] onset date "YYYY-M-D", [,2] end date "M-D", [,3] rainfall as text.

# The year is the first four characters of the onset date string.
Year <- str_sub(tab3[, 1], start = 1, end = 4)

# The end-date column carries no year, so the onset year is prepended before
# parsing. The format is stated explicitly so that an unparseable entry simply
# becomes NA instead of as.Date() failing while trying to guess the format.
Duration <- as.Date(paste0(Year, "-", tab3[, 2]), format = "%Y-%m-%d") -
  as.Date(tab3[, 1], format = "%Y-%m-%d")

# Date - Date yields a difftime in days; keep just the number of days.
duration <- as.numeric(Duration)
rainfall <- as.numeric(tab3[, 3])
year <- as.numeric(Year)

df <- data.frame(year, duration, rainfall)

梅雨の日数の推移のプロット：

# Yearly rainy-season length: one point per year joined by a faint line,
# with the y axis fixed to 0-80.
g <- ggplot(df, aes(x = year, y = duration)) +
  geom_line(color = "darkblue", size = 0.5, alpha = 0.4) +
  geom_point(color = "darkblue") +
  ylim(0, 80) +
  labs(y = "Duration (days)", title = "梅雨の期間（日）") +
  theme(
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 18)
  )
print(g)

f:id:fusion0202:20190610192605p:plain

降水量平年比の推移のプロット：

# Yearly rainfall relative to the average (%): one point per year joined by a
# faint line, with the y axis fixed to 0-160.
g <- ggplot(df, aes(x = year, y = rainfall)) +
  geom_line(color = "darkblue", size = 0.5, alpha = 0.4) +
  geom_point(color = "darkblue") +
  ylim(0, 160) +
  labs(y = "Rainfall (average ratio; %)", title = "降水量平年比（％）") +
  theme(
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 18)
  )
print(g)

f:id:fusion0202:20190610192924p:plain

最後に期間と降水量の散布図：

# Scatter plot of rainy-season length against rainfall ratio, overlaid with a
# linear-regression line and its confidence band.
g <- ggplot(df, aes(x = duration, y = rainfall))
g <- g + geom_point(color = "darkblue")
g <- g + theme(axis.title = element_text(size = 14))
# NOTE: xlim()/ylim() set scale limits, so any observation outside 20-80 days
# or 40-160 % is dropped before the smoother is fitted, not merely hidden.
g <- g + xlim(20, 80) + ylim(40, 160)
g <- g + xlab("Duration (day)") + ylab("Rainfall (average ratio; %)")
g <- g + ggtitle("梅雨の期間と降水量平年比")
g <- g + theme(plot.title = element_text(size = 18))
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
g <- g + stat_smooth(method = "lm", se = TRUE, color = "grey60")
print(g)

f:id:fusion0202:20190610193131p:plain

ほんの少し、正の相関があるように見えるが、

> # cor.test() drops incomplete (NA) pairs by itself; `use` is an argument of cor(), not cor.test()
> cor.test(df$rainfall, df$duration)

             Pearson's product-moment correlation

data:    df$rainfall and df$duration
t = 1.459, df = 65, p-value = 0.1494
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.06491531   0.40112109
sample estimates:
       cor
0.17807

p-value = 0.1494 なので、有意水準 5% では有意ではない（相関係数は約 0.18 と弱い）。