require(tidyverse)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(ggthemes) #extended themes for ggplot
library(patchwork) #tools for composing multiple panels
# Import the eBird records and assign them to a variable
MBT_ebird <- read_csv("MBT_ebird.csv")
## New names:
## Rows: 6595 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): list_ID, common_name, scientific_name, location dbl (8): ...1, count,
## duration, latitude, longitude, count_tot, month, year date (1): date time (1):
## time
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Group the records by month, year, and location
grouped_ebird <- group_by(MBT_ebird, month, year, location)
# Add a column `n` with the number of records of each scientific_name within
# its month/year/location group (kept for inspection; not used by the summary)
num_ea_species <- add_count(grouped_ebird, scientific_name)
# Count the number of distinct species per month, year, and location.
# n() would count rows, so a species recorded more than once in a group would
# be counted repeatedly; n_distinct() counts each scientific_name once.
# `.groups = "drop_last"` keeps the default output grouping (month, year)
# while stating it explicitly, which also avoids the summarise() message.
sum_species <- summarize(
  grouped_ebird,
  n = n_distinct(scientific_name),
  .groups = "drop_last"
)
# Plot n vs month: month on the x-axis, the per-group count n on the y-axis,
# with the point color showing the year.
# year is converted to a factor so each year gets its own discrete color
# instead of a position on a continuous gradient.
p1 <- ggplot(data = sum_species) +
  aes(x = month, y = n, color = factor(year)) +
  geom_point(size = 3) +
  # assumes month is numeric 1-12 (it is read as a double) -- TODO confirm
  scale_x_continuous(breaks = 1:12) +
  labs(color = "year") # keep the legend title readable after factor()
print(p1)
# On to Question 2
# Read the cleaned measurements used by the Question 2 figures
dataset <- read_csv("clean_data.csv")
## Rows: 20 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gender, Group
## dbl (4): Sample, body_length, age, Mass
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mass by treatment group: jittered points colored by gender, a green bar at
# the group mean, and an error bar of mean +/- standard error.
# Rows with a missing Mass cannot be drawn or summarised; `na.rm = TRUE` drops
# them explicitly in each layer instead of relying on ggplot2's warnings.
p2 <- ggplot(data = dataset, aes(x = Group, y = Mass)) +
  # x and y are inherited from ggplot(); only color is layer-specific
  geom_jitter(aes(color = Gender), size = 2, na.rm = TRUE) +
  stat_summary(
    fun = mean,
    geom = "crossbar",
    width = 0.2,
    color = "green",
    na.rm = TRUE
  ) +
  # State the summary function explicitly rather than relying on the
  # implicit fallback to mean_se()
  stat_summary(
    fun.data = mean_se,
    geom = "errorbar",
    width = 0.5,
    na.rm = TRUE
  ) +
  labs(x = "Treatment", y = "Mass", color = "Gender") # axis and legend titles
p2 # output graph
# Mass against age: point shape marks the treatment group, and a separate
# straight-line fit is drawn for each group
p3 <- ggplot(dataset, aes(x = age, y = Mass)) +
  geom_point(aes(shape = Group), size = 3) +
  geom_smooth(
    aes(group = Group, color = Group), # one regression line per treatment
    method = "lm",
    se = FALSE, # no standard-error band
    size = 1
  ) +
  labs(x = "Age", y = "Mass", color = "Group")
p3 # output graph
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
# Stack p2 above p3 and add a shared title plus automatic panel tags.
# `tag_levels` takes one enumeration format per nesting level ("A" means
# upper-case letters: A, B, ...), not the list of tags themselves, so a single
# "A" is the correct value for this un-nested layout.
(p2 / p3) +
  plot_annotation(
    title = "Comparison of Mass with Age and Treatment Group",
    tag_levels = "A"
  )
## Warning: Removed 4 rows containing non-finite values (stat_summary).
## Removed 4 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 4 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
# In an R Markdown document, present your code with the figures generated inline.