Assignment 6

Using my ebird dataset: First calculate the total number of species seen each month of each year in each location. Then plot the number of species seen each month with the color of the points indicating year and facet this plot by location.

# library() stops with an error when the package is missing, whereas
# require() only returns FALSE and lets the script fail later on.
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(ggplot2) 
library(ggthemes) #extended themes for ggplot
library(patchwork) #tools for composing multiple panels

# Read the eBird records from the CSV file and keep them as MBT_ebird
MBT_ebird <- readr::read_csv(file = "MBT_ebird.csv")

## New names:
## Rows: 6595 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): list_ID, common_name, scientific_name, location dbl (8): ...1, count,
## duration, latitude, longitude, count_tot, month, year date (1): date time (1):
## time
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

# Group the records by month, year, and location
grouped_ebird <- group_by(MBT_ebird, month, year, location)

# Add a column n holding the number of records of each species within its
# month/year/location group (kept for reference; not needed for the totals)
num_ea_species <- add_count(grouped_ebird, scientific_name)

# Number of species per month per year per location.
# n_distinct() counts each scientific name once per group; n() would count
# every record, so a species reported more than once in the same month and
# location would inflate the total.
sum_species <- summarize(num_ea_species, n = n_distinct(scientific_name))

## `summarise()` has grouped output by 'month', 'year'. You can override using the
## `.groups` argument.

# Plot the number of species (n) against month, one panel per location.
# year is converted to a factor so each year gets its own discrete colour
# instead of a position on a continuous gradient.
p1 <- ggplot(data = sum_species) +
  aes(x = month, y = n, color = factor(year)) +
  geom_point(size = 3) +
  # month is read as a number; presumably 1-12 -- confirm against the data
  scale_x_continuous(breaks = 1:12) +
  facet_wrap(~location) +
  labs(x = "Month", y = "Number of species", color = "Year")
print(p1)

Using the dataset from Assignment #5: Plot a comparison of mass by treatment including the individual observations, the mean, and standard error of the mean. Use point color or shape to indicate the sex.

# Question 2: load the cleaned data set from Assignment 5
dataset <- readr::read_csv(file = "clean_data.csv")

## Rows: 20 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gender, Group
## dbl (4): Sample, body_length, age, Mass
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Mass by treatment group: individual observations (coloured by Gender),
# the group mean (green bar), and error bars around the mean.
p2 <- ggplot(data = dataset, aes(x = Group, y = Mass)) +
  # Jitter horizontally only: by default geom_jitter() also jitters
  # vertically, which would move the points away from the measured Mass.
  geom_jitter(aes(color = Gender), size = 2, height = 0) +
  # Group mean drawn as a short horizontal bar
  stat_summary(fun = mean,
               geom = "crossbar",
               width = 0.2,
               color = "green") +
  # No summary function is supplied here, so ggplot2 falls back to
  # mean_se() (mean +/- one standard error) for the error bars.
  stat_summary(geom = "errorbar",
               width = 0.5) +
  labs(x = "Treatment", y = "Mass", color = "Gender")

p2  # output graph

## Warning: Removed 4 rows containing non-finite values (stat_summary).
## Removed 4 rows containing non-finite values (stat_summary).

## No summary function supplied, defaulting to `mean_se()`

## Warning: Removed 4 rows containing missing values (geom_point).

Generate a scatter plot of age and mass, indicate treatment with point shape or color, and fit separate regression lines (without CI) to each treatment.

# Mass against age: points are shaped by treatment group, and each group
# gets its own linear-model fit drawn without a confidence band.
p3 <- ggplot(dataset, mapping = aes(x = age, y = Mass)) +
  geom_point(mapping = aes(shape = Group), size = 3) +
  geom_smooth(
    mapping = aes(color = Group, group = Group), # one fitted line per group
    method = "lm",
    se = FALSE, # leave out the confidence ribbon
    size = 1
  ) +
  labs(x = "Age", y = "Mass", color = "Group")

p3 # output graph

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 4 rows containing non-finite values (stat_smooth).

## Warning: Removed 4 rows containing missing values (geom_point).

Combine the plots from 2 and 3 using patchwork, tag each panel with a number or letter, and include a title for the overall plot.

# Stack the two panels, tag them A and B, and add an overall title.
# tag_levels takes one enumeration format per nesting level, so "A" alone
# produces the tags A, B, ... for the panels.
(p2 / p3) +
  plot_annotation(
    title = "Comparison of Mass with Age and Treatment Group",
    tag_levels = "A"
  )

## Warning: Removed 4 rows containing non-finite values (stat_summary).
## Removed 4 rows containing non-finite values (stat_summary).

## No summary function supplied, defaulting to `mean_se()`

## Warning: Removed 4 rows containing missing values (geom_point).

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 4 rows containing non-finite values (stat_smooth).

## Warning: Removed 4 rows containing missing values (geom_point).

In an Rmarkdown document present your code with the figures generated inline.

Assignment 6

2022-12-13