---
title: "StringrExamples"
author: "Kathleen"
date: "September 21, 2017"
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## R Markdown

```{r}
# Basics: creating strings, measuring their length and combining them.
library(tidyverse)
library(stringr)

string1 <- "This is a string"
string2 <- 'If I want to include a "quote" inside a string, I use single quotes'

# Escapes: a literal double quote and a literal backslash.
x <- c("\"", "\\")
x

x <- c("apple", "banana", "pear") # this is a character vector
str_length(x) # returns a vector with the length of each string
str_length("Welcome to DS4100")
str_length(c("kathleen", "gregg", "ava"))

# str_c() combines strings; `sep` is placed between the arguments.
str_c("k", "c")
str_c("k", "c", sep = "|")

y <- c("doll", "yoyo", NA)
str_length(y) # the length of NA is NA
str_replace_na(y) # substitute the "NA" string for NA

str_c(c("k", "t", "d"), collapse = "") # make it 1 string
str_c(c("k", "t", "d"), collapse = ",") # make it 1 string

# collapse reduces all elements to 1
str_c(c("k", "t", "d"), c("a", "m", "s"), collapse = "|") # str_c is vectorized
str_c(c("k", "t", "d"), c("a", "m", "s"), sep = ",") # result has 3 elements

# collapse reduces all elements to 1
str_c(c("k", "t", "d"), c("a", "m", "s"), sep = ",", collapse = "|")
```

```{r}
# Build a single alternation pattern ("red|orange|...") from a vector.
colours <- c("red", "orange", "yellow", "green", "blue", "purple")
colour_match <- str_c(colours, collapse = "|")
colour_match

# built in sentences to use for testing
localSentences <- sentences
# ... replaced straight away by a small hand-made vector that is easier to read
localSentences <- c("17 Barbara Rd", "18 Stella Rd.", "UNKNOWN")

# pull out substrings by position
str_sub("Apple", 1, 3)
str_sub(c("Apple", "Pear", "Plum"), 1, 3)
str_sub(c("Apple", "Pear", "Plum", "Te"), 1, 3) # only 2 characters in last entry

# functions for cleaning up strings
str_to_lower("Apple")
str_to_upper("apple pear")
str_to_title("THIS IS NOT WHAT i WANT")
```

```{r}
str_trim(" Now is the time ")
str_wrap(c("the time is notw right for revolting"), width = 1)

# str_view takes a character vector and a regular expression and shows you
# the match in the string for the regular expression

# an array of character vectors
zz <- c("apple", "pear", "plum")

# find specific substrings
str_view(zz, "pl")
str_view(zz, "p")

# match 0 or 1 p's in the strings
str_view(c("apple", "pear", "plum"), "p?")

x <- c("apple", "banana", "pear") # this is a character vector
xp <- c("apple.", "banana.", "pear.") # this is another character vector with an embedded dot
xb <- c("apple\\", "banana\\", "pear\\") # this is a character vector with an embedded \

str_view(x, "an")
str_view(x, ".a.") # a dot matches any character but a new line
str_view(xp, "e\\.") # match e followed by a dot
str_view(xb, "r\\\\") # match r followed by a \
```

```{r }
# anchoring
c("apple\\", "banana\\", "pear\\") %>% str_match("^a") # only match strings that start with "a"
c("apple\\", "banana\\", "pear\\") %>% str_view("^a") # only match strings that start with "a"

str_view("12345", "^\\d\\d\\d") # three digits at the start of the string
str_view("12345", "[^12]") # this means not 1 or 2
str_view_all("12345", "[^12]") # highlight every match, not only the first

str_view(x, "p?") # want 0 or 1 p in the string
str_view(x, "p{1,2}") # one or two consecutive p's

# Backreferences: \\1 and \\2 repeat whatever the first and second groups matched.
palidrome <- c("anna", "eve", "hannah")
str_view(palidrome, "(.)(.)\\2\\1")
str_view(palidrome, "(.)(.)\\2\\1?")
str_view(palidrome, "(.)(.)(.)\\3\\2\\1?")

# detect if a string matches a regular expression
str_detect(x, "e")
sum(str_detect(x, "e")) # number of strings that match
mean(str_detect(x, "e")) # proportion of strings that match
```

```{r}
str_extract(x, "p..") # substrings that start with a p and are 3 chars.

xc <- x
xd <- x
str_replace(xc, "p..", "x") # replaces the first match in each string
str_replace_all(xc, c("p" = "x", "l" = "y")) # named vector = several replacements

str_split("apples corn pears", " ")
str_split("apples corn pears", "p..")
str_locate("apples corn pears", "p..")
str_locate(x, "p..") # returns a matrix

# matching substrings
str_match(localSentences, "[1234567890]+")
str_match(c("adad", "nana", "anna"), "(..)+\\1")
str_match(c("adad", "nana", "anna"), "(..)+")
str_match(c("adad", "nana", "anna"), "(.)(.)+\\2\\1")
str_match(c("adada", "nana", "annaanna"), "(.)(.)\\2\\1")
str_detect(c("adada", "nana", "annaanna", "adda", "addda"), "(.)(.)\\2\\2\\1")
str_match(localSentences, "\\d+")

ds <- c(localSentences, "123", "456")
# Replace digits with "-". Inside [] a `|` is a literal pipe character, not
# alternation, so the class is written as a plain range.
str_replace(ds, "[0-9]", "-") # first digit only
str_replace_all(ds, "[0-9]", "-") # every digit
```

```{r}
library(forcats)

# Listing the levels explicitly makes the factor sort in calendar order
# rather than alphabetically.
month_levels <- c(
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
)
x1 <- c("Dec", "Apr", "Jan", "Mar")
y1 <- factor(x1, levels = month_levels)
y1
#> [1] Dec Apr Jan Mar
#> Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
sort(y1)
```
```