library(stringr)
library(dplyr)
library(readr)
# Download the full mock data set (1,000 rows, 12 columns) used by the
# examples in this script.
mockstring <- read_csv(
  "https://raw.githubusercontent.com/rsquaredacademy/datasets/master/mock_strings.csv"
)
# Preview of `mockstring`:
## # A tibble: 1,000 x 12
## id image_url domain imageurl email filename phone address url
## <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 https://r~ addto~ http://~ mnew~ PedeMal~ 66-(~ 8 Anha~ https://~
## 2 2 https://r~ gmpg.~ http://~ mdan~ Loborti~ 351-~ 697 Ea~ http://d~
## 3 3 https://r~ samsu~ http://~ hgir~ CongueD~ 33-(~ 89 Dot~ https://~
## 4 4 https://r~ spoti~ http://~ pmcm~ Eleifen~ 86-(~ 98135 ~ http://i~
## 5 5 https://r~ wunde~ http://~ dris~ PurusPh~ 223-~ 7814 P~ https://~
## 6 6 https://r~ alexa~ http://~ cphl~ Element~ 420-~ 4897 L~ https://~
## 7 7 https://r~ googl~ http://~ kdod~ Mattis.~ 1-(7~ 53541 ~ http://v~
## 8 8 https://r~ ed.gov http://~ vhou~ PurusEu~ 62-(~ 4819 H~ https://~
## 9 9 https://r~ jigsy~ http://~ rdik~ JustoEt~ 1-(6~ 68096 ~ https://~
## 10 10 https://r~ jugem~ http://~ tdud~ Ante.ti~ 30-(~ 9595 S~ https://~
## # ... with 990 more rows, and 3 more variables: full_name <chr>,
## # currency <chr>, passwords <chr>
# Build the small working table the remaining examples operate on: the first
# ten rows, restricted to the four columns that are actually used. Without
# this assignment `mock_data` is never created and every later call fails.
mock_data <- mockstring %>%
  slice(1:10) %>%
  select(email, address, full_name, currency)
mock_data
## # A tibble: 10 x 4
## email address full_name currency
## <chr> <chr> <chr> <chr>
## 1 mnewburn0@fastcompany.com 8 Anhalt Crossing Mufi Ruit ¥34.37
## 2 mdankersley1@digg.com 697 East Avenue Leese Furmagi~ $67.37
## 3 hgirhard2@altervista.org 89 Dottie Circle Blakelee Wils~ €33,85
## 4 pmcmenamy3@sciencedirect.com 98135 Blue Bill P~ Terencio McIl~ €42,89
## 5 drisbrough4@bandcamp.com 7814 Pennsylvania~ Debee McErlai~ €13,19
## 6 cphlippi5@surveymonkey.com 4897 Little Fleur~ Fran Painten ¥87.35
## 7 kdodswell6@un.org 53541 Morrow Cent~ Frasco Bowich $34.89
## 8 vhourihane7@ovh.net 4819 Hermina Park~ Car Ponten ¥41.66
## 9 rdike8@timesonline.co.uk 68096 Monument Pa~ Tades Checcuc~ €70,80
## 10 tdudbridge9@clickbank.net 9595 Spaight Aven~ Wilton Kemmey €62,76
# Does each e-mail address contain an "@"? One logical value per address.
mock_data$email %>%
  str_detect(pattern = "@")
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# How many "@" characters does each address contain?
mock_data$email %>%
  str_count(pattern = "@")
## [1] 1 1 1 1 1 1 1 1 1 1
# Prepend a fixed label to every address.
with(mock_data, str_c("email id:", email))
## [1] "email id:mnewburn0@fastcompany.com"
## [2] "email id:mdankersley1@digg.com"
## [3] "email id:hgirhard2@altervista.org"
## [4] "email id:pmcmenamy3@sciencedirect.com"
## [5] "email id:drisbrough4@bandcamp.com"
## [6] "email id:cphlippi5@surveymonkey.com"
## [7] "email id:kdodswell6@un.org"
## [8] "email id:vhourihane7@ovh.net"
## [9] "email id:rdike8@timesonline.co.uk"
## [10] "email id:tdudbridge9@clickbank.net"
# Break each address at the "@" into its user name and domain; the result is
# a list with one character vector per address.
mock_data$email %>%
  str_split(pattern = "@")
## [[1]]
## [1] "mnewburn0" "fastcompany.com"
##
## [[2]]
## [1] "mdankersley1" "digg.com"
##
## [[3]]
## [1] "hgirhard2" "altervista.org"
##
## [[4]]
## [1] "pmcmenamy3" "sciencedirect.com"
##
## [[5]]
## [1] "drisbrough4" "bandcamp.com"
##
## [[6]]
## [1] "cphlippi5" "surveymonkey.com"
##
## [[7]]
## [1] "kdodswell6" "un.org"
##
## [[8]]
## [1] "vhourihane7" "ovh.net"
##
## [[9]]
## [1] "rdike8" "timesonline.co.uk"
##
## [[10]]
## [1] "tdudbridge9" "clickbank.net"
# Sort the e-mail addresses alphabetically (ascending is the default).
str_sort(mock_data$email)
## [1] "cphlippi5@surveymonkey.com" "drisbrough4@bandcamp.com"
## [3] "hgirhard2@altervista.org" "kdodswell6@un.org"
## [5] "mdankersley1@digg.com" "mnewburn0@fastcompany.com"
## [7] "pmcmenamy3@sciencedirect.com" "rdike8@timesonline.co.uk"
## [9] "tdudbridge9@clickbank.net" "vhourihane7@ovh.net"
# Sort in reverse order. The argument is `decreasing`; the previously used
# `descending` is not a str_sort() parameter, so the order was never reversed.
str_sort(mock_data$email, decreasing = TRUE)
## [1] "vhourihane7@ovh.net" "tdudbridge9@clickbank.net"
## [3] "rdike8@timesonline.co.uk" "pmcmenamy3@sciencedirect.com"
## [5] "mnewburn0@fastcompany.com" "mdankersley1@digg.com"
## [7] "kdodswell6@un.org" "hgirhard2@altervista.org"
## [9] "drisbrough4@bandcamp.com" "cphlippi5@surveymonkey.com"
# Convert every full name to upper case.
mock_data$full_name %>%
  str_to_upper()
## [1] "MUFI RUIT" "LEESE FURMAGIER" "BLAKELEE WILSHIRE"
## [4] "TERENCIO MCILLRICK" "DEBEE MCERLAINE" "FRAN PAINTEN"
## [7] "FRASCO BOWICH" "CAR PONTEN" "TADES CHECCUCCI"
## [10] "WILTON KEMMEY"
# Abbreviate "Street" to "ST" in the addresses.
mock_data$address %>%
  str_replace(pattern = "Street", replacement = "ST")
## [1] "8 Anhalt Crossing" "697 East Avenue"
## [3] "89 Dottie Circle" "98135 Blue Bill Park Drive"
## [5] "7814 Pennsylvania ST" "4897 Little Fleur Drive"
## [7] "53541 Morrow Center" "4819 Hermina Parkway"
## [9] "68096 Monument Park" "9595 Spaight Avenue"
# Pull the text "com" out of each address; NA where it does not occur.
mock_data$email %>%
  str_extract(pattern = "com")
## [1] "com" "com" NA "com" "com" "com" NA NA NA NA
# Same lookup, returned as a one-column character matrix.
mock_data$email %>%
  str_match(pattern = "com")
## [,1]
## [1,] "com"
## [2,] "com"
## [3,] NA
## [4,] "com"
## [5,] "com"
## [6,] "com"
## [7,] NA
## [8,] NA
## [9,] NA
## [10,] NA
# Positions (indices) of the addresses that contain "com".
mock_data$email %>%
  str_which(pattern = "com")
## [1] 1 2 4 5 6
# Start and end character offsets of "com" inside each address.
mock_data$email %>%
  str_locate(pattern = "com")
## start end
## [1,] 15 17
## [2,] 19 21
## [3,] NA NA
## [4,] 26 28
## [5,] 22 24
## [6,] 24 26
## [7,] NA NA
## [8,] NA NA
## [9,] NA NA
## [10,] NA NA
# First character of each currency string, i.e. the currency symbol.
mock_data$currency %>%
  str_sub(start = 1, end = 1)
## [1] "¥" "$" "€" "€" "€" "¥" "$" "¥" "€" "€"
# First word of each full name, i.e. the given name.
mock_data$full_name %>%
  word(start = 1)
## [1] "Mufi" "Leese" "Blakelee" "Terencio" "Debee" "Fran"
## [7] "Frasco" "Car" "Tades" "Wilton"