##
www.imdb.com No encoding supplied: defaulting to UTF-8.
## [1] TRUE
## {xml_document}
## <html xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset= ...
## [2] <body id="styleguide-v2" class="fixed">\n\n <img height=" ...
## [1] "The Shawshank Redemption"
## [2] "The Godfather"
## [3] "The Dark Knight"
## [4] "The Godfather: Part II"
## [5] "The Lord of the Rings: The Return of the King"
## [6] "Pulp Fiction"
## [7] "Schindler's List"
## [8] "Il buono, il brutto, il cattivo"
## [9] "12 Angry Men"
## [10] "Inception"
## [11] "Fight Club"
## [12] "The Lord of the Rings: The Fellowship of the Ring"
## [13] "Forrest Gump"
## [14] "The Lord of the Rings: The Two Towers"
## [15] "The Matrix"
## [16] "Goodfellas"
## [17] "Star Wars: Episode V - The Empire Strikes Back"
## [18] "One Flew Over the Cuckoo's Nest"
## [19] "Shichinin no samurai"
## [20] "Interstellar"
## [21] "Cidade de Deus"
## [22] "Sen to Chihiro no kamikakushi"
## [23] "Saving Private Ryan"
## [24] "The Green Mile"
## [25] "La vita è bella"
## [26] "The Usual Suspects"
## [27] "Se7en"
## [28] "Léon"
## [29] "The Silence of the Lambs"
## [30] "Star Wars"
## [31] "It's a Wonderful Life"
## [32] "Andhadhun"
## [33] "Dangal"
## [34] "Spider-Man: Into the Spider-Verse"
## [35] "Avengers: Infinity War"
## [36] "Whiplash"
## [37] "The Intouchables"
## [38] "The Prestige"
## [39] "The Departed"
## [40] "The Pianist"
## [41] "Memento"
## [42] "Gladiator"
## [43] "American History X"
## [44] "The Lion King"
## [45] "Terminator 2: Judgment Day"
## [46] "Nuovo Cinema Paradiso"
## [47] "Hotaru no haka"
## [48] "Back to the Future"
## [49] "Raiders of the Lost Ark"
## [50] "Apocalypse Now"
# Release year of every listed movie, as a numeric vector.
#
# The year label is text such as "(1994)". Pull the four-digit run straight
# out of the label instead of slicing fixed character positions, so a label
# that carries any prefix before the year still yields the year.
#
# The previous round trip through as.Date(format = "%Y") is avoided on
# purpose: as.Date() fills the missing month and day from the current date,
# so on 29 February every non-leap year would have parsed to NA.
movie_year <- imdb %>%
  html_nodes(".lister-item-content h3 .lister-item-year") %>%
  html_text() %>%
  str_extract("[0-9]{4}") %>%
  as.numeric()
movie_year
## [1] 1994 1972 2008 1974 2003 1994 1993 1966 1957 2010 1999 2001 1994 2002
## [15] 1999 1990 1980 1975 1954 2014 2002 2001 1998 1999 1997 1995 1995 1994
## [29] 1991 1977 1946 2018 2016 2018 2018 2014 2011 2006 2006 2002 2000 2000
## [43] 1998 1994 1991 1988 1988 1985 1981 1979
# Certificate label (for example "A", "UA", "PG-13") of each movie that has one.
#
# NOTE: html_nodes() only returns nodes that exist, so a movie without a
# certificate contributes no element. The recorded output has 44 values for
# 50 movies, which means this vector is not positionally aligned with the
# other scraped columns.
movie_certificate <- html_nodes(imdb, ".lister-item-content p .certificate") %>%
  html_text()
movie_certificate
## [1] "A" "A" "UA" "PG-13" "A" "A" "UA" "A"
## [9] "PG-13" "PG-13" "PG-13" "A" "A" "PG" "UA" "R"
## [17] "PG" "A" "A" "PG-13" "A" "R" "A" "A"
## [25] "U" "PG" "UA" "U" "U" "UA" "A" "UA"
## [33] "PG-13" "A" "R" "R" "R" "A" "U" "U"
## [41] "R" "U" "PG" "R"
# Runtime of each movie as a number.
#
# The runtime text is split on spaces and only the first piece is kept,
# which is then converted to numeric.
movie_runtime <- html_nodes(imdb, ".lister-item-content p .runtime") %>%
  html_text() %>%
  str_split(" ") %>%
  map_chr(1) %>%
  as.numeric()
movie_runtime
## [1] 142 175 152 202 201 154 195 161 96 148 139 178 142 179 136 146 124
## [18] 133 207 169 130 125 169 189 116 106 127 110 118 121 130 139 161 117
## [35] 149 106 112 130 151 150 113 155 119 88 137 155 89 116 115 147
# Genre string of each movie (comma-separated when a movie has several),
# with surrounding whitespace trimmed off.
movie_genre <- html_nodes(imdb, ".lister-item-content p .genre") %>%
  html_text() %>%
  str_trim()
movie_genre
## [1] "Drama" "Crime, Drama"
## [3] "Action, Crime, Drama" "Crime, Drama"
## [5] "Adventure, Drama, Fantasy" "Crime, Drama"
## [7] "Biography, Drama, History" "Western"
## [9] "Drama" "Action, Adventure, Sci-Fi"
## [11] "Drama" "Adventure, Drama, Fantasy"
## [13] "Drama, Romance" "Adventure, Drama, Fantasy"
## [15] "Action, Sci-Fi" "Biography, Crime, Drama"
## [17] "Action, Adventure, Fantasy" "Drama"
## [19] "Adventure, Drama" "Adventure, Drama, Sci-Fi"
## [21] "Crime, Drama" "Animation, Adventure, Family"
## [23] "Drama, War" "Crime, Drama, Fantasy"
## [25] "Comedy, Drama, Romance" "Crime, Mystery, Thriller"
## [27] "Crime, Drama, Mystery" "Action, Crime, Drama"
## [29] "Crime, Drama, Thriller" "Action, Adventure, Fantasy"
## [31] "Drama, Family, Fantasy" "Crime, Thriller"
## [33] "Action, Biography, Drama" "Animation, Action, Adventure"
## [35] "Action, Adventure, Sci-Fi" "Drama, Music"
## [37] "Biography, Comedy, Drama" "Drama, Mystery, Sci-Fi"
## [39] "Crime, Drama, Thriller" "Biography, Drama, Music"
## [41] "Mystery, Thriller" "Action, Adventure, Drama"
## [43] "Drama" "Animation, Adventure, Drama"
## [45] "Action, Sci-Fi" "Drama"
## [47] "Animation, Drama, War" "Adventure, Comedy, Sci-Fi"
## [49] "Action, Adventure" "Drama, War"
# Rating of each movie, read from the `data-value` attribute of the rating
# node rather than from its displayed text, then converted to numeric.
movie_rating <- html_nodes(imdb, ".ratings-bar .ratings-imdb-rating") %>%
  html_attr("data-value") %>%
  as.numeric()
movie_rating
## [1] 9.3 9.2 9.0 9.0 8.9 8.9 8.9 8.9 8.9 8.8 8.8 8.8 8.8 8.7 8.7 8.7 8.7
## [18] 8.7 8.7 8.6 8.6 8.6 8.6 8.6 8.6 8.6 8.6 8.6 8.6 8.6 8.6 8.5 8.5 8.5
## [35] 8.5 8.5 8.5 8.5 8.5 8.5 8.5 8.5 8.5 8.5 8.5 8.5 8.5 8.5 8.5 8.5
# Number of votes per movie, taken from the `content` attribute of every
# <meta itemprop="ratingCount"> element and converted to numeric.
movie_votes <- html_nodes(imdb, xpath = '//meta[@itemprop="ratingCount"]') %>%
  html_attr("content") %>%
  as.numeric()
movie_votes
## [1] 2072893 1422292 2038787 987020 1475650 1621033 1074273 615219
## [9] 585562 1817393 1658750 1492209 1589127 1334563 1489071 895033
## [17] 1040130 822277 280024 1276946 637716 549410 1096231 1000909
## [25] 545280 897576 1271530 913352 1118817 1109777 352837 39132
## [33] 118413 174125 617621 605417 666327 1052901 1064050 633675
## [41] 1021511 1198326 941917 823238 897607 198398 192715 923178
## [49] 803033 542311
# Gross revenue of each movie as a number, NA where the page lists none.
#
# Revenue is looked up once per movie container instead of across the whole
# page. html_node() (singular) returns exactly one result per input node and
# a missing node where nothing matches, so movies without a gross figure
# become NA in the right position automatically. This replaces the earlier
# hand-inserted NAs at fixed positions, which would silently misalign the
# column as soon as the ranking changed.
#
# NOTE(review): assumes each movie's <span name="nv"> elements sit inside
# its `.lister-item-content` container - confirm against the page markup.
movie_revenue <- imdb %>%
  html_nodes(".lister-item-content") %>%
  # Votes and gross share name="nv"; only the gross text starts with "$".
  html_node(xpath = './/span[@name="nv"][starts-with(., "$")]') %>%
  html_text() %>%
  # Drop the leading "$" and the trailing unit character.
  str_sub(start = 2, end = -2) %>%
  as.numeric()
movie_revenue
## [1] 28.34 134.97 534.86 57.30 377.85 107.93 96.07 6.10 4.36 292.58
## [11] 37.03 315.54 330.25 342.55 171.48 46.84 290.48 112.00 0.27 188.02
## [21] 7.56 10.06 216.54 136.80 57.60 23.34 100.13 19.50 130.74 322.74
## [31] NA 1.19 12.39 190.24 678.82 13.09 13.18 53.09 132.38 32.57
## [41] 25.54 187.71 6.72 312.90 204.84 11.99 NA 210.61 248.16 83.47
# Combine the scraped vectors into a single tibble, one column per vector.
# movie_certificate is not part of this table.
top_50 <- tibble(
  title = movie_title,
  release = movie_year,
  `runtime (mins)` = movie_runtime,
  genre = movie_genre,
  rating = movie_rating,
  votes = movie_votes,
  `revenue ($ millions)` = movie_revenue
)
top_50
## # A tibble: 50 x 7
## title release `runtime (mins)` genre rating votes `revenue ($ mil~
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 The Sha~ 1994 142 Drama 9.3 2.07e6 28.3
## 2 The God~ 1972 175 Crime,~ 9.2 1.42e6 135.
## 3 The Dar~ 2008 152 Action~ 9 2.04e6 535.
## 4 The God~ 1974 202 Crime,~ 9 9.87e5 57.3
## 5 The Lor~ 2003 201 Advent~ 8.9 1.48e6 378.
## 6 Pulp Fi~ 1994 154 Crime,~ 8.9 1.62e6 108.
## 7 Schindl~ 1993 195 Biogra~ 8.9 1.07e6 96.1
## 8 Il buon~ 1966 161 Western 8.9 6.15e5 6.1
## 9 12 Angr~ 1957 96 Drama 8.9 5.86e5 4.36
## 10 Incepti~ 2010 148 Action~ 8.8 1.82e6 293.
## # ... with 40 more rows
# Ask whether the Wikipedia page may be accessed before scraping it
# (the recorded output below reports TRUE).
paths_allowed(
  paths = "https://en.wikipedia.org/wiki/List_of_Governors_of_Reserve_Bank_of_India"
)
##
en.wikipedia.org
## [1] TRUE
# Download and parse the Wikipedia list of RBI governors, then print the
# parsed document.
rbi_guv <-
  "https://en.wikipedia.org/wiki/List_of_Governors_of_Reserve_Bank_of_India" %>%
  read_html()
rbi_guv
## {xml_document}
## <html class="client-nojs" lang="en" dir="ltr">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset= ...
## [2] <body class="mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-sub ...
## No. Officeholder Portrait Term start Term end
## 1 1 Osborne Smith NA 1 April 1935 30 June 1937
## 2 2 James Braid Taylor NA 1 July 1937 17 February 1943
## 3 3 C. D. Deshmukh NA 11 August 1943ii 30 May 1949
## 4 4 Benegal Rama Rau NA 1 July 1949 14 January 1957
## 5 5 K. G. Ambegaonkar NA 14 January 1957 28 February 1957
## 6 6 H. V. R. Iyengar NA 1 March 1957 28 February 1962
## 7 7 P. C. Bhattacharya NA 1 March 1962 30 June 1967
## 8 8 Lakshmi Kant Jha NA 1 July 1967 3 May 1970
## 9 9 B. N. Adarkar NA 4 May 1970 15 June 1970
## 10 10 Sarukkai Jagannathan NA 16 June 1970 19 May 1975
## 11 11 N. C. Sen Gupta NA 19 May 1975 19 August 1975
## 12 12 K. R. Puri NA 20 August 1975 2 May 1977
## 13 13 M. Narasimham NA 3 May 1977 30 November 1977
## 14 14 I. G. Patel NA 1 December 1977 15 September 1982
## 15 15 Manmohan Singh NA 16 September 1982 14 January 1985
## 16 16 Amitav Ghosh NA 15 January 1985 4 February 1985
## 17 17 R. N. Malhotra NA 4 February 1985 22 December 1990
## 18 18 S. Venkitaramanan NA 22 December 1990 21 December 1992
## 19 19 C. Rangarajan NA 22 December 1992 21 November 1997
## 20 20 Bimal Jalan NA 22 November 1997 6 September 2003
## 21 21 Y. Venugopal Reddy NA 6 September 2003 5 September 2008
## 22 22 D. Subbarao NA 5 September 2008 4 September 2013
## 23 23 Raghuram Rajan NA 4 September 2013 4 September 2016
## 24 24 Urjit Patel NA 4 September 2016 11 December 2018
## 25 25 Shaktikanta Das NA 12 December 2018 Incumbent
## Term in office Background
## 1 821 days Banker
## 2 2057 days Indian Civil Service (ICS) officer
## 3 2150 days ICS officer
## 4 2754 days ICS officer
## 5 45 days ICS officer
## 6 1825 days ICS officer
## 7 1947 days Indian Audit and Accounts Service officer
## 8 1037 days ICS officer
## 9 42 days Economist
## 10 1798 days ICS officer
## 11 92 days ICS officer
## 12 621 days
## 13 211 days Career Reserve Bank of India officer
## 14 1749 days Economist
## 15 851 days Economist
## 16 20 days Banker
## 17 2147 days Indian Administrative Service (IAS) officer
## 18 730 days IAS officer
## 19 1795 days Economist
## 20 2114 days Economist
## 21 1826 days IAS officer
## 22 1825 days IAS officer
## 23 1096 days Economist
## 24 947 days Economist
## 25 118 days IAS officer
## Prior office(s)
## 1 Managing Governor of the Imperial Bank of India
## 2 Deputy Governor of the Reserve Bank of India\n\nController of Currency
## 3 Deputy Governor of the Reserve Bank of India\nCustodian of Enemy Property
## 4 Ambassador of India to the United States\n\nAmbassador of India to Japan\n\nChairman of Bombay Port Trust
## 5 Finance Secretary
## 6 Chairman of the State Bank of India
## 7 Chairman of the State Bank of India\nSecretary in the Ministry of Finance
## 8 Secretary to the Prime Minister of India
## 9 Executive Director at the International Monetary Fund
## 10 Executive Director at the World Bank
## 11 Banking Secretary
## 12 Chairman and Managing Director of the Life Insurance Corporation
## 13 Deputy Governor of the Reserve Bank of India
## 14 Director of the London School of Economics\n\nDeputy Administrator of the United Nations Development Programme\nChief Economic Adviser to the Government of India
## 15 Secretary in the Ministry of Finance\n\nChief Economic Adviser to the Government of India
## 16 Deputy Governor of the Reserve Bank of India\n\nChairman of the Allahabad Bank
## 17 Finance Secretary\n\nExecutive Director at the International Monetary Fund
## 18 Finance Secretary
## 19 Deputy Governor of the Reserve Bank of India
## 20 Finance Secretary\n\nBanking Secretary\n\nChief Economic Adviser to the Government of India
## 21 Executive Director at the International Monetary Fund\n\nDeputy Governor of the Reserve Bank of India
## 22 Finance Secretary\n\nMember-Secretary of the Prime Minister's Economic Advisory Council
## 23 Chief Economic Adviser to the Government of India
## 24 Deputy Governor of the Reserve Bank
## 25 Member of the Fifteenth Finance Commission\nSherpa of India to the G20\nEconomic Affairs Secretary\nRevenue Secretary
## Reference(s)
## 1 [1]
## 2 [2]
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23
## 24
## 25 [3][4][5]
# Rank the officeholders by length of tenure, longest first.
profile %>%
  # Split "Term in office" into its number and its unit using separate()'s
  # default separator; only the number (`term`) is used afterwards.
  separate(col = `Term in office`, into = c("term", "days")) %>%
  select(Officeholder, term) %>%
  # `term` is still text after the split, so convert it for a numeric sort.
  arrange(desc(as.numeric(term)))
## Officeholder term
## 1 Benegal Rama Rau 2754
## 2 C. D. Deshmukh 2150
## 3 R. N. Malhotra 2147
## 4 Bimal Jalan 2114
## 5 James Braid Taylor 2057
## 6 P. C. Bhattacharya 1947
## 7 Y. Venugopal Reddy 1826
## 8 H. V. R. Iyengar 1825
## 9 D. Subbarao 1825
## 10 Sarukkai Jagannathan 1798
## 11 C. Rangarajan 1795
## 12 I. G. Patel 1749
## 13 Raghuram Rajan 1096
## 14 Lakshmi Kant Jha 1037
## 15 Urjit Patel 947
## 16 Manmohan Singh 851
## 17 Osborne Smith 821
## 18 S. Venkitaramanan 730
## 19 K. R. Puri 621
## 20 M. Narasimham 211
## 21 Shaktikanta Das 118
## 22 N. C. Sen Gupta 92
## 23 K. G. Ambegaonkar 45
## 24 B. N. Adarkar 42
## 25 Amitav Ghosh 20
## # A tibble: 9 x 2
## Background n
## <chr> <int>
## 1 "" 1
## 2 Banker 2
## 3 Career Reserve Bank of India officer 1
## 4 Economist 7
## 5 IAS officer 4
## 6 ICS officer 7
## 7 Indian Administrative Service (IAS) officer 1
## 8 Indian Audit and Accounts Service officer 1
## 9 Indian Civil Service (ICS) officer 1
# Count the governors by professional background after merging equivalent
# labels: all civil-service variants become "Bureaucrats", the empty string
# becomes "No Info", and the career RBI label is shortened to "RBI Officer".
# Labels not named here (for example "Banker", "Economist") are kept as-is.
backgrounds <- profile %>%
  pull(Background) %>%
  fct_collapse(
    Bureaucrats = c(
      "IAS officer",
      "ICS officer",
      "Indian Administrative Service (IAS) officer",
      "Indian Audit and Accounts Service officer",
      "Indian Civil Service (ICS) officer"
    ),
    `No Info` = "",
    `RBI Officer` = "Career Reserve Bank of India officer"
  ) %>%
  fct_count() %>%
  # fct_count() names its columns `f` and `n`; give them descriptive names.
  rename(background = f, count = n)
## # A tibble: 5 x 2
## background count
## <fct> <int>
## 1 No Info 1
## 2 Banker 2
## 3 RBI Officer 1
## 4 Economist 7
## 5 Bureaucrats 14