``````rm(list = ls())   # start from scratch

##############################################################################.
#
#      lapply ( SOME_LIST , SOME_FUNCTION )     ####
#
#
# The lapply function is used to "apply" a function to every entry in a list.
#
# The function is run as many times as there are entries in the list.
#
# lapply returns a list that contains the return values from each of the
# times the function was called.
#
# The "l" in "lapply" stands for "list", i.e. the return value of the
# "lapply" function is a list. There are other similar functions
# such as sapply, mapply and apply that have slightly different ways
# of working. For now we will just focus on "lapply". Later in the course
# we will learn about "sapply" and perhaps "mapply" and "apply".
#
# (see the examples below)
#
##############################################################################.

# create a list
# gradebook is a named list that holds a numeric vector, a character vector
# and a logical vector (8 entries each).
gradebook = list(grades=c(80,85,72,95,100,89,65,84),
                 students=c("joe","sue","bob","al","frank","mike","anne","clara"),
                 honors=c(FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE))
gradebook
``````\$grades
[1]  80  85  72  95 100  89  65  84

\$students
[1] "joe"   "sue"   "bob"   "al"    "frank" "mike"  "anne"  "clara"

\$honors
[1] FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE``````
``````#-----------------------------------------------------------------------.
# Some built-in functions
#
# We'll use the following functions to help explain the lapply and sapply functions
#
#   length(SOME_VECTOR_OR_LIST) returns the number of items in the vector or the list
#   range(SOME_VECTOR) - returns c( min(SOME_VECTOR) , max(SOME_VECTOR))
#   summary(SOME_OBJECT) - returns a summary of what is in SOME_OBJECT
#-----------------------------------------------------------------------.

# length returns the number of items in a vector (or in a list)
length( gradebook\$grades )     # 8``````
``[1] 8``
``length( gradebook\$students )   # 8``
``[1] 8``
``length( gradebook\$honors )     # 8``
``[1] 8``
``````# range returns min and max of the vector
range( gradebook\$grades)     #  65 100``````
``[1]  65 100``
``range( gradebook\$students)   # "al" "sue"``
``[1] "al"  "sue"``
``range( gradebook\$honors)     #   0   1``
``[1] 0 1``
``````# summary returns a short summary of data.
# different results for different types of data.

summary( gradebook\$grades )    # see output below``````
``````   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
65.00   78.00   84.50   83.75   90.50  100.00 ``````
``````#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 65.00   78.00   84.50   83.75   90.50  100.00

summary( gradebook\$students )   # see output below``````
``````   Length     Class      Mode
8 character character ``````
``````# Length     Class      Mode
#      8 character character

summary( gradebook\$honors )    # see output below``````
``````   Mode   FALSE    TRUE
logical       5       3 ``````
``````#    Mode   FALSE    TRUE
# logical       5       3

#-----------------------------------------------------------------------.
# lapply ( SOME_LIST , SOME_FUNCTION )
#
# The lapply function takes two arguments:
#   X   - a list
#   FUN - the name of a function.
#
# lapply returns a new list that contains as many entries as there
# were in the list X. Each entry in the new list contains the results
# of "applying" the function FUN to an entry of the original list, X.
#
#    EXAMPLE:
#      > lst = list( c(10,20,30),           # length of this is 3
#                    c("apple", "orange"))  # length of this is 2
#
#      > lapply(X=lst, FUN=length) # list of 3 and 2
#      [[1]]
#      [1] 3
#
#      [[2]]
#      [1] 2
#-----------------------------------------------------------------------.

#........................................
# EXAMPLE 1 - lapply(gradebook, length)
#........................................

# Use the lapply function to automatically apply the length function to
# each of the items in the gradebook list.
# This returns a new list that contains all of the results.

lapply(gradebook, length)   # see results below``````
``````\$grades
[1] 8

\$students
[1] 8

\$honors
[1] 8``````
``````# \$grades
# [1] 8
#
# \$students
# [1] 8
#
# \$honors
# [1] 8

# lapply returns a list of the answers
mode(lapply(gradebook, length))  # "list"``````
``[1] "list"``
``````#........................................
# EXAMPLE 2 - lapply(gradebook, range)
#........................................

# Show the results of the range function on each of the items in the list
range(gradebook\$grades)     #  65 100``````
``[1]  65 100``
``range(gradebook\$students)   # "al" "sue"``
``[1] "al"  "sue"``
``range(gradebook\$honors)     #   0   1``
``[1] 0 1``
``````# apply range function to all items in the gradebook list

lapply(gradebook, range)   # see results below``````
``````\$grades
[1]  65 100

\$students
[1] "al"  "sue"

\$honors
[1] 0 1``````
``````# \$grades
# [1]  65 100
#
# \$students
# [1] "al"  "sue"
#
# \$honors
# [1] 0 1

#........................................
# EXAMPLE 3 - lapply(gradebook, summary)
#........................................

# summary returns a short summary of data.
# different results for different types of data.
summary( gradebook\$grades )    # see output below``````
``````   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
65.00   78.00   84.50   83.75   90.50  100.00 ``````
``````#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 65.00   78.00   84.50   83.75   90.50  100.00

summary( gradebook\$students )   # see output below``````
``````   Length     Class      Mode
8 character character ``````
``````# Length     Class      Mode
#      8 character character

summary( gradebook\$honors )    # see output below``````
``````   Mode   FALSE    TRUE
logical       5       3 ``````
``````#    Mode   FALSE    TRUE
# logical       5       3

# apply summary function to all items in the gradebook list
lapply(gradebook, summary)   # all of the above results in a single list``````
``````\$grades
Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
65.00   78.00   84.50   83.75   90.50  100.00

\$students
Length     Class      Mode
8 character character

\$honors
Mode   FALSE    TRUE
logical       5       3 ``````
``````# apply mode function to all items in the gradebook list
lapply(gradebook, mode)   # all of the above results in a single list``````
``````\$grades
[1] "numeric"

\$students
[1] "character"

\$honors
[1] "logical"``````
``````#---------------------------------------------------------------------------.
# lapply also works with nested lists
# (i.e. lists that contain other lists)
#---------------------------------------------------------------------------.

rm(list = ls())

classes = list( year = 2021,
semester = "fall",
section1 = list ( students = c("abe","bob","charlie"),
test1 = c(70,80,90),
test2 = c(75,85,95)),
section2 = list( students = c("fran", "anne", "sue", "bertha", "maxine"),
test1 = c(100,90,80,70,60),
test2 = c(95,85,75,65,55),
test3 = c(93,83,73,63,53)) )

length(classes)   # 4  (classes contains 4 objects)``````
``[1] 4``
``lapply(classes, length) # a list that contains 1,1,3,4, i.e. the length of each object in classes``
``````\$year
[1] 1

\$semester
[1] 1

\$section1
[1] 3

\$section2
[1] 4``````
``````# remember the str function is very helpful for seeing the structure of complex lists
str(classes)``````
``````List of 4
\$ year    : num 2021
\$ semester: chr "fall"
\$ section1:List of 3
..\$ students: chr [1:3] "abe" "bob" "charlie"
..\$ test1   : num [1:3] 70 80 90
..\$ test2   : num [1:3] 75 85 95
\$ section2:List of 4
..\$ students: chr [1:5] "fran" "anne" "sue" "bertha" ...
..\$ test1   : num [1:5] 100 90 80 70 60
..\$ test2   : num [1:5] 95 85 75 65 55
..\$ test3   : num [1:5] 93 83 73 63 53``````
``````#-----------------------------------------------------------------------.
# NAMED LISTS WITH lapply VS UNNAMED LISTS
#
# The return value of lapply will be a named list only if the
# original list has names.
#-----------------------------------------------------------------------.

hasNames = list( fruit=c("apple","orange","pear","plum"),
evens=seq(2,10,by=2),
odds=seq(1,30,by=2),
someLogicals=c(TRUE,FALSE,TRUE))
hasNames``````
``````\$fruit
[1] "apple"  "orange" "pear"   "plum"

\$evens
[1]  2  4  6  8 10

\$odds
[1]  1  3  5  7  9 11 13 15 17 19 21 23 25 27 29

\$someLogicals
[1]  TRUE FALSE  TRUE``````
``str(hasNames)``
``````List of 4
\$ fruit       : chr [1:4] "apple" "orange" "pear" "plum"
\$ evens       : num [1:5] 2 4 6 8 10
\$ odds        : num [1:15] 1 3 5 7 9 11 13 15 17 19 ...
\$ someLogicals: logi [1:3] TRUE FALSE TRUE``````
``length(hasNames)``
``[1] 4``
``lapply(hasNames, length)``
``````\$fruit
[1] 4

\$evens
[1] 5

\$odds
[1] 15

\$someLogicals
[1] 3``````
``lapply(hasNames, range)``
``````\$fruit
[1] "apple" "plum"

\$evens
[1]  2 10

\$odds
[1]  1 29

\$someLogicals
[1] 0 1``````
``lapply(hasNames, summary)``
``````\$fruit
Length     Class      Mode
4 character character

\$evens
Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
2       4       6       6       8      10

\$odds
Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
1       8      15      15      22      29

\$someLogicals
Mode   FALSE    TRUE
logical       1       2 ``````
``````noNames = list( c("apple","orange","pear","plum"),
seq(2,10,by=2),
seq(1,30,by=2),
c(TRUE,FALSE,TRUE))
noNames``````
``````[[1]]
[1] "apple"  "orange" "pear"   "plum"

[[2]]
[1]  2  4  6  8 10

[[3]]
[1]  1  3  5  7  9 11 13 15 17 19 21 23 25 27 29

[[4]]
[1]  TRUE FALSE  TRUE``````
``str(noNames)``
``````List of 4
\$ : chr [1:4] "apple" "orange" "pear" "plum"
\$ : num [1:5] 2 4 6 8 10
\$ : num [1:15] 1 3 5 7 9 11 13 15 17 19 ...
\$ : logi [1:3] TRUE FALSE TRUE``````
``length(noNames)``
``[1] 4``
``lapply(noNames, length)``
``````[[1]]
[1] 4

[[2]]
[1] 5

[[3]]
[1] 15

[[4]]
[1] 3``````
``lapply(noNames, range)``
``````[[1]]
[1] "apple" "plum"

[[2]]
[1]  2 10

[[3]]
[1]  1 29

[[4]]
[1] 0 1``````
``lapply(noNames, summary)``
``````[[1]]
Length     Class      Mode
4 character character

[[2]]
Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
2       4       6       6       8      10

[[3]]
Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
1       8      15      15      22      29

[[4]]
Mode   FALSE    TRUE
logical       1       2 ``````
``````######################################################################.
# 2022 WILF - UP TO HERE - AFTER CLASS 17
######################################################################.

############################################################.
#   using custom functions with lapply
#
# You can use any function that can be called with a single argument
# with lapply, even your own custom functions
############################################################.

stuff = list(grades = c(70,80,95,88,60),
students = c("larry", "zeke","charlie","yaakov","abe"),
age = c(17,23,20,20,21,19))

# define a function to find the 2nd largest value in a vector
#
#   vec - a vector that can be sorted (e.g. numeric, character or logical)
#
# Returns the 2nd largest value in vec. NA values are ignored.
# If vec has fewer than 2 non-NA values the result is an empty vector.
secondLargest = function( vec ){
  # sort() drops NA values, so the position must be based on the length
  # of the sorted vector and not on length(vec).
  sorted = sort(vec)
  sorted[length(sorted)-1]
}

# Test the function
nums = c(10,1000,900,-55,23)
secondLargest(nums)   # 900``````
``[1] 900``
``````# Example - use our function on the vectors in the list, stuff
secondLargest( stuff\$grades)    # 88``````
``[1] 88``
``secondLargest( stuff\$students)  # "yaakov"``
``[1] "yaakov"``
``secondLargest( stuff\$age)       # 21``
``[1] 21``
``````# Use our custom function with lapply to automatically apply the function
# to all entries in the list, stuff

lapply( stuff, secondLargest)    # list of the answers``````
``````\$grades
[1] 88

\$students
[1] "yaakov"

\$age
[1] 21``````
``````###########################################.
# functions defined in one line
#
# If a function only needs one line of code
# the {curly braces} aren't required.
###########################################.

# Another way to define the second largest function - all in one line
# (you don't need the {curly braces})
# NOTE: sort() drops NA values, so the position is computed from the number
# of non-NA values instead of from length(vec).
secondLargest = function ( vec ) sort(vec)[sum(!is.na(vec))-1]

# Test the function
nums = c(10,1000,900,-55,23)
secondLargest(nums)``````
``[1] 900``
``````###########################################.
# functions defined in one line
#
# If a function only needs one line of code
# the {curly braces} aren't required.
###########################################.

# create a list
# gradebook is a named list that holds a numeric vector, a character vector
# and a logical vector (8 entries each).
gradebook = list(grades=c(80,85,72,95,100,89,65,84),
                 students=c("joe","sue","bob","al","frank","mike","anne","clara"),
                 honors=c(FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE))
gradebook
``````\$grades
[1]  80  85  72  95 100  89  65  84

\$students
[1] "joe"   "sue"   "bob"   "al"    "frank" "mike"  "anne"  "clara"

\$honors
[1] FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE``````
``````# If you want to define a function just to use with lapply you
# can define it directly in the call to lapply

# Use the secondLargest function we defined above
lapply(gradebook, secondLargest)   # we defined gradebook above``````
``````\$grades
[1] 95

\$students
[1] "mike"

\$honors
[1] TRUE``````
``````# This also works
lapply(gradebook, function ( vec ) {
sort(vec)[length(vec)-1]
})``````
``````\$grades
[1] 95

\$students
[1] "mike"

\$honors
[1] TRUE``````
``````# So does this
lapply(gradebook, function ( vec ) sort(vec)[length(vec)-1] )``````
``````\$grades
[1] 95

\$students
[1] "mike"

\$honors
[1] TRUE``````
``````# You can capture the output of lapply in a variable if you like
answers = lapply(gradebook, function ( vec ) sort(vec)[length(vec)-1] )

``````\$grades
[1] 95

\$students
[1] "mike"

\$honors
[1] TRUE``````
``answers\$students``
``[1] "mike"``
``answers[[1]] ``
``[1] 95``
``mode(answers)   # "list"``
``[1] "list"``
``````#------------------------------------------------------------------------.
# QUESTION
#
# stuff is a list.
# Write a single line of code that returns the number of
# numeric vectors that are contained in a single list.
#
# HINT: Use the is.numeric function, lapply and unlist
#
# EXAMPLE
#
#   > stuff = list(c(100,200,300),
#                 c("apple", "orange", "pear"),
#                 seq(10,100,by=5),
#                 c(TRUE, FALSE))
#
#   > YOUR CODE GOES HERE
#   [1] 2
#------------------------------------------------------------------------.

# Setup the data
stuff = list(c(100,200,300),
c("apple", "orange", "pear"),
seq(10,100,by=5),
c(TRUE, FALSE))
stuff``````
``````[[1]]
[1] 100 200 300

[[2]]
[1] "apple"  "orange" "pear"

[[3]]
[1]  10  15  20  25  30  35  40  45  50  55  60  65  70  75  80  85  90  95 100

[[4]]
[1]  TRUE FALSE``````
``````# ANSWER
sum( unlist( lapply(stuff, is.numeric) )  )``````
``[1] 2``
``````# Or you can break up into multiple lines to make it easier to read.
sum(
unlist(
lapply(stuff, is.numeric)
)
)``````
``[1] 2``
``````#------------------------------------------------------------------------.
# QUESTION
#
# stuff is a list.
# Write a single line of code that creates a new variable,
# named numericStuff, that contains just a new list with
# just the numeric vectors from stuff.
#
# EXAMPLE:
# > stuff = list( seq(10,20,length.out=4) ,   # numeric
#                 NULL,                       # NULL
#                 c("apple", "orange"),       # character
#                 rep(5, 3),                  # numeric
#                 1:4 > pi,                   # logical
#                 NULL,                       # NULL
#                 paste0(letters[1:5], 1:5),  # character
#                 2 ^ (1:5) )                 # numeric
#
# > numericStuff = YOUR CODE GOES HERE
#
# > numericStuff
# [[1]]
# [1] 10.00000 13.33333 16.66667 20.00000
#
# [[2]]
# [1] 5 5 5
#
# [[3]]
# [1]  2  4  8 16 32
#------------------------------------------------------------------------.

stuff = list( seq(10,20,length.out=4) ,   # numeric
NULL,                       # NULL
c("apple", "orange"),       # character
rep(5, 3),                  # numeric
1:4 > pi,                   # logical
NULL,                       # NULL
paste0(letters[1:5], 1:5),  # character
2 ^ (1:5))                  # numeric

stuff``````
``````[[1]]
[1] 10.00000 13.33333 16.66667 20.00000

[[2]]
NULL

[[3]]
[1] "apple"  "orange"

[[4]]
[1] 5 5 5

[[5]]
[1] FALSE FALSE FALSE  TRUE

[[6]]
NULL

[[7]]
[1] "a1" "b2" "c3" "d4" "e5"

[[8]]
[1]  2  4  8 16 32``````
``````numericStuff = stuff [        # ANSWER
unlist (
lapply(stuff, is.numeric)
)
]

numericStuff``````
``````[[1]]
[1] 10.00000 13.33333 16.66667 20.00000

[[2]]
[1] 5 5 5

[[3]]
[1]  2  4  8 16 32``````
``````#------------------------------------------------------------------------.
# QUESTION
#
# (see previous question)
# Same idea as previous question, but this time create a list
# called someStuff that contains just the logical and numeric vectors
# from stuff.
#
# HINT: one way to do this is to create a custom function that
#       takes a single argument. The custom function should
#       return TRUE if the argument contains a numeric or a logical vector
#       and FALSE otherwise.
#
# EXAMPLE:
# > stuff = list( seq(10,20,length.out=4) ,   # numeric
#                 NULL,                       # NULL
#                 c("apple", "orange"),       # character
#                 rep(5, 3),                  # numeric
#                 1:4 > pi,                   # logical
#                 NULL,                       # NULL
#                 paste0(letters[1:5], 1:5),  # character
#                 2 ^ (1:5) )                 # numeric
#
# > someStuff = YOUR CODE GOES HERE
#
# > someStuff
# [[1]]
# [1] 10.00000 13.33333 16.66667 20.00000
#
# [[2]]
# [1] 5 5 5
#
# [[3]]
# [1] FALSE FALSE FALSE  TRUE
#
# [[4]]
# [1]  2  4  8 16 32
#------------------------------------------------------------------------.

# setup some data
stuff = list( seq(10,20,length.out=4) ,   # numeric
NULL,                       # NULL
c("apple", "orange"),       # character
rep(5, 3),                  # numeric
1:4 > pi,                   # logical
NULL,                       # NULL
paste0(letters[1:5], 1:5),  # character
2 ^ (1:5))                  # numeric

stuff``````
``````[[1]]
[1] 10.00000 13.33333 16.66667 20.00000

[[2]]
NULL

[[3]]
[1] "apple"  "orange"

[[4]]
[1] 5 5 5

[[5]]
[1] FALSE FALSE FALSE  TRUE

[[6]]
NULL

[[7]]
[1] "a1" "b2" "c3" "d4" "e5"

[[8]]
[1]  2  4  8 16 32``````
``````# STEP 1
# Create a function that returns TRUE if the argument is either logical or numeric
#
#   something - any R object
#
# Returns a single TRUE or FALSE value.
isLogicalOrNumeric = function( something ){
  # is.logical() and is.numeric() each return a single TRUE/FALSE, so the
  # scalar (short-circuit) operator || is the appropriate "or" here.
  is.logical(something) || is.numeric(something)
}

# STEP 2 - use the function with lapply
tfList = lapply(stuff, isLogicalOrNumeric)   # list of TRUE/FALSE values
tfList ``````
``````[[1]]
[1] TRUE

[[2]]
[1] FALSE

[[3]]
[1] FALSE

[[4]]
[1] TRUE

[[5]]
[1] TRUE

[[6]]
[1] FALSE

[[7]]
[1] FALSE

[[8]]
[1] TRUE``````
``````# STEP 3 - unlist the results to get a vector
tfVector = unlist ( tfList )  # vector of TRUE/FALSE values
tfVector``````
``[1]  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE``
``````# STEP 4 - use the logical vector to get just the logical and numeric entries in the list
someStuff = stuff[tfVector]

# ALL STEPS TOGETHER IN ONE COMMAND
# You can create an "anonymous" function directly in the call to lapply.
someStuff = stuff [
unlist (
lapply(stuff,  function (something) is.logical(something)|is.numeric(something)
)
)
]

# or alternatively all in one line
someStuff = stuff[unlist ( lapply(stuff,  function (something) is.logical(something)|is.numeric(something) ) ) ]

someStuff``````
``````[[1]]
[1] 10.00000 13.33333 16.66667 20.00000

[[2]]
[1] 5 5 5

[[3]]
[1] FALSE FALSE FALSE  TRUE

[[4]]
[1]  2  4  8 16 32``````
``````###
### END OF FILE
### INFORMATION BELOW IS NOT FINISHED
###

#------------------------------------------------------------------------.
# QUESTION
#
# Write a function called modeCount. The function
# takes a single argument, lst, which is a list.
# The function returns a named vector that contains
# the number of entries in lst that are of each of the
# following modes: character, logical, numeric, list, NULL.
#
# EXAMPLE:
# > stuff = list( seq(10,20,length.out=4) ,   # numeric
#                 NULL,                       # NULL
#                 c("apple", "orange"),       # character
#                 rep(5, 3),                  # numeric
#                 1:4 > pi,                   # logical
#                 NULL,                       # NULL
#                 paste0(letters[1:5], 1:5),  # character
#                 2 ^ (1:5) )                 # numeric
#
# > YOUR CODE GOES HERE

#------------------------------------------------------------------------.

# Sample data: a list with numeric, NULL, character and logical entries.
stuff = list( seq(10,20,length.out=4) ,      # numeric
              NULL,                       # NULL
              c("apple", "orange"),       # character
              rep(5, 3),                  # numeric
              1:4 > pi,                   # logical
              NULL,                       # NULL
              paste0(letters[1:5], 1:5),  # character
              # ^ binds tighter than :, so the parentheses are required to
              # get the powers of two (2 ^ 1:5 would be the sequence 2:5).
              2 ^ (1:5))                  # numeric

sum ( unlist ( lapply(stuff, is.numeric) ) )``````
``[1] 3``