Fall 2024 Webscraping Workshop Survey Responses

Number of responses

Code
library(tidyverse)
library(bslib)
library(shiny)
library(bsicons)
source("scripts/helper_functions.R")

# list of workshop IDs to filter results
workshops <- c("2024-11-05-ucsb-webscraping")

results <- read_csv("data-joined/all_workshops.csv") %>% 
  filter(workshop %in% workshops)
  
# Fix comma separator
results <- results %>% 
  mutate(findout_select.pre = str_replace_all(
  findout_select.pre, 
  "Twitter, Facebook, etc.", 
  "Twitter; Facebook; etc."))

pre_survey <- results %>%
  select(ends_with(".pre"))

post_survey <- results %>%
  select(ends_with(".post"))

n_pre <- sum(apply(post_survey, 1, function(row) all(is.na(row))))
n_post <- sum(apply(pre_survey, 1, function(row) all(is.na(row))))
n_total <- nrow(results)
n_both <- nrow(results) - n_pre - n_post

layout_columns(
  value_box(
    title = "Total responses", value = n_total, ,
    theme = NULL, showcase = bs_icon("people-fill"), showcase_layout = "left center",
    full_screen = FALSE, fill = TRUE, height = NULL
  ),
  value_box(
    title = "Both pre- and post-", value = n_both, , theme = NULL,
    showcase = bs_icon("arrows-expand-vertical"), showcase_layout = "left center",
    full_screen = FALSE, fill = TRUE, height = NULL
  ),
  value_box(
    title = "Only pre-workshop", value = n_pre, ,
    theme = NULL, showcase = bs_icon("arrow-left-short"), showcase_layout = "left center",
    full_screen = FALSE, fill = TRUE, height = NULL
  ),
  value_box(
    title = "Only post-workshop", value = n_post, , theme = NULL,
    showcase = bs_icon("arrow-right-short"), showcase_layout = "left center",
    full_screen = FALSE, fill = TRUE, height = NULL
  )
)

Total responses

14

Both pre- and post-

3

Only pre-workshop

8

Only post-workshop

3

Departments

Code
depts <- results %>% select(dept_select.pre) %>% 
  separate_rows(dept_select.pre, sep=",") %>%
  mutate(dept_select.pre = str_trim(dept_select.pre)) %>%
  count(dept_select.pre, name = "count") %>% 
  mutate(percent = (count / (n_total - n_post)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(depts, aes(y=reorder(dept_select.pre, count), x=count)) +
    geom_col() +
    geom_label(aes(label = text, hjust = -0.1),
               size = 3) +
    labs(x = "# respondents", y = element_blank()) +  
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank()
      ) +
    expand_limits(x = c(0,max(depts$count)*1.1))

“Other” Departments

Code
other_depts <- results %>% 
  count(dept_other.pre, name = "count") %>% 
  drop_na() %>% 
  mutate(percent = (count / (n_total - n_post)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(other_depts, aes(y=reorder(dept_other.pre, count), x=count)) +
    geom_col() +
    geom_label(aes(label = text, hjust = -0.1),
               size = 3) +
    labs(x = "# respondents", y = element_blank()) + 
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank()
      ) +
    expand_limits(x = c(0,max(other_depts$count)*1.1))
Warning in max(other_depts$count): no non-missing arguments to max; returning
-Inf

Current occupation / Career stage

Code
ocup <- results %>% select(occupation.pre) %>% 
  separate_rows(occupation.pre, sep=",") %>%
  mutate(occupation.pre = str_trim(occupation.pre)) %>%
  count(occupation.pre, name = "count") %>% 
  drop_na() %>% 
  mutate(percent = (count / (n_total - n_post)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(ocup, aes(y=reorder(occupation.pre, count), x=count)) +
    geom_col() +
    geom_label(aes(label = text, hjust = -0.1),
               size = 3) +
    labs(x = "# respondents", y = element_blank()) + 
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank()
      ) +
    expand_limits(x = c(0,max(ocup$count)*1.2))

Motivation - Why are you participating in this workshop?

Code
motiv <- results %>% select(motivation_select.pre) %>% 
  separate_rows(motivation_select.pre, sep=",")  %>% 
  mutate(motivation_select.pre = str_trim(motivation_select.pre)) %>%
  count(motivation_select.pre, name = "count") %>% 
  drop_na() %>% 
  mutate(percent = (count / (n_total - n_post)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(motiv, aes(y=reorder(motivation_select.pre, count), x=count)) +
    geom_col() +
    geom_label(aes(label = text, hjust = -0.1),
               size = 3) +
    labs(x = "# respondents", y = element_blank()) + 
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank()
      ) +
    expand_limits(x = c(0,max(motiv$count)*1.2))

How did you find out about this workshop?

Code
findw <- results %>% select(findout_select.pre) %>% 
  separate_rows(findout_select.pre, sep=",")  %>% 
  mutate(findout_select.pre = str_trim(findout_select.pre)) %>%
  count(findout_select.pre, name = "count") %>% 
  drop_na() %>% 
  mutate(percent = (count / (n_total - n_post)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(findw, aes(y=reorder(findout_select.pre, count), x=count)) +
    geom_col() +
    geom_label(aes(label = text, hjust = -0.1),
               size = 3) +
    labs(x = "# respondents", y = element_blank()) + 
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank()
      ) +
    expand_limits(x = c(0,max(findw$count)*1.2))

What you most hope to learn?

Code
results %>% group_by(workshop) %>% 
  select(workshop, hopes.pre) %>% 
  drop_na()
workshop hopes.pre
2024-11-05-ucsb-webscraping A more efficient way of web scraping
2024-11-05-ucsb-webscraping how to do web scraping on my own on whatever site I choose
2024-11-05-ucsb-webscraping scrape news
2024-11-05-ucsb-webscraping learning more about how python works and how I can apply it to my future research
2024-11-05-ucsb-webscraping Tableau
2024-11-05-ucsb-webscraping Effectively extract information from the web
2024-11-05-ucsb-webscraping I have no experience in web scraping and don’t anticipate needing it for my research, so I’m just here to learn anything. Would love to learn something that I can be turned into a personal project.
2024-11-05-ucsb-webscraping Intermediate coding that is applicable

Learning environment in the workshop

Code
orderedq <- c("Strongly Disagree", "Somewhat Disagree", "Neither Agree or Disagree","Somewhat Agree", "Strongly Agree")
addNA(orderedq)
Code
agree_questions <- results %>% 
  select(join_key, agree_apply.post,    agree_comfortable.post, agree_clearanswers.post,
         agree_instr_enthusiasm.post, agree_instr_interaction.post, agree_instr_knowledge.post
) %>% 
  filter(!if_all(-join_key, is.na))

n_agree_questions <- nrow(agree_questions)
  
agree_questions <- agree_questions %>%
  pivot_longer(cols = -join_key, names_to = "Question", values_to = "Response") %>% 
  mutate(Response = factor(Response, levels = orderedq),
         Question = recode(Question,
                     "agree_apply.post" = "Can immediatly apply 
 what they learned",
                     "agree_comfortable.post" = "Comfortable learning in 
 the workshop environment",
                     "agree_clearanswers.post" = "Got clear answers 
 from instructors",
                     "agree_instr_enthusiasm.post" = "Instructors were enthusiastic",
                     "agree_instr_interaction.post" = "Comfortable interacting 
 with instructors",
                     "agree_instr_knowledge.post" = "Instructors were knowledgeable 
 about the material"
      ))

summary_data <- agree_questions %>%
  count(Question, Response, name = "count") %>% 
  mutate(percent = (count / n_agree_questions) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(summary_data, aes(x = Question, y = count, fill = Response)) +
  geom_col(position = "fill", color = "black", show.legend = TRUE) +
  scale_y_continuous(labels = scales::percent_format()) + 
  scale_fill_manual(values = c("Strongly Disagree" = "#d01c8b", 
                               "Somewhat Disagree" = "#f1b6da", 
                               "Neither Agree or Disagree" = "#f7f7f7", 
                               "Somewhat Agree" = "#b8e186", 
                               "Strongly Agree" = "#4dac26"), 
                    na.translate = TRUE, na.value = "#cccccc", 
                    breaks = orderedq, drop = FALSE) +
  geom_text(aes(label = text), size = 3,
             position = position_fill(vjust = 0.5)) +
  labs(y = "# respondents (Percentage)", x = element_blank(), fill = "Responses",
       subtitle = paste0("Number of responses: ", n_agree_questions)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        plot.subtitle = element_text(hjust = 0.5, size = 12))

How an instructor or helper affected your learning experience

Code
results %>% 
  group_by(workshop) %>% 
  select(workshop, instructor_example.post) %>%
  drop_na()
workshop instructor_example.post
2024-11-05-ucsb-webscraping I felt very comfortable asking for help, because there was a system put in place (red/blue sticky notes and multiple helpers standing around) that encouraged questions and asking for help. When I did ask, I got a thorough, easy-to-understand explanation.
2024-11-05-ucsb-webscraping offered help right away when I raised my hand/put a sticky to indicate that I’m struggling
2024-11-05-ucsb-webscraping very hands-on, helpful, engaging
2024-11-05-ucsb-webscraping The instructor and helpers answered all the questions we had in a respectable and clear manner. It was a safe space for learning.

Skills and perception comparison

Code
# Calculate mean scores and make graph for all respondents (only_matched=FALSE)
tryCatch(
  {
mean_nresp <- get_mean_scores_nresp(results, only_matched=FALSE)
graph_pre_post(mean_nresp$mean_scores, mean_nresp$n_resp_pre, mean_nresp$n_resp_post, mean_nresp$n_resp_pre_post, only_matched=FALSE)
},
error = function(cond) {
message("Could not do the plots as there are no pre or post results to show")
}
)

Code
# Calculate mean scores and make graph for only matched respondents in pre and post (only_matched=TRUE)
tryCatch(
  {
mean_nresp <- get_mean_scores_nresp(results, only_matched=TRUE)
graph_pre_post(mean_nresp$mean_scores, mean_nresp$n_resp_pre, mean_nresp$n_resp_post, mean_nresp$n_resp_pre_post, only_matched=TRUE)
},
error = function(cond) {
message("Could not do the plots as there are no pre or post results to show")
}
)

Workshop Strengths

Code
results %>% 
  group_by(workshop) %>% 
  select(workshop, workshop_strengths.post) %>% 
  drop_na()
workshop workshop_strengths.post
2024-11-05-ucsb-webscraping beginner-friendly, strong helping/question-answering system, interesting and practical subject, online resources available to make it easier to follow along
2024-11-05-ucsb-webscraping Nice and slow, but also keeps everything interesting at the same time; Very detailed about how different parts of code works
2024-11-05-ucsb-webscraping friendly and active engagement.
2024-11-05-ucsb-webscraping get timely help
2024-11-05-ucsb-webscraping Having helpers made the workshop run smoothly. ; Coding along the instructor was useful.; The examples used were not trivial.

Ways to improve the workshop

Code
results %>% 
  group_by(workshop) %>% 
  select(workshop, workshop_improved.post) %>% 
  drop_na()
workshop workshop_improved.post
2024-11-05-ucsb-webscraping I mainly felt like it was a bit long, 4 hours spread out over two days. I think it could be shortened to 2-3 hours. Or instead, maybe provide a detailed schedule of what topics will be discussed, because I didn’t really need the first 30-40 min of the first workshop (I knew how HTML worked already, more or less) and was a bit disengaged during it. It’s difficult to accommodate different skill/knowledge levels and the workshop did that by going from the ground up with basics, which is nice if I were someone that needed that, I’m just not.; ; More about the length of the workshop: I did have to skip class to attend, though there are always trade-offs with scheduling at other times too. Making the workshop shorter would alleviate some schedule conflicts though.
2024-11-05-ucsb-webscraping more detailed.
2024-11-05-ucsb-webscraping more discussion time
2024-11-05-ucsb-webscraping I believe we all have different projects in mind. It would be nice if we can look at our own webpage that we are interested in and extract some data using the tools we have learned from the workshop.

How likely are you to recommend this workshop? Scale 0 - 10

Code
orderedq <- c("Detractor", "Passive", "Promoter")

nps <- results %>% 
  count(recommend_group.post, recommende_score.post, name = "count") %>% 
  drop_na() %>% 
  mutate(recommend_group.post = factor(recommend_group.post, levels = orderedq),
         percent = (count/sum(count)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

nps %>% 
ggplot(aes(x=recommende_score.post, y=count, fill=recommend_group.post)) +
  geom_col(color="black", show.legend = TRUE) +
  scale_fill_manual(values = c("Detractor" = "#af8dc3", "Passive" = "#f7f7f7", "Promoter" = "#7fbf7b"), breaks = c("Detractor", "Passive", "Promoter"), drop = FALSE) +
  geom_label(aes(label = text, vjust = -0.5), fill = "white", size= 3) +
  scale_x_continuous(breaks = 1:10) +
  labs(x = "NPS Score", y = "# respondents", subtitle = paste0("Number of responses: ", sum(nps$count), "
 Mean score: ", format(weighted.mean(nps$recommende_score.post, nps$count), digits = 3))) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    plot.subtitle = element_text(hjust = 0.5, size = 12)
  ) +
  expand_limits(x = c(1,10),
                y = c(0, max(nps$count)*1.1))

Topic Suggestions

Code
results %>% 
  group_by(workshop) %>% 
  select(workshop, suggest_topics.post) %>% 
  drop_na()
workshop suggest_topics.post
2024-11-05-ucsb-webscraping video editing, LaTeX
2024-11-05-ucsb-webscraping No suggestions