Winter 2024 Webscraping Workshop Survey Responses

Number of responses

Code
library(tidyverse)
library(bslib)
library(shiny)
library(bsicons)
source("scripts/helper_functions.R")

# list of workshop IDs to filter results
workshops <- c("2024-02-27-ucsb-webscraping")

results <- read_csv("data-joined/all_workshops.csv") %>% 
  filter(workshop %in% workshops)
  
# Fix comma separator
results <- results %>% 
  mutate(findout_select.pre = str_replace_all(
  findout_select.pre, 
  "Twitter, Facebook, etc.", 
  "Twitter; Facebook; etc."))

pre_survey <- results %>%
  select(ends_with(".pre"))

post_survey <- results %>%
  select(ends_with(".post"))

n_pre <- sum(apply(post_survey, 1, function(row) all(is.na(row))))
n_post <- sum(apply(pre_survey, 1, function(row) all(is.na(row))))
n_total <- nrow(results)
n_both <- nrow(results) - n_pre - n_post

layout_columns(
  value_box(
    title = "Total responses", value = n_total, ,
    theme = NULL, showcase = bs_icon("people-fill"), showcase_layout = "left center",
    full_screen = FALSE, fill = TRUE, height = NULL
  ),
  value_box(
    title = "Both pre- and post-", value = n_both, , theme = NULL,
    showcase = bs_icon("arrows-expand-vertical"), showcase_layout = "left center",
    full_screen = FALSE, fill = TRUE, height = NULL
  ),
  value_box(
    title = "Only pre-workshop", value = n_pre, ,
    theme = NULL, showcase = bs_icon("arrow-left-short"), showcase_layout = "left center",
    full_screen = FALSE, fill = TRUE, height = NULL
  ),
  value_box(
    title = "Only post-workshop", value = n_post, , theme = NULL,
    showcase = bs_icon("arrow-right-short"), showcase_layout = "left center",
    full_screen = FALSE, fill = TRUE, height = NULL
  )
)

Total responses

14

Both pre- and post-

6

Only pre-workshop

7

Only post-workshop

1

Departments

Code
depts <- results %>% select(dept_select.pre) %>% 
  separate_rows(dept_select.pre, sep=",") %>%
  mutate(dept_select.pre = str_trim(dept_select.pre)) %>%
  count(dept_select.pre, name = "count") %>% 
  mutate(percent = (count / (n_total - n_post)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(depts, aes(y=reorder(dept_select.pre, count), x=count)) +
    geom_col() +
    geom_label(aes(label = text, hjust = -0.1),
               size = 3) +
    labs(x = "# respondents", y = element_blank()) +  
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank()
      ) +
    expand_limits(x = c(0,max(depts$count)*1.1))

“Other” Departments

Code
other_depts <- results %>% 
  count(dept_other.pre, name = "count") %>% 
  drop_na() %>% 
  mutate(percent = (count / (n_total - n_post)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(other_depts, aes(y=reorder(dept_other.pre, count), x=count)) +
    geom_col() +
    geom_label(aes(label = text, hjust = -0.1),
               size = 3) +
    labs(x = "# respondents", y = element_blank()) + 
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank()
      ) +
    expand_limits(x = c(0,max(other_depts$count)*1.1))

Current occupation / Career stage

Code
ocup <- results %>% select(occupation.pre) %>% 
  separate_rows(occupation.pre, sep=",") %>%
  mutate(occupation.pre = str_trim(occupation.pre)) %>%
  count(occupation.pre, name = "count") %>% 
  drop_na() %>% 
  mutate(percent = (count / (n_total - n_post)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(ocup, aes(y=reorder(occupation.pre, count), x=count)) +
    geom_col() +
    geom_label(aes(label = text, hjust = -0.1),
               size = 3) +
    labs(x = "# respondents", y = element_blank()) + 
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank()
      ) +
    expand_limits(x = c(0,max(ocup$count)*1.2))

Motivation - Why are you participating in this workshop?

Code
motiv <- results %>% select(motivation_select.pre) %>% 
  separate_rows(motivation_select.pre, sep=",")  %>% 
  mutate(motivation_select.pre = str_trim(motivation_select.pre)) %>%
  count(motivation_select.pre, name = "count") %>% 
  drop_na() %>% 
  mutate(percent = (count / (n_total - n_post)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(motiv, aes(y=reorder(motivation_select.pre, count), x=count)) +
    geom_col() +
    geom_label(aes(label = text, hjust = -0.1),
               size = 3) +
    labs(x = "# respondents", y = element_blank()) + 
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank()
      ) +
    expand_limits(x = c(0,max(motiv$count)*1.2))

How did you find out about this workshop?

Code
findw <- results %>% select(findout_select.pre) %>% 
  separate_rows(findout_select.pre, sep=",")  %>% 
  mutate(findout_select.pre = str_trim(findout_select.pre)) %>%
  count(findout_select.pre, name = "count") %>% 
  drop_na() %>% 
  mutate(percent = (count / (n_total - n_post)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(findw, aes(y=reorder(findout_select.pre, count), x=count)) +
    geom_col() +
    geom_label(aes(label = text, hjust = -0.1),
               size = 3) +
    labs(x = "# respondents", y = element_blank()) + 
    theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank()
      ) +
    expand_limits(x = c(0,max(findw$count)*1.2))

What you most hope to learn?

Code
results %>% group_by(workshop) %>% 
  select(workshop, hopes.pre) %>% 
  drop_na()
workshop hopes.pre
2024-02-27-ucsb-webscraping More comfortable writing scripts for data mining and better data and file management.
2024-02-27-ucsb-webscraping webscraping tools
2024-02-27-ucsb-webscraping Learning some basic data analysis techniques
2024-02-27-ucsb-webscraping How scraping can be done, easily
2024-02-27-ucsb-webscraping how to do it!
2024-02-27-ucsb-webscraping It’s been a while since I took my first R workshop and I don’t have a chance to use it much. For now I want to get a handle on the basics and enough vocabulary that when I run into trouble I can find answers on how to fix things in a forum or ask for help.
2024-02-27-ucsb-webscraping Techniques for gathering, organizing, and analyzing materials from NGO websites
2024-02-27-ucsb-webscraping How to use Scraper and begin to understand R and Python
2024-02-27-ucsb-webscraping How to apply webscraping to get jobs in the future
2024-02-27-ucsb-webscraping Quantitative analysis for online forums (keywords etc)
2024-02-27-ucsb-webscraping Would like to learn how to use APIs

Learning environment in the workshop

Code
orderedq <- c("Strongly Disagree", "Somewhat Disagree", "Neither Agree or Disagree","Somewhat Agree", "Strongly Agree")
addNA(orderedq)
Code
agree_questions <- results %>% 
  select(join_key, agree_apply.post,    agree_comfortable.post, agree_clearanswers.post,
         agree_instr_enthusiasm.post, agree_instr_interaction.post, agree_instr_knowledge.post
) %>% 
  filter(!if_all(-join_key, is.na))

n_agree_questions <- nrow(agree_questions)
  
agree_questions <- agree_questions %>%
  pivot_longer(cols = -join_key, names_to = "Question", values_to = "Response") %>% 
  mutate(Response = factor(Response, levels = orderedq),
         Question = recode(Question,
                     "agree_apply.post" = "Can immediatly apply 
 what they learned",
                     "agree_comfortable.post" = "Comfortable learning in 
 the workshop environment",
                     "agree_clearanswers.post" = "Got clear answers 
 from instructors",
                     "agree_instr_enthusiasm.post" = "Instructors were enthusiastic",
                     "agree_instr_interaction.post" = "Comfortable interacting 
 with instructors",
                     "agree_instr_knowledge.post" = "Instructors were knowledgeable 
 about the material"
      ))

summary_data <- agree_questions %>%
  count(Question, Response, name = "count") %>% 
  mutate(percent = (count / n_agree_questions) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

ggplot(summary_data, aes(x = Question, y = count, fill = Response)) +
  geom_col(position = "fill", color = "black", show.legend = TRUE) +
  scale_y_continuous(labels = scales::percent_format()) + 
  scale_fill_manual(values = c("Strongly Disagree" = "#d01c8b", 
                               "Somewhat Disagree" = "#f1b6da", 
                               "Neither Agree or Disagree" = "#f7f7f7", 
                               "Somewhat Agree" = "#b8e186", 
                               "Strongly Agree" = "#4dac26"), 
                    na.translate = TRUE, na.value = "#cccccc", 
                    breaks = orderedq, drop = FALSE) +
  geom_text(aes(label = text), size = 3,
             position = position_fill(vjust = 0.5)) +
  labs(y = "# respondents (Percentage)", x = element_blank(), fill = "Responses",
       subtitle = paste0("Number of responses: ", n_agree_questions)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        plot.subtitle = element_text(hjust = 0.5, size = 12))

How an instructor or helper affected your learning experience

Code
results %>% 
  group_by(workshop) %>% 
  select(workshop, instructor_example.post) %>%
  drop_na()
workshop instructor_example.post
2024-02-27-ucsb-webscraping providing time to research ourselves and provide answers after
2024-02-27-ucsb-webscraping Very knowledgeable, keenly mindful of learners’ needs
2024-02-27-ucsb-webscraping both instructors seemed to me to move really fast; they were doing things on the screen that students were supposed to be also doing on their own screens, but I found it hard to follow their actions. Plus, they really assumed people already knew the Xpath terms and such. This is what an early blurb said: “introducing tools for turning web pages into analyzable data. We will explore common web scraping scenarios and learn how to use a web browser to extract data from websites without needing to write code.” Huh.
2024-02-27-ucsb-webscraping Answered my question throughly and explained it in a second way
2024-02-27-ucsb-webscraping The workshop was ok, I don’t feel like I learned 3 hours worth of material. The information provided seemed to be by rote and I didn’t get much of a sense of expertise or a way to understand underlying theory from the instructors.

Skills and perception comparison

Code
# Calculate mean scores and make graph for all respondents (only_matched=FALSE)
tryCatch(
  {
mean_nresp <- get_mean_scores_nresp(results, only_matched=FALSE)
graph_pre_post(mean_nresp$mean_scores, mean_nresp$n_resp_pre, mean_nresp$n_resp_post, mean_nresp$n_resp_pre_post, only_matched=FALSE)
},
error = function(cond) {
message("Could not do the plots as there are no pre or post results to show")
}
)

Code
# Calculate mean scores and make graph for only matched respondents in pre and post (only_matched=TRUE)
tryCatch(
  {
mean_nresp <- get_mean_scores_nresp(results, only_matched=TRUE)
graph_pre_post(mean_nresp$mean_scores, mean_nresp$n_resp_pre, mean_nresp$n_resp_post, mean_nresp$n_resp_pre_post, only_matched=TRUE)
},
error = function(cond) {
message("Could not do the plots as there are no pre or post results to show")
}
)

Workshop Strengths

Code
results %>% 
  group_by(workshop) %>% 
  select(workshop, workshop_strengths.post) %>% 
  drop_na()
workshop workshop_strengths.post
2024-02-27-ucsb-webscraping good baseline understanding of how to do webscraping;
2024-02-27-ucsb-webscraping Practicality
2024-02-27-ucsb-webscraping the first bit, the kind of intense introduction to XPath and its use in exploring HTML pages was probably good for people well prepared in advance. Tough if you don’t really know the HTML terminology.
2024-02-27-ucsb-webscraping Learned a lot of new information, taught in a way that was easy and accessible.
2024-02-27-ucsb-webscraping I’m sorry I can’t think of any right now - I feel like I would have been just as well served with a link to the tool and a web tutorial.

Ways to improve the workshop

Code
results %>% 
  group_by(workshop) %>% 
  select(workshop, workshop_improved.post) %>% 
  drop_na()
workshop workshop_improved.post
2024-02-27-ucsb-webscraping Maybe go over more examples of how webscraping can be used in research settings.
2024-02-27-ucsb-webscraping make it really introductory? Persuade me that scraping a web site is quicker and faster than just copying it and pasting it into excel or word and then extracting what I want. Don’t assume I know what you mean by an “anchor.”
2024-02-27-ucsb-webscraping Better explanation of possible uses for these tools
2024-02-27-ucsb-webscraping It felt like the XPath instructor didn’t know much about the Scraper tool, but I think that doesn’t necessarily need to be resolved more even knowledge of the entire workshop maybe
2024-02-27-ucsb-webscraping I understand starting us with the basics but talking us through or referring us to how it could be built into something more complex is a loop I was expecting to see completed but never came about. Even if there’s no time to demo starting with something that we’d actually want to apply, then walking the more complex topic back would have been more useful.

How likely are you to recommend this workshop? Scale 0 - 10

Code
orderedq <- c("Detractor", "Passive", "Promoter")

nps <- results %>% 
  count(recommend_group.post, recommende_score.post, name = "count") %>% 
  drop_na() %>% 
  mutate(recommend_group.post = factor(recommend_group.post, levels = orderedq),
         percent = (count/sum(count)) * 100,
         text = sprintf("%.0f (%.0f%%)", count, percent))

nps %>% 
ggplot(aes(x=recommende_score.post, y=count, fill=recommend_group.post)) +
  geom_col(color="black", show.legend = TRUE) +
  scale_fill_manual(values = c("Detractor" = "#af8dc3", "Passive" = "#f7f7f7", "Promoter" = "#7fbf7b"), breaks = c("Detractor", "Passive", "Promoter"), drop = FALSE) +
  geom_label(aes(label = text, vjust = -0.5), fill = "white", size= 3) +
  scale_x_continuous(breaks = 1:10) +
  labs(x = "NPS Score", y = "# respondents", subtitle = paste0("Number of responses: ", sum(nps$count), "
 Mean score: ", format(weighted.mean(nps$recommende_score.post, nps$count), digits = 3))) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    plot.subtitle = element_text(hjust = 0.5, size = 12)
  ) +
  expand_limits(x = c(1,10),
                y = c(0, max(nps$count)*1.1))

Topic Suggestions

Code
results %>% 
  group_by(workshop) %>% 
  select(workshop, suggest_topics.post) %>% 
  drop_na()
workshop suggest_topics.post
2024-02-27-ucsb-webscraping R studio and git
2024-02-27-ucsb-webscraping hard to say. I think there was a very wide range of skills and backgrounds in the room. So you quite likely hit the sweet spot. But I would need more background and slower pace.
2024-02-27-ucsb-webscraping Second session on scraping?
2024-02-27-ucsb-webscraping I’d be interested in the second part of Web scraping workshop
2024-02-27-ucsb-webscraping Text analysis tools