-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWOS_WC_extraction_for_vos_viewer.R
86 lines (71 loc) · 2.98 KB
/
WOS_WC_extraction_for_vos_viewer.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
library(plyr)
# UPDATE WORKING DIRECTORY AS NECESSARY
setwd("/Users/laurasheble/SD_WOS") #you will need to change the filepath on your machine
data <- "SD_WCs_312_noMtgAbstracts_05mar2015data.txt"
df <- read.table(data, na.strings = "na", header=TRUE, sep = "\t", stringsAsFactors=FALSE)
detach(df)
attach(df)
colnames(df)
df[0:10,"UT"]
# create a dataframe with just UT's and WC's
df_wc <- cbind(df["UT"], df["WC"])
# record orig dataframe dimensions to see how many rows
orig_dim <- dim(df_wc)
orig_dim
'''
## now use WCslist to look at list of SCs for each document, add a 1 to the df0 under the appropriate column if present
'''
### My Starter code: SC Extraction ###
# Get list of unique SCs
WCs <- df_wc[,2]
# WCs
WCslist <- lapply(WCs, strsplit, split="; ")
WCunique <- unique(unlist(WCslist, use.names = FALSE))
# Sort alphabetically - this seems pointless - gets turned back later
# note that apostrophes could be a problem
WCunique <- sort(WCunique)
# create a dataframe of 0s to add to the original df
df0 <- as.data.frame(matrix(0, ncol=length(WCunique), nrow=nrow(df_wc)))
colnames(df0) <- WCunique
df0 <- sapply(WCunique, function(x) as.integer(grepl(x, WCs)))
# df0
### column bind, minus the original SC field with all SCs
df <- cbind(df_wc[-2],df0)
### END SC Extraction ###Author PY Source Vol Page Cites proportion
### Overview of extracted SCs ###
df[1:2,]
dim(df)
# change to number of rows in my set
# ncol gets the number of columns, the first column is the WOS 'UT' field, which we don't want
col_ct <- ncol(df)
col_ct # 101
no_scsPerDoc <- rowSums(df[2:col_ct])
max(no_scsPerDoc) # 7
min(no_scsPerDoc) # 1
mean(no_scsPerDoc) # 2.070513
median(no_scsPerDoc) # 2
### Construct (Symmetric) Adjacency graphs ###
# co-occurrence (adjacency) with documents
df0_co_wc <- df0 %*% t(df0)
# co-occurrence (adjacency) with research areas
df0_co_wc2 <- t(df0) %*% df0
# above, with 0 on the diagonal:
df0_co_wc2d0 <- df0_co_wc2
diag(df0_co_wc2d0) <- 0
### End adjacency graph construction ###
### Write adjacency matrix to file for use with VosViewer ###
ints4rownames <- rep(1:dim(df0_co_wc2)[1])
df0_co_wc2_vos <- df0_co_wc2
rownames(df0_co_wc2_vos) <- ints4rownames
write.table(df0_co_wc2_vos, row.names = TRUE, col.names=FALSE, quote=FALSE, sep=",", file ="/Users/laurasheble/SD_WOS/SD_WOS_6mar2015query_WC_adjMatrixdf0_co_wc2_vos.csv")
# generate file of SC names
WC_names_vos <- as.data.frame(rownames(df0_co_wc2))
WC_names_vos <- cbind(as.numeric(ints4rownames), WC_names_vos)
colnames(WC_names_vos) <- (c("id", "Research Field"))
vos_col_names <- (c("id", "label"))
write.table(WC_names_vos, row.names = FALSE, col.names=vos_col_names, sep=",", quote=(2), file ="/Users/laurasheble/SD_WOS/SD_WOS_6mar2015query_WC_adjMatrixdf0_co_wc2_vos_scNames_v2.txt")
length(WC_names_vos)
# NOW WORK IN VOS!
# After working with these in VOSviewer, decided the visuals weren't what I wanted,
# exported gml format from VOS, imported into Gephi.
# For future, could change output to import directly into Gephi (or Pajek)