R語言 igraph——圖挖掘助力社會網絡分析

``> library(igraph)> # Create a directed graph> g <- graph(c(0,1, 0,2, 1,3, 0,3), directed=T)> gVertices: 4Edges: 4Directed: TRUEEdges:[0] 0 -> 1[1] 0 -> 2[2] 1 -> 3[3] 0 -> 3> # Create a directed graph using adjacency matrix> m <- matrix(runif(4*4), nrow=4)> m[,1]      [,2]      [,3]      [,4][1,] 0.4086389 0.2160924 0.1557989 0.2896239[2,] 0.4669456 0.1071071 0.1290673 0.3715809[3,] 0.2031678 0.3911691 0.5906273 0.7417764[4,] 0.8808119 0.7687493 0.9734323 0.4487252> g <- graph.adjacency(m > 0.5)> gVertices: 4Edges: 5Directed: TRUEEdges:[0] 2 -> 2[1] 2 -> 3[2] 3 -> 0[3] 3 -> 1[4] 3 -> 2> plot(g, layout=layout.fruchterman.reingold)>``

iGraph也提供了多種創建各種圖形的圖表的簡單方法

``> #Create a full graph> g1 <- graph.full(4)> g1Vertices: 4Edges: 6Directed: FALSEEdges:[0] 0 -- 1[1] 0 -- 2[2] 0 -- 3[3] 1 -- 2[4] 1 -- 3[5] 2 -- 3> #Create a ring graph> g2 <- graph.ring(3)> g2Vertices: 3Edges: 3Directed: FALSEEdges:[0] 0 -- 1[1] 1 -- 2[2] 0 -- 2> #Combine 2 graphs> g <- g1 %du% g2> gVertices: 7Edges: 9Directed: FALSEEdges:[0] 0 -- 1[1] 0 -- 2[2] 0 -- 3[3] 1 -- 2[4] 1 -- 3[5] 2 -- 3[6] 4 -- 5[7] 5 -- 6[8] 4 -- 6> graph.difference(g, graph(c(0,1,0,2), directed=F))Vertices: 7Edges: 7Directed: FALSEEdges:[0] 0 -- 3[1] 1 -- 3[2] 1 -- 2[3] 2 -- 3[4] 4 -- 6[5] 4 -- 5[6] 5 -- 6> # Create a lattice> g1 = graph.lattice(c(3,4,2))> # Create a tree> g2 = graph.tree(12, children=2)> plot(g1, layout=layout.fruchterman.reingold)> plot(g2, layout=layout.reingold.tilford)``

iGraph還提供了另外兩種圖表生成的機制。“隨機圖表”可以在任意兩個節點之間進行連線。而“優先連接”會給已經擁有較大度數的節點再增加連線（也就是多者更多）。

``# Generate random graph, fixed probability> g <- erdos.renyi.game(20, 0.3)> plot(g, layout=layout.fruchterman.reingold, vertex.label=NA, vertex.size=5)# Generate random graph, fixed number of arcs> g <- erdos.renyi.game(20, 15, type='gnm')# Generate preferential attachment graph> g <- barabasi.game(60, power=1, zero.appeal=1.3)``

``# Create the graph and assign random edge weights> g <- erdos.renyi.game(12, 0.35)> E(g)\$weight <- round(runif(length(E(g))),2) * 50> plot(g, layout=layout.fruchterman.reingold, edge.label=E(g)\$weight)# Compute the minimum spanning tree> mst <- minimum.spanning.tree(g)> plot(mst, layout=layout.reingold.tilford,  edge.label=E(mst)\$weight)``

``> g <- graph(c(0, 1, 1, 2, 2, 0, 1, 3, 3, 4, 4, 5, 5, 3, 4, 6, 6, 7, 7, 8, 8, 6, 9, 10, 10, 11, 11, 9))# Nodes reachable from node4> subcomponent(g, 4, mode="out")[1] 4 5 6 3 7 8# Nodes who can reach node4> subcomponent(g, 4, mode="in")[1] 4 3 1 5 0 2> clusters(g, mode="weak")\$membership [1] 0 0 0 0 0 0 0 0 0 1 1 1\$csize[1] 9 3\$no[1] 2> myc <- clusters(g, mode="strong")> myc\$membership [1] 1 1 1 2 2 2 3 3 3 0 0 0\$csize[1] 3 3 3 3\$no[1] 4> mycolor <- c('green', 'yellow', 'red', 'skyblue')> V(g)\$color <- mycolor[myc\$membership + 1]> plot(g, layout=layout.fruchterman.reingold)``

``> g <- erdos.renyi.game(12, 0.25)> plot(g, layout=layout.fruchterman.reingold)> pa <- get.shortest.paths(g, 5, 9)[[1]]> pa[1] 5 0 4 9> V(g)[pa]\$color <- 'green'> E(g)\$color <- 'grey'> E(g, path=pa)\$color <- 'red'> E(g, path=pa)\$width <- 3> plot(g, layout=layout.fruchterman.reingold)``

- 圖表的大小（節點和連線的數量）
- 圖表的密度是緊密的（|E|與|V|的平方成正比）還是稀疏的（|E|與|V|成正比）?
- 圖表是連通的（大部分節點是互通的）還是非連通的（節點是孤立的）？
- 圖表中最長的兩點之間距離
- 有向圖的對稱性
- 出/入“度”的分布

``> # Create a random graph > g <- erdos.renyi.game(200, 0.01)> plot(g, layout=layout.fruchterman.reingold, vertex.label=NA, vertex.size=3)> # No of nodes> length(V(g))[1] 200> # No of edges> length(E(g))[1] 197> # Density (No of edges / possible edges)> graph.density(g)[1] 0.009899497> # Number of islands> clusters(g)\$no[1] 34> # Global cluster coefficient:> #(close triplets/all triplets)> transitivity(g, type="global")[1] 0.015> # Edge connectivity, 0 since graph is disconnected> edge.connectivity(g)[1] 0> # Same as graph adhesion> graph.adhesion(g)[1] 0> # Diameter of the graph> diameter(g)[1] 18> # Reciprocity of the graph> reciprocity(g)[1] 1> # Diameter of the graph> diameter(g)[1] 18> # Reciprocity of the graph> reciprocity(g)[1] 1> degree.distribution(g)[1] 0.135 0.280 0.315 0.110 0.095 0.050 0.005 0.010> plot(degree.distribution(g), xlab="node degree")> lines(degree.distribution(g))``

- 計算兩點之間沒有公用連線的路徑（也就是需要移除多少條連線可以使兩節點不連通）
- 計算兩點之間的最短路徑
- 計算兩點之間路徑的數量和長度

``> # Create a random graph> g <- erdos.renyi.game(9, 0.5)> plot(g, layout=layout.fruchterman.reingold)> # Compute the shortest path matrix> shortest.paths(g)      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [1,]    0    1    3    1    2    2    1    3    2 [2,]    1    0    2    2    3    2    2    2    1 [3,]    3    2    0    2    1    2    2    2    1 [4,]    1    2    2    0    3    1    2    2    1 [5,]    2    3    1    3    0    3    1    3    2 [6,]    2    2    2    1    3    0    2    1    1 [7,]    1    2    2    2    1    2    0    2    1 [8,]    3    2    2    2    3    1    2    0    1 [9,]    2    1    1    1    2    1    1    1    0> # Compute the connectivity matrix> M <- matrix(rep(0, 81), nrow=9)> M      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [1,]    0    0    0    0    0    0    0    0    0 [2,]    0    0    0    0    0    0    0    0    0 [3,]    0    0    0    0    0    0    0    0    0 [4,]    0    0    0    0    0    0    0    0    0 [5,]    0    0    0    0    0    0    0    0    0 [6,]    0    0    0    0    0    0    0    0    0 [7,]    0    0    0    0    0    0    0    0    0 [8,]    0    0    0    0    0    0    0    0    0 [9,]    0    0    0    0    0    0    0    0    0> for (i in 0:8) {+   for (j in 0:8) {+     if (i == j) {+       M[i+1, j+1] <- -1+     } else {+       M[i+1, j+1] <- edge.connectivity(g, i, j)+     }+   }+ }> M      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [1,]   -1    2    2    3    2    3    3    2    3 [2,]    2   -1    2    2    2    2    2    2    2 [3,]    2    2   -1    2    2    2    2    2    2 [4,]    3    2    2   -1    2    3    3    2    3 [5,]    2    2    2    2   -1    2    2    2    2 [6,]    3    2    2    3    2   -1    3    2    3 [7,]    3    2    2    3    2    3   -1    2    3 [8,]    2    2    2    2    2    2    2   -1    2 [9,]    3    2    2    3    2    3    3    2   -1> ``

- 擁有較高出/入度數的節點也擁有較高的“度中心性”
- 與其他節點之間有短路徑的節點擁有較高的“密集中心性”
- 與其他節點對之間有最短路徑的節點擁有較高的“中間性”
- 連接了許多中心性較高節點的節點擁有較高的“特征向量中心性”
- 本地簇系數意味着相鄰節點的互聯性

``> # Degree> degree(g)[1] 2 2 2 2 2 3 3 2 6> # Closeness (inverse of average dist)> closeness(g)[1] 0.4444444 0.5333333 0.5333333 0.5000000[5] 0.4444444 0.5333333 0.6153846 0.5000000[9] 0.8000000> # Betweenness> betweenness(g)[1]  0.8333333  2.3333333  2.3333333[4]  0.0000000  0.8333333  0.5000000[7]  6.3333333  0.0000000 18.8333333> # Local cluster coefficient> transitivity(g, type="local")[1] 0.0000000 0.0000000 0.0000000 1.0000000[5] 0.0000000 0.6666667 0.0000000 1.0000000[9] 0.1333333> # Eigenvector centrality> evcent(g)\$vector[1] 0.3019857 0.4197153 0.4197153 0.5381294[5] 0.3019857 0.6693142 0.5170651 0.5381294[9] 1.0000000> # Now rank them> order(degree(g))[1] 1 2 3 4 5 8 6 7 9> order(closeness(g))[1] 1 5 4 8 2 3 6 7 9> order(betweenness(g))[1] 4 8 6 1 5 2 3 7 9> order(evcent(g)\$vector)[1] 1 5 2 3 7 4 8 6 9``

``> # Create a graph> g1 <- barabasi.game(100, directed=F)> g2 <- barabasi.game(100, directed=F)> g <- g1 %u% g2> lay <- layout.fruchterman.reingold(g)> # Plot the eigevector and betweenness centrality> plot(evcent(g)\$vector, betweenness(g))> text(evcent(g)\$vector, betweenness(g), 0:100, cex=0.6, pos=4)> V(g)[12]\$color <- 'red'> V(g)[8]\$color <- 'green'> plot(g, layout=lay, vertex.size=8, vertex.label.cex=0.6)``