基于 Spark GraphX + Neo4j 实现用户社群发现
- 2019 年 10 月 3 日
- 筆記
????????????????????????????????????????????????????????????????????????APP???????????????????????????APP??????????????????APP?????????????????????????????????????????????????????????????????????????????????????????APP???????
?????????????????????????????????????????????????????????????????????????????????????
????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
???????????????????????????????????????????????????????????????????????????????????????????????????????
???????????????????????????"????"????????????????????????????????
????
- Triangle Counting?????????????
- Connected Components????????????
- Strongly Connected Components?????????????
- Label Propagation????????
- Louvain?????"???"??????
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????
Spark Graphx????
Spark Graphx?????????????????????????????????????????
????????????Graphx?????????????????????????????Spark Graphx???????id???Long????????????????????id???id????
// Vertex set: (vertexId, userName) pairs. GraphX vertex ids are Longs.
val users: RDD[(VertexId, String)] = spark.sparkContext.parallelize(Array(
  (3L, "u3"), (7L, "u7"), (5L, "u5"), (2L, "u2"), (4L, "u4"), (6L, "u6"), (8L, "u8")))

// Edge set: Edge(srcId, dstId, attribute); the attribute is left empty here.
val relationships: RDD[Edge[String]] = spark.sparkContext.parallelize(Array(
  Edge(7L, 3L, ""), Edge(5L, 3L, ""), Edge(5L, 2L, ""), Edge(6L, 4L, ""), Edge(8L, 6L, "")))

// Build the graph from the vertex and edge RDDs.
val graph = Graph(users, relationships)

// Compute the connected components of the graph.
val components = graph.connectedComponents()

// Each element of `vertices` is a tuple whose key is a vertex id and whose value is the id of
// the component that vertex belongs to (the smallest vertex id inside that component).
val vertices = components.vertices
???vertices????k-v???
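/**
 * vertices:
 * (6,4)
 * (8,4)
 * (4,4)
 * (3,2)
 * (7,2)
 * (5,2)
 * (2,2)
 *
 * Each element is a tuple: the key is a vertex id, the value is the id of the connected
 * component the key belongs to (the smallest vertex id in that component).
 */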
??????relationships?vertices?????????????id????
// Key every edge by its source vertex id, join with `vertices` to attach that vertex's
// component id, then re-key by component id so edges of one community share a key.
// Example: (7,(3,2)) => (2,(7,3))
val result = relationships
  .map(edge => (edge.srcId, edge.dstId.toString))
  .join(vertices)
  .map { case (srcId, (dstId, componentId)) => (componentId, (srcId, dstId)) }
??????????Neo4j????????????????????????
???????Spark Graphx????????????????????id??????????????????????????????????????TOP N?????????
??????????????????????????????
????????????????????????????????????????????????????????????????????????????????????????Neo4j??????????Neo4j?????????Neo4j?????????????
Neo4j??
Neo4j???????????????????????????????????????Neo4j??????????????????????????????????????????????????????????????
???????????????????????????????????????????????????????????????????????????????????Neo4j????????????????
?????????????????????????????
????????????"?"??????????Ann?Dan????????????????????????????????????????????????????????
????????????????????????Ann???Dan?Ann?Dan????????????????????????????????????Ann?????????????????
????????????????????SQL???????join????????Neo4j??????????????????????????????Neo4j?????????????????SQL????????????Neo4j????
Neo4j???
- ?SQL???????cypher
- ??????????
- ?????Apache Lucene????
- ???UNIQUE??
- ?????????cypher???UI?Neo4j?????
- ??????ACID???????????????????
- ???????????JSON?XLS??
- ????REST API????????????Java?Spring?Scala????
- ??????????UI MVC????Node JS????Java??
- ?????Java API?Cypher API?Native Java API???Java????
- ?????????????
Cypher??
Cypher?Neo4j?????????????????????SQL???????????
- ????
MATCH WHERE RETURN - ??
() ????
[] ????????????????????????
{} ???????????key:value?????????????????????????? -
??
????????????????????????0???????????
node:label1:label2 ???????????,?????????? - ???????
// Create a node
CREATE (n:Person {name : 'Andres'});

// Create a relationship between two existing nodes
MATCH (a:Person),(b:Person) WHERE a.name = 'Node A' AND b.name = 'Node B' CREATE (a)-[r:Follow]->(b);

// Update a node property
MATCH (n:Person { name: 'Andres' }) SET n.name = 'Taylor';

// Delete a node together with its relationships
MATCH (n:Person { name:'Taylor' }) DETACH DELETE n;

// Delete a relationship
MATCH (a:Person)-[r:Follow]->(b:Person) WHERE a.name = 'Node A' AND b.name = 'Node B' DELETE r;

// Query the names of the nodes that Taylor has a Follow relationship to
// (`Person` in the target position is a variable bound to the node, not a label)
MATCH (:Person { name:'Taylor' })-[r:Follow]->(Person) RETURN Person.name;

// Shortest path between two nodes
// NOTE(review): the pattern has no variable-length marker (e.g. [r:Follow*]), so it looks
// like only a single hop can match — confirm the intended path length.
MATCH (ms:Person { name:'Node A' }),(cs:Person { name:'Node B' }), p = shortestPath((ms)-[r:Follow]-(cs)) RETURN p;

// Delete every node and every relationship
MATCH (n) DETACH DELETE n;
Neo4j?????
??Neo4j???????????????
Cypher????
????Cypher?????Neo4j??????????????????????????????????????????????????????????/?????????????????????????????????????????????????????????
???????????
// Create the four family members.
CREATE (person:Person {name: "Steven", age: 45}) RETURN person;
CREATE (person:Person {name: "Michael", age: 16}) RETURN person;
CREATE (person:Person {name: "Rebecca", age: 7}) RETURN person;
CREATE (person:Person {name: "Linda",age:40}) RETURN person;

// Steven is married to Linda.
MATCH (steven:Person {name: "Steven"}), (linda:Person {name: "Linda"}) CREATE (steven)-[:IS_MARRIED_TO]->(linda) return steven, linda;

// Michael and Rebecca are siblings.
MATCH (michael:Person {name: "Michael"}), (rebecca:Person {name: "Rebecca"}) CREATE (michael)-[:IS_SIBLING]->(rebecca) return michael, rebecca;

// Both parents have both children.
MATCH (steven:Person {name: "Steven"}), (michael:Person {name: "Michael"}) CREATE (steven)-[:HAS_CHILD]->(michael) return steven, michael;
MATCH (steven:Person {name: "Steven"}), (rebecca:Person {name: "Rebecca"}) CREATE (steven)-[:HAS_CHILD]->(rebecca) return steven, rebecca;
MATCH (linda:Person {name: "Linda"}), (michael:Person {name: "Michael"}) CREATE (linda)-[:HAS_CHILD]->(michael) return linda, michael;
// Variables are case-sensitive: the bound variable is `rebecca`, not `Rebecca`.
MATCH (linda:Person {name: "Linda"}), (rebecca:Person {name: "Rebecca"}) CREATE (linda)-[:HAS_CHILD]->(rebecca) return linda, rebecca;
????????????????
// Michael's friends; each statement creates the friend node and the FRIEND relationship.
MATCH (michael:Person {name: "Michael"}) CREATE (michael)-[:FRIEND]->(charlie:Person {name: "Charlie", age: 16}) RETURN michael, charlie;
MATCH (michael:Person {name: "Michael"}) CREATE (michael)-[:FRIEND]->(koby:Person {name: "Koby"}) RETURN michael, koby;
MATCH (michael:Person {name: "Michael"}) CREATE (michael)-[:FRIEND]->(grant:Person {name: "Grant"}) RETURN michael, grant;

// Rebecca's friends.
MATCH (rebecca:Person {name: "Rebecca"}) CREATE (rebecca)-[:FRIEND]->(jordyn:Person {name: "Jordyn"}) RETURN rebecca, jordyn;
MATCH (rebecca:Person {name: "Rebecca"}) CREATE (rebecca)-[:FRIEND]->(katie:Person {name: "Katie"}) RETURN rebecca, katie;
?????????????????
// Create the first movie and Michael's rating of it.
CREATE (movie:Movie {title:"Avengers"}) RETURN movie;
MATCH (michael:Person {name:"Michael"}), (avengers:Movie {title:"Avengers"}) CREATE (michael)-[:HAS_SEEN {rating:5}]->(avengers) return michael, avengers;

// Create the remaining movies.
CREATE (movie:Movie {title:"Batman"}) RETURN movie;
CREATE (movie:Movie {title:"Gone with the Wind"}) RETURN movie;
CREATE (movie:Movie {title:"Spongebob Square Pants"}) RETURN movie;
CREATE (movie:Movie {title:"Avengers 2"}) RETURN movie;

// HAS_SEEN relationships carry the viewer's rating as a property.
MATCH (charlie:Person {name:"Charlie"}), (movie:Movie {title:"Batman"}) CREATE (charlie)-[:HAS_SEEN {rating:4}]->(movie) return charlie, movie;
MATCH (charlie:Person {name:"Charlie"}), (movie:Movie {title:"Gone with the Wind"}) CREATE (charlie)-[:HAS_SEEN {rating:0}]->(movie) return charlie, movie;
MATCH (koby:Person {name:"Koby"}), (movie:Movie {title:"Batman"}) CREATE (koby)-[:HAS_SEEN {rating:4}]->(movie) return koby, movie;
MATCH (koby:Person {name:"Koby"}), (movie:Movie {title:"Avengers 2"}) CREATE (koby)-[:HAS_SEEN {rating:5}]->(movie) return koby, movie;
MATCH (grant:Person {name:"Grant"}), (movie:Movie {title:"Spongebob Square Pants"}) CREATE (grant)-[:HAS_SEEN {rating:1}]->(movie) return grant, movie;
MATCH (jordyn:Person {name:"Jordyn"}), (movie:Movie {title:"Spongebob Square Pants"}) CREATE (jordyn)-[:HAS_SEEN {rating:5}]->(movie) return jordyn, movie;

// Add a gender property to the two children.
MATCH (michael:Person {name: "Michael"}) SET michael.gender = "male" RETURN michael;
MATCH (rebecca:Person {name: "Rebecca"}) SET rebecca.gender = "female" RETURN rebecca;
????????????steven????????????????3????
// Titles of movies rated above 3 by friends of Steven's male child.
MATCH (steven:Person {name:"Steven"})-[:HAS_CHILD]-(child:Person)
      -[:FRIEND]-(friend:Person)
      -[hasSeen:HAS_SEEN]-(movie:Movie)
WHERE child.gender = "male" AND hasSeen.rating > 3
RETURN DISTINCT movie.title
??
?????????Spark Graphx?????????????????????????????Neo4j??????????Neo4j????????????Neo4j????????????????
??????????????????????????????