Gremlin创建schema(包括实体和关系)
1、构建图谱schema,流程包括图创建、实体构建以及关系构建。
创建图时需要指定图库名称以及主键字段。
实体构建时需要指定主键字段,每个属性需要指定数据类型,是否非空以及默认值。关系构建时需要包括关系名称、指向头实体的标签,指向尾实体的标签等字段。
Java代码展示:
import com.gridgraph.driver.GridGraphAuthenticationException;
import com.gridgraph.driver.GridGraphSecureCluster;
import org.apache.tinkerpop.gremlin.driver.Client;
/**
 * Creates the example graph and its schema by sending one Gremlin script to the server.
 *
 * <p>The script creates the graph named by {@code database}, declares {@code id} as the primary
 * key, defines three vertex schemas (实体1, 实体2, 实体3) with two STRING properties each, and
 * one edge schema (关系) with the STRING properties {@code h_table}, {@code t_table} and
 * {@code r}.
 */
public class AddSchema {

    /**
     * Builds the schema script, submits it and waits for the server to finish executing it.
     *
     * @param args unused
     * @throws Exception if the script fails on the server or the wait is interrupted
     */
    public static void main(String[] args) throws Exception {
        String address = "localhost";
        int port = 9999;
        String username = "";
        String password = "";
        String database = "实体1";

        StringBuilder sb = new StringBuilder();
        sb.append(String.format(
                "graph=GridGraphFactory.createGraph('%s');graph.createPrimaryKey('id');", database));
        appendVertexSchema(sb, "equipSchema", "实体1");
        appendVertexSchema(sb, "manuSchema", "实体2");
        appendVertexSchema(sb, "subSchema", "实体3");
        sb.append("relSchema = graph.createSchema('关系', SchemaType.EDGE);");
        sb.append("relSchema.createProperty('h_table', GridDataType.STRING, false, false, null);");
        sb.append("relSchema.createProperty('t_table', GridDataType.STRING, false, false, null);");
        sb.append("relSchema.createProperty('r', GridDataType.STRING, false, false, null);");

        try {
            GridGraphSecureCluster cluster =
                    new GridGraphSecureCluster(address, port, username, password);
            Client client = cluster.connect(true);
            try {
                // Wait for the result so that a server-side failure is reported here
                // instead of being dropped when the client is closed.
                client.submit(sb.toString()).all().get();
            } finally {
                // Close the client even when the submission fails.
                client.close();
                // NOTE(review): if GridGraphSecureCluster offers a close method, call it here too.
            }
        } catch (GridGraphAuthenticationException e) {
            throw new RuntimeException(e);
        }
    }

    /** Appends the statements that define one vertex schema with the properties 属性1 and 属性2. */
    private static void appendVertexSchema(StringBuilder sb, String variable, String label) {
        sb.append(String.format("%s = graph.createSchema('%s', SchemaType.VERTEX);", variable, label));
        sb.append(String.format(
                "%s.createProperty('属性1', GridDataType.STRING, false, false, null);", variable));
        sb.append(String.format(
                "%s.createProperty('属性2', GridDataType.STRING, false, false, null);", variable));
    }
}
Python代码展示:
#encoding=utf8
from gremlin_python.driver import client

# Creates the example graph and its schema: three vertex schemas (实体1, 实体2, 实体3)
# with two STRING properties each, and one edge schema (关系) with the STRING
# properties h_table, t_table and r.
if __name__ == '__main__':
    # Named `conn` so that the imported `client` module is not shadowed.
    conn = client.Client('ws://localhost:9999/gremlin', None, username='', password='')
    try:
        database = "实体1"
        prefix = f"graph=GridGraphFactory.createGraph('{database}');graph.createPrimaryKey('id');"
        # Every statement ends with ';' so the script stays valid however it is joined.
        statements = (
            "equipSchema = graph.createSchema('实体1', SchemaType.VERTEX);",
            "equipSchema.createProperty('属性1', GridDataType.STRING, false, false, null);",
            "equipSchema.createProperty('属性2', GridDataType.STRING, false, false, null);",
            "manuSchema = graph.createSchema('实体2', SchemaType.VERTEX);",
            "manuSchema.createProperty('属性1', GridDataType.STRING, false, false, null);",
            "manuSchema.createProperty('属性2', GridDataType.STRING, false, false, null);",
            "subSchema = graph.createSchema('实体3', SchemaType.VERTEX);",
            "subSchema.createProperty('属性1', GridDataType.STRING, false, false, null);",
            "subSchema.createProperty('属性2', GridDataType.STRING, false, false, null);",
            "relSchema = graph.createSchema('关系', SchemaType.EDGE);",
            "relSchema.createProperty('h_table', GridDataType.STRING, false, false, null);",
            "relSchema.createProperty('t_table', GridDataType.STRING, false, false, null);",
            "relSchema.createProperty('r', GridDataType.STRING, false, false, null);",
        )
        # Wait for the result so that a server-side failure is raised here.
        conn.submit(prefix + "".join(statements)).all().result()
    finally:
        # Close the connection even when the submission fails.
        conn.close()
2、填入数据。添加实体时,需要指定实体主键、属性以及对应的属性值;添加关系时,需要指定头实体、尾实体以及关系名。最终形成知识图谱。
Java代码展示:
import com.gridgraph.driver.GridGraphAuthenticationException;
import com.gridgraph.driver.GridGraphSecureCluster;
import org.apache.tinkerpop.gremlin.driver.Client;
public class AddData {public static void main(String[] args) throws Exception {try{String address = "localhost";int port = 9999;String username = "";String password = "";String database = "实体1";GridGraphSecureCluster cluster = new GridGraphSecureCluster(address, port, username, password);Client client = cluster.connect(true);StringBuilder sb = new StringBuilder();sb.append(String.format("graph=GridGraphFactory.openGraph('%s');", database));sb.append("e1 = graph.addVertex(T.label, '实体1', 'id', '1', '属性1', '1', '属性2', '2');");sb.append("e2 = graph.addVertex(T.label, '实体2', 'id', '2', '属性1', '1', '属性2', '2');");sb.append("e3 = graph.addVertex(T.label, '实体3', 'id', '3', '属性1', '1', '属性2', '2');");sb.append("e1.addEdge('关系', e2, 'h_table', '实体1', 't_table', '实体3', 'r', 'r1');");sb.append("e1.addEdge('关系', e3, 'h_table', '实体1', 't_table', '实体3', 'r', 'r2');");sb.append("graph.tx().commit();");client.submit(sb.toString());client.close();} catch (GridGraphAuthenticationException e) {throw new RuntimeException(e);}}
Python代码展示:
#encoding=utf8
from gremlin_python.driver import client
# Inserts the example data: one vertex for each of the labels 实体1, 实体2 and 实体3,
# plus two 关系 edges that start at the 实体1 vertex. Each edge stores the head label in
# h_table, the tail label in t_table and the relation name in r.
if __name__ == '__main__':
    # Named `conn` so that the imported `client` module is not shadowed.
    conn = client.Client('ws://localhost:9999/gremlin', None, username='', password='')
    try:
        database = "实体1"
        prefix = f"graph=GridGraphFactory.openGraph('{database}');g=graph.traversal();"
        # Every statement ends with ';' so the script stays valid however it is joined.
        statements = (
            "e1 = graph.addVertex(T.label, '实体1', 'id', '1', '属性1', '1', '属性2', '2');",
            "e2 = graph.addVertex(T.label, '实体2', 'id', '2', '属性1', '1', '属性2', '2');",
            "e3 = graph.addVertex(T.label, '实体3', 'id', '3', '属性1', '1', '属性2', '2');",
            # t_table must name the label of the tail vertex: e2 carries the label 实体2.
            "e1.addEdge('关系', e2, 'h_table', '实体1', 't_table', '实体2', 'r', 'r1');",
            "e1.addEdge('关系', e3, 'h_table', '实体1', 't_table', '实体3', 'r', 'r2');",
            "graph.tx().commit();",
        )
        # Wait for the result so that a server-side failure is raised here.
        conn.submit(prefix + "".join(statements)).all().result()
    finally:
        # Close the connection even when the submission fails.
        conn.close()
3、打印schema结构
对于实体:遍历图数据库中的所有schema,对其中类型为VERTEX的schema再遍历其全部属性(跳过内部属性_id),生成“实体类型(属性1,属性2,属性3)”的结构;
对于每一个关系,可以遍历所有关系数据中的 头标签、关系名、尾标签,对其进行去重,生成“头标签--[r:关系名]-->尾标签”的结构。
Java代码展示:
import com.gridgraph.driver.GridGraphAuthenticationException;
import com.gridgraph.driver.GridGraphSecureCluster;
import org.apache.tinkerpop.gremlin.driver.Client;
import org.apache.tinkerpop.gremlin.driver.Result;
import org.apache.tinkerpop.gremlin.driver.ResultSet;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class SchemaPrompt {public static void main(String[] args) throws Exception {try{String address = "localhost";int port = 9999;String username = "";String password = "";String database = "实体1";GridGraphSecureCluster cluster = new GridGraphSecureCluster(address, port, username, password);Client client = cluster.connect(true);client.submit(String.format("graph=GridGraphFactory.openGraph('%s');g=graph.traversal();return;", database));ResultSet resultSet = client.submit("return graph.schemas().stream().map{s -> s.getName()};");StringBuilder entity_str = new StringBuilder("实体有:");StringBuilder rel_str = new StringBuilder("关系有:");List<String> resultList = resultSet.all().get().stream().map(Result::getString).collect(Collectors.toList());List<String> entities = new ArrayList<>();List<String> rels = new ArrayList<>();for(String table:resultList){ResultSet typeSet = client.submit(String.format("return graph.getSchema('%s').getType().toString();", table));List<String> typeList = typeSet.all().get().stream().map(Result::getString).collect(Collectors.toList());if(typeList.get(0).equals("VERTEX")){ //实体类型StringBuilder entity = new StringBuilder();entity.append(table);ResultSet prosSet = client.submit(String.format("return graph.getSchema('%s').getProperties().stream().map{s -> s.getName()};", table));List<String> prosList = prosSet.all().get().stream().map(Result::getString).collect(Collectors.toList());entity.append("(");for(int i = 0; i < prosList.size(); i++) {String pro = prosList.get(i);if (pro.equals("_id"))continue;entity.append(pro);if(i < prosList.size() - 1)entity.append(',');}entity.append(")");entities.add(entity.toString());}else if(typeList.get(0).equals("EDGE")){ResultSet relationSet = client.submit(String.format("return g.E().hasLabel('%s').valueMap('h_table', 'r', 't_table').dedup();", table));List<Result> relationList = relationSet.all().get();for(Result rel:relationList){Map<String, String> a_rel = rel.get(Map.class);String h_table = 
a_rel.get("h_table");String t_table = a_rel.get("t_table");String r = a_rel.get("r");rels.add(String.format("%s--[r:%s]->%s", h_table, r, t_table));}}}entity_str.append(String.join(",", entities));rel_str.append(String.join(",", rels));System.out.println(entity_str);System.out.println(rel_str);
client.close();} catch (GridGraphAuthenticationException e) {throw new RuntimeException(e);}}
}
Python代码展示:
#encoding=utf8
from gremlin_python.driver import client
# Prints a textual summary of the graph schema: every VERTEX schema as
# "label(prop1,prop2,...)" without the _id property, and every EDGE schema as one
# "head--[r:name]-->tail" entry per distinct (h_table, r, t_table) combination.
if __name__ == '__main__':
    # Named `conn` so that the imported `client` module is not shadowed.
    conn = client.Client('ws://localhost:9999/gremlin', None, username='', password='')
    try:
        database = "实体1"
        # Every request re-opens the graph, so each script starts with this prefix.
        prefix = f'graph=GridGraphFactory.openGraph("{database}");g=graph.traversal();'
        tables = conn.submit(prefix + "graph.schemas().stream().map{s -> s.getName()};").all().result()
        entities, rels = [], []
        for table in tables:
            tp = conn.submit(prefix + f"graph.getSchema('{table}').getType().toString();").one()[0]
            if tp == 'VERTEX':  # entity type
                pros = conn.submit(
                    prefix + f"graph.getSchema('{table}')" + ".getProperties().stream().map{s -> s.getName()};"
                ).all().result()
                pros = [pro for pro in pros if pro != '_id']
                entities.append(f"{table}({','.join(pros)})")
            elif tp == 'EDGE':
                rs = conn.submit(
                    prefix + f"g.E().hasLabel('{table}').valueMap('h_table', 'r', 't_table').dedup();"
                ).all().result()
                for mp in rs:
                    rels.append(f"{mp['h_table']}--[r:{mp['r']}]-->{mp['t_table']}")
        # Alternative that derives the labels from the edge endpoints instead of the
        # stored h_table / t_table properties:
        #   g.E().hasLabel('关系').toList().stream().map{e -> e.outVertex().schema().getName()+"--[r:"+e.values('r').next()+ "]-->" + e.inVertex().schema().getName()}.distinct()
        print("实体有:" + ",".join(entities))
        print("关系有:" + ",".join(rels))
    finally:
        # Close the connection even when one of the queries fails.
        conn.close()