Python torch_geometric.data.InMemoryDataset() Examples
The following are 7 code examples of torch_geometric.data.InMemoryDataset().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module torch_geometric.data, or try the search function.
Example #1
Source File: test_dataset.py From pytorch_geometric with MIT License | 8 votes |
def test_in_memory_dataset():
    """Collate two small graphs into an InMemoryDataset and check slicing."""

    class TestDataset(InMemoryDataset):
        # Minimal concrete dataset: collate the given graphs at construction.
        def __init__(self, data_list):
            super().__init__('/tmp/TestDataset')
            self.data, self.slices = self.collate(data_list)

    node_feat = torch.Tensor([[1], [1], [1]])
    edges = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
    faces = torch.tensor([[0], [1], [2]])

    # Two graphs sharing the same tensors but different explicit node counts.
    first = Data(x=node_feat, edge_index=edges, face=faces, test_int=1,
                 test_str='1')
    first.num_nodes = 10
    second = Data(x=node_feat, edge_index=edges, face=faces, test_int=1,
                  test_str='1')
    second.num_nodes = 5

    dataset = TestDataset([first, second])
    assert len(dataset) == 2

    # Each retrieved graph keeps its own num_nodes and all 5 stored keys.
    assert dataset[0].num_nodes == 10
    assert len(dataset[0]) == 5
    assert dataset[1].num_nodes == 5
    assert len(dataset[1]) == 5
Example #2
Source File: models.py From gdc with MIT License | 6 votes |
def __init__(self, dataset: InMemoryDataset, hidden: List[int] = [64],
             dropout: float = 0.5):
    """Build a GCN with layer widths: input features -> hidden -> classes.

    Args:
        dataset: Source dataset; supplies the input feature width
            (``dataset.data.x.shape[1]``) and ``dataset.num_classes``.
        hidden: Widths of the hidden GCNConv layers. NOTE: the mutable
            default is safe here only because the list is never mutated.
        dropout: Dropout probability used by ``self.dropout``.
    """
    super(GCN, self).__init__()
    # Full width sequence: feature dim, hidden sizes, one logit per class.
    num_features = [dataset.data.x.shape[1]] + hidden + [dataset.num_classes]
    # One GCNConv per consecutive (in, out) width pair.
    layers = [GCNConv(in_features, out_features)
              for in_features, out_features in zip(num_features[:-1],
                                                   num_features[1:])]
    self.layers = ModuleList(layers)

    # Only the first layer's parameters are treated as regularized;
    # the rest are collected as non-regularized.
    self.reg_params = list(layers[0].parameters())
    self.non_reg_params = [p for l in layers[1:] for p in l.parameters()]

    self.dropout = Dropout(p=dropout)
    self.act_fn = ReLU()
Example #3
Source File: unsupervised_node_classification.py From cogdl with MIT License | 6 votes |
def __init__(self, args):
    """Set up data, label matrix, and model for unsupervised node classification.

    Builds the dataset from ``args``, derives a dense one-hot label matrix
    (for InMemoryDataset-backed data) or reuses ``data.y`` directly, then
    constructs the model and copies the run configuration off ``args``.
    """
    super(UnsupervisedNodeClassification, self).__init__(args)

    ds = build_dataset(args)
    self.data = ds[0]

    # InMemoryDataset-backed data stores labels as class indices: expand
    # them into a dense one-hot matrix and transpose edge_attr.
    if issubclass(ds.__class__.__bases__[0], InMemoryDataset):
        self.num_nodes = self.data.y.shape[0]
        self.num_classes = ds.num_classes
        self.label_matrix = np.zeros(
            (self.num_nodes, self.num_classes), dtype=int)
        self.label_matrix[range(self.num_nodes), self.data.y] = 1
        self.data.edge_attr = self.data.edge_attr.t()
    else:
        # Labels are already a (num_nodes, num_classes) matrix.
        self.label_matrix = self.data.y
        self.num_nodes, self.num_classes = self.data.y.shape

    self.model = build_model(args)

    # Run configuration copied from args for later use.
    self.model_name = args.model
    self.hidden_size = args.hidden_size
    self.num_shuffle = args.num_shuffle
    self.save_dir = args.save_dir
    self.enhance = args.enhance
    self.args = args
    self.is_weighted = self.data.edge_attr is not None
Example #4
Source File: data.py From gdc with MIT License | 5 votes |
def get_dataset(name: str, use_lcc: bool = True) -> InMemoryDataset:
    """Load a benchmark dataset by name, optionally restricted to its
    largest connected component (LCC).

    Args:
        name: One of 'Cora', 'Citeseer', 'Pubmed', 'Computers', 'Photo',
            'CoauthorCS'.
        use_lcc: If True, replace ``dataset.data`` with the subgraph induced
            by the largest connected component (nodes remapped, fresh
            all-False train/val/test masks).

    Returns:
        The loaded (and possibly LCC-restricted) dataset.

    Raises:
        ValueError: If ``name`` is not a supported dataset.
    """
    path = os.path.join(DATA_PATH, name)
    if name in ['Cora', 'Citeseer', 'Pubmed']:
        dataset = Planetoid(path, name)
    elif name in ['Computers', 'Photo']:
        dataset = Amazon(path, name)
    elif name == 'CoauthorCS':
        dataset = Coauthor(path, 'CS')
    else:
        # ValueError subclasses Exception, so existing catchers still work.
        raise ValueError('Unknown dataset: {}'.format(name))

    if use_lcc:
        lcc = get_largest_connected_component(dataset)

        x_new = dataset.data.x[lcc]
        y_new = dataset.data.y[lcc]

        row, col = dataset.data.edge_index.numpy()
        # Membership tests against the ndarray are O(n) per edge; a set
        # makes the edge filter O(E) instead of O(E * N).
        lcc_set = set(lcc.tolist())
        edges = [[i, j] for i, j in zip(row, col)
                 if i in lcc_set and j in lcc_set]
        edges = remap_edges(edges, get_node_mapper(lcc))

        data = Data(
            x=x_new,
            edge_index=torch.LongTensor(edges),
            y=y_new,
            train_mask=torch.zeros(y_new.size()[0], dtype=torch.bool),
            test_mask=torch.zeros(y_new.size()[0], dtype=torch.bool),
            val_mask=torch.zeros(y_new.size()[0], dtype=torch.bool)
        )
        dataset.data = data
    return dataset
Example #5
Source File: data.py From gdc with MIT License | 5 votes |
def get_component(dataset: InMemoryDataset, start: int = 0) -> set: visited_nodes = set() queued_nodes = set([start]) row, col = dataset.data.edge_index.numpy() while queued_nodes: current_node = queued_nodes.pop() visited_nodes.update([current_node]) neighbors = col[np.where(row == current_node)[0]] neighbors = [n for n in neighbors if n not in visited_nodes and n not in queued_nodes] queued_nodes.update(neighbors) return visited_nodes
Example #6
Source File: data.py From gdc with MIT License | 5 votes |
def get_largest_connected_component(dataset: InMemoryDataset) -> np.ndarray:
    """Return the node indices of the graph's largest connected component.

    Repeatedly flood-fills from the smallest unassigned node until every
    node belongs to some component, then returns the biggest one (first
    one found wins ties) as an index array.
    """
    unassigned = set(range(dataset.data.x.shape[0]))
    components = []
    while unassigned:
        component = get_component(dataset, min(unassigned))
        components.append(component)
        unassigned -= component
    # max(key=len) picks the first largest, matching argmax tie-breaking.
    largest = max(components, key=len)
    return np.array(list(largest))
Example #7
Source File: data.py From gdc with MIT License | 5 votes |
def get_adj_matrix(dataset: InMemoryDataset) -> np.ndarray: num_nodes = dataset.data.x.shape[0] adj_matrix = np.zeros(shape=(num_nodes, num_nodes)) for i, j in zip(dataset.data.edge_index[0], dataset.data.edge_index[1]): adj_matrix[i, j] = 1. return adj_matrix