Python xml.etree.ElementTree.parse() Examples
The following are 30 code examples of xml.etree.ElementTree.parse().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module xml.etree.ElementTree, or try the search function.
Example #1
Source File: input_definition.py From misp42splunk with GNU Lesser General Public License v3.0 | 6 votes |
def parse(stream):
    """Build an ``InputDefinition`` from a stream of XML.

    :param stream: stream containing XML to parse.
    :return: definition: an ``InputDefinition`` object.
    """
    result = InputDefinition()

    # Each child of the document root is either the ``configuration``
    # element (its stanzas become the inputs) or a plain metadata entry
    # stored under its tag name.
    for child in ET.parse(stream).getroot():
        if child.tag != "configuration":
            result.metadata[child.tag] = child.text
            continue
        result.inputs = parse_xml_data(child, "stanza")

    return result
Example #2
Source File: eval_voc.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def parse_voc_rec(filename):
    """
    parse pascal voc record into a list of per-object dictionaries
    :param filename: xml file path
    :return: list of dict, one per ``<object>`` element, with the keys
        ``name`` (str), ``difficult`` (int) and ``bbox``
        (``[xmin, ymin, xmax, ymax]`` as ints)
    """
    import xml.etree.ElementTree as ET

    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            'difficult': int(obj.find('difficult').text),
            # Go through float() first: coordinates written as "48.0" are
            # rejected by int() alone. The fractional part is truncated.
            'bbox': [int(float(bbox.find(tag).text))
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
        })
    return objects
Example #3
Source File: xml_style.py From mmdetection with Apache License 2.0 | 6 votes |
def get_cat_ids(self, idx):
    """Collect the label ids of the objects annotated for one image.

    Args:
        idx (int): Index of data.

    Returns:
        list[int]: All categories in the image of specified index.
    """
    image_id = self.data_infos[idx]['id']
    annotation_file = osp.join(self.img_prefix, 'Annotations', f'{image_id}.xml')
    annotation_root = ET.parse(annotation_file).getroot()

    # Objects whose class name is not listed in CLASSES are ignored.
    class_names = (node.find('name').text
                   for node in annotation_root.findall('object'))
    return [self.cat2label[cls] for cls in class_names if cls in self.CLASSES]
Example #4
Source File: xml_style.py From mmdetection with Apache License 2.0 | 6 votes |
def get_subset_by_classes(self):
    """Keep the images that contain at least one object of a class in CLASSES."""
    kept = []
    for info in self.data_infos:
        annotation_file = osp.join(self.img_prefix, 'Annotations', f"{info['id']}.xml")
        object_nodes = ET.parse(annotation_file).getroot().findall('object')
        # any() stops at the first matching object, so each image is
        # added at most once.
        if any(node.find('name').text in self.CLASSES for node in object_nodes):
            kept.append(info)
    return kept
Example #5
Source File: dis_eval.py From Collaborative-Learning-for-Weakly-Supervised-Object-Detection with MIT License | 6 votes |
def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Args:
        filename: Path of the annotation file, passed to ``ET.parse``.

    Returns:
        list[dict]: One dict per ``<object>`` element with the keys ``name``
        and ``pose`` (str), ``truncated`` and ``difficult`` (int) and
        ``bbox`` (``[xmin, ymin, xmax, ymax]`` as ints).
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            'pose': obj.find('pose').text,
            'truncated': int(obj.find('truncated').text),
            'difficult': int(obj.find('difficult').text),
            # float() first so that coordinates written as "48.0" parse;
            # int() alone raises ValueError on them.
            'bbox': [int(float(bbox.find(tag).text))
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
        })
    return objects
Example #6
Source File: pascal_voc.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def _parse_voc_anno(filename):
    """Read the image size and the object annotations of a PASCAL VOC xml file.

    :param filename: xml file path
    :return: tuple ``(height, width, objects)`` where ``objects`` is a list
        of dicts with the keys ``name``, ``difficult`` and ``bbox``
    """
    import xml.etree.ElementTree as ET

    def _coordinate(box_node, tag):
        # Coordinates may be written as floats, hence float() before int().
        return int(float(box_node.find(tag).text))

    annotation = ET.parse(filename)
    height = int(annotation.find('size').find('height').text)
    width = int(annotation.find('size').find('width').text)

    objects = []
    for node in annotation.findall('object'):
        record = {
            'name': node.find('name').text,
            'difficult': int(node.find('difficult').text),
        }
        box = node.find('bndbox')
        record['bbox'] = [_coordinate(box, 'xmin'), _coordinate(box, 'ymin'),
                          _coordinate(box, 'xmax'), _coordinate(box, 'ymax')]
        objects.append(record)
    return height, width, objects
Example #7
Source File: utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Returns a list with one dict per ``<object>`` element. Each dict has the
    keys ``name``, ``pose`` (element text), ``truncated``, ``difficult``
    (ints) and ``bbox`` (``[xmin, ymin, xmax, ymax]`` as ints).
    """
    def _coordinate(box_node, tag):
        # Accept "48" as well as "48.0": int() alone raises ValueError on
        # float-formatted text. The fractional part is truncated.
        return int(float(box_node.find(tag).text))

    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        obj_struct['pose'] = obj.find('pose').text
        obj_struct['truncated'] = int(obj.find('truncated').text)
        obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [_coordinate(bbox, 'xmin'),
                              _coordinate(bbox, 'ymin'),
                              _coordinate(bbox, 'xmax'),
                              _coordinate(bbox, 'ymax')]
        objects.append(obj_struct)
    return objects
Example #8
Source File: utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def parse_labelme_poly(filename):
    """Read the object annotations of a labelme xml file.

    Returns a list with one dict per ``<object>`` element. ``verified`` is
    converted to int; ``name``, ``deleted``, ``occluded`` and ``attributes``
    keep the element text; ``polygon`` is the flat list
    ``[x0, y0, x1, y1, ...]`` of the point coordinates, left as text.
    """
    objects = []
    for node in ET.parse(filename).findall('object'):
        record = {
            'name': node.find('name').text,
            'deleted': node.find('deleted').text,
            'verified': int(node.find('verified').text),
            'occluded': node.find('occluded').text,
            'attributes': node.find('attributes').text,
            'polygon': [],
        }
        for vertex in node.find('polygon').findall('pt'):
            record['polygon'].extend((vertex.find('x').text, vertex.find('y').text))
        objects.append(record)
    return objects
Example #9
Source File: base.py From robosuite with MIT License | 6 votes |
def __init__(self, fname):
    """
    Loads a mujoco xml from file.

    Args:
        fname (str): path to the MJCF xml file.
    """
    self.file = fname
    self.folder = os.path.dirname(fname)
    self.tree = ET.parse(fname)
    self.root = self.tree.getroot()
    self.name = self.root.get("model")

    # Each section is exposed as an attribute of the same name, obtained
    # from create_default_element in this fixed order.
    for section in ("worldbody", "actuator", "asset", "equality", "contact", "default"):
        setattr(self, section, self.create_default_element(section))

    self.resolve_asset_dependency()
Example #10
Source File: voc_eval.py From cascade-rcnn_Pytorch with MIT License | 6 votes |
def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Args:
        filename: annotation file handed to ``ET.parse``.

    Returns:
        A list of dicts, one per ``<object>`` element, with the keys
        ``name``, ``pose``, ``truncated``, ``difficult`` and ``bbox``.
        ``bbox`` is ``[xmin, ymin, xmax, ymax]`` as ints.
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    objects = []
    for obj in ET.parse(filename).findall('object'):
        obj_struct = {
            'name': obj.find('name').text,
            'pose': obj.find('pose').text,
            'truncated': int(obj.find('truncated').text),
            'difficult': int(obj.find('difficult').text),
        }
        bbox = obj.find('bndbox')
        # int(float(...)) rather than int(...): float-formatted coordinates
        # such as "48.0" would otherwise raise ValueError.
        obj_struct['bbox'] = [int(float(bbox.find(tag).text)) for tag in corner_tags]
        objects.append(obj_struct)
    return objects
Example #11
Source File: voc_eval.py From easy-faster-rcnn.pytorch with MIT License | 6 votes |
def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Every ``<object>`` element becomes a dict with the keys ``name`` and
    ``pose`` (element text), ``truncated`` and ``difficult`` (ints) and
    ``bbox`` (``[xmin, ymin, xmax, ymax]`` as ints). The dicts are returned
    as a list in document order.
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        # Coordinates go through float() so that values like "48.0" are
        # accepted; the fractional part is dropped.
        corners = [int(float(bbox.find(tag).text))
                   for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        objects.append({
            'name': obj.find('name').text,
            'pose': obj.find('pose').text,
            'truncated': int(obj.find('truncated').text),
            'difficult': int(obj.find('difficult').text),
            'bbox': corners,
        })
    return objects
Example #12
Source File: voc_dataset.py From Yolo-v2-pytorch with MIT License | 6 votes |
def __getitem__(self, item):
    """Load one sample: the transformed image and its annotated boxes.

    The image is read from ``JPEGImages/<id>.jpg`` and converted from BGR to
    RGB; boxes are read from ``Annotations/<id>.xml``. Both go through the
    training or evaluation transform pipeline.

    Returns a tuple ``(image, objects)`` of float32 arrays. The image axes
    are reordered with ``(2, 0, 1)`` (presumably HWC -> CHW; confirm against
    the transforms). Each object row is ``[xmin, ymin, xmax, ymax, label]``
    before the transforms are applied.
    """
    sample_id = self.ids[item]

    image_path = os.path.join(self.data_path, "JPEGImages", "{}.jpg".format(sample_id))
    image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)

    image_xml_path = os.path.join(self.data_path, "Annotations", "{}.xml".format(sample_id))
    objects = []
    for obj in ET.parse(image_xml_path).findall('object'):
        # Every coordinate is shifted by -1.
        xmin, xmax, ymin, ymax = (int(obj.find('bndbox').find(tag).text) - 1
                                  for tag in ("xmin", "xmax", "ymin", "ymax"))
        label = self.classes.index(obj.find('name').text.lower().strip())
        objects.append([xmin, ymin, xmax, ymax, label])

    if self.is_training:
        steps = [HSVAdjust(), VerticalFlip(), Crop(), Resize(self.image_size)]
    else:
        steps = [Resize(self.image_size)]
    image, objects = Compose(steps)((image, objects))

    image_array = np.transpose(np.array(image, dtype=np.float32), (2, 0, 1))
    return image_array, np.array(objects, dtype=np.float32)
Example #13
Source File: voc0712.py From CSD-SSD with MIT License | 6 votes |
def pull_item(self, index):
    """Load the image and annotation stored at ``index``.

    Returns a tuple ``(image, target, height, width)``: the image as a torch
    tensor with its axes permuted by ``(2, 0, 1)``, the (optionally
    transformed) target, and the height and width of the image as read from
    disk.
    """
    img_id = self.ids[index]

    target = ET.parse(self._annopath % img_id).getroot()
    img = cv2.imread(self._imgpath % img_id)
    height, width, _ = img.shape

    if self.target_transform is not None:
        target = self.target_transform(target, width, height)

    if self.transform is not None:
        target = np.array(target)
        # The first four columns are passed as boxes and the fifth as labels.
        box_columns, label_column = target[:, :4], target[:, 4]
        img, boxes, labels = self.transform(img, box_columns, label_column)
        # to rgb (reverse the channel order)
        img = img[:, :, (2, 1, 0)]
        target = np.hstack((boxes, np.expand_dims(labels, axis=1)))

    image_tensor = torch.from_numpy(img).permute(2, 0, 1)
    return image_tensor, target, height, width
Example #14
Source File: voc0712.py From CSD-SSD with MIT License | 6 votes |
def pull_anno(self, index):
    '''Return the annotation of the image at ``index``.

    Note: ``self.__getitem__()`` is deliberately not used, because any
    transformations passed in could alter the result.

    Argument:
        index (int): index of img to get annotation of
    Return:
        tuple: (second component of the image id, result of the target
            transform applied with width and height of 1)
            eg: ('001718', [('dog', (96, 13, 438, 332))])
    '''
    image_key = self.ids[index]
    annotation_root = ET.parse(self._annopath % image_key).getroot()
    ground_truth = self.target_transform(annotation_root, 1, 1)
    return image_key[1], ground_truth
Example #15
Source File: voc07_consistency.py From CSD-SSD with MIT License | 6 votes |
def pull_anno(self, index):
    '''Fetch the untransformed annotation for one image.

    Note: this bypasses ``self.__getitem__()`` because transformations
    passed in could interfere with the result.

    Argument:
        index (int): index of img to get annotation of
    Return:
        tuple: the second component of the image id and the output of the
            target transform called with width 1 and height 1
            eg: ('001718', [('dog', (96, 13, 438, 332))])
    '''
    entry = self.ids[index]
    xml_root = ET.parse(self._annopath % entry).getroot()
    boxes = self.target_transform(xml_root, 1, 1)
    return entry[1], boxes
Example #16
Source File: voc07_consistency_init.py From CSD-SSD with MIT License | 6 votes |
def pull_item(self, index):
    """Load the image and annotation at ``index`` plus a ``semi`` flag.

    Returns ``(image, target, height, width, semi)``. ``semi`` is
    ``np.array([1])`` when the first component of the image id ends with
    ``'VOC2007'``. Otherwise it is ``np.array([0])`` and the target is
    replaced by a ``(1, 5)`` array of zeros.
    """
    img_id = self.ids[index]

    target = ET.parse(self._annopath % img_id).getroot()
    img = cv2.imread(self._imgpath % img_id)
    height, width, _ = img.shape

    if self.target_transform is not None:
        target = self.target_transform(target, width, height)

    if self.transform is not None:
        target = np.array(target)
        img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
        # to rgb (reverse the channel order)
        img = img[:, :, (2, 1, 0)]
        target = np.hstack((boxes, np.expand_dims(labels, axis=1)))

    if img_id[0].endswith('VOC2007'):
        semi = np.array([1])
    else:
        semi = np.array([0])
        target = np.zeros([1, 5])

    image_tensor = torch.from_numpy(img).permute(2, 0, 1)
    return image_tensor, target, height, width, semi
Example #17
Source File: eval512.py From CSD-SSD with MIT License | 6 votes |
def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Returns a list with one dict per ``<object>`` element: ``name`` and
    ``pose`` (element text), ``truncated`` and ``difficult`` (ints) and
    ``bbox``, which holds ``[xmin, ymin, xmax, ymax]`` as ints with 1
    subtracted from every coordinate.
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            'pose': obj.find('pose').text,
            'truncated': int(obj.find('truncated').text),
            'difficult': int(obj.find('difficult').text),
            # float() first so that "48.0" is accepted (int() alone raises
            # ValueError), then the same -1 shift as before.
            'bbox': [int(float(bbox.find(tag).text)) - 1
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
        })
    return objects
Example #18
Source File: eval.py From CSD-SSD with MIT License | 6 votes |
def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Args:
        filename: annotation file handed to ``ET.parse``.

    Returns:
        list[dict]: one entry per ``<object>`` element with the keys
        ``name``, ``pose``, ``truncated``, ``difficult`` and ``bbox``.
        ``bbox`` is ``[xmin, ymin, xmax, ymax]`` as ints, each shifted by -1.
    """
    def _shifted(box_node, tag):
        # int(float(...)) accepts float-formatted text such as "48.0",
        # which int() alone rejects; the -1 shift is unchanged.
        return int(float(box_node.find(tag).text)) - 1

    objects = []
    for obj in ET.parse(filename).findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        obj_struct['pose'] = obj.find('pose').text
        obj_struct['truncated'] = int(obj.find('truncated').text)
        obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [_shifted(bbox, 'xmin'), _shifted(bbox, 'ymin'),
                              _shifted(bbox, 'xmax'), _shifted(bbox, 'ymax')]
        objects.append(obj_struct)
    return objects
Example #19
Source File: named_entity.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def _tagged_to_parse(self, tagged_tokens):
    """
    Build a chunk-parse tree from a list of (token, tag) pairs.

    Tokens tagged ``'O'`` are attached directly to the sentence. A ``B-``
    tag always opens a new chunk. An ``I-`` tag extends the last chunk when
    its label matches and opens a new chunk otherwise. Any other tag is
    skipped.
    """
    sentence = Tree('S', [])
    for token, tag in tagged_tokens:
        if tag == 'O':
            sentence.append(token)
            continue
        if not tag.startswith(('B-', 'I-')):
            continue

        chunk_label = tag[2:]
        extends_last_chunk = (
            tag.startswith('I-')
            and sentence
            and isinstance(sentence[-1], Tree)
            and sentence[-1].label() == chunk_label
        )
        if extends_last_chunk:
            sentence[-1].append(token)
        else:
            sentence.append(Tree(chunk_label, [token]))
    return sentence
Example #20
Source File: named_entity.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def _parse_to_tagged(sent):
    """
    Flatten a chunk-parse tree into a list of (token, tag) pairs.

    The first token of a chunk is tagged ``B-<label>`` and the rest
    ``I-<label>``; tokens outside any chunk are tagged ``O``. Empty chunks
    are reported with a warning and skipped.
    """
    tagged = []
    for node in sent:
        if not isinstance(node, Tree):
            tagged.append((node, 'O'))
            continue
        if len(node) == 0:
            print("Warning -- empty chunk in sentence")
            continue
        tagged.append((node[0], 'B-%s' % node.label()))
        tagged.extend((tok, 'I-%s' % node.label()) for tok in node[1:])
    return tagged
Example #21
Source File: nombank.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def roleset(self, roleset_id):
    """
    :param roleset_id: roleset identifier; the part before the first ``.``
        (after the escape replacements below) selects the frame file
        ``frames/<baseform>.xml``.
    :return: the xml description for the given roleset.
    :raise ValueError: if the frame file is not in ``self._framefiles`` or
        holds no roleset with this id.
    """
    baseform = roleset_id.split('.')[0]
    baseform = baseform.replace('perc-sign', '%')
    baseform = baseform.replace('oneslashonezero', '1/10').replace('1/10', '1-slash-10')
    framefile = 'frames/%s.xml' % baseform
    if framefile not in self._framefiles:
        raise ValueError('Frameset file for %s not found' % roleset_id)

    # n.b.: The encoding for XML fileids is specified by the file
    # itself; so we ignore self._encoding here.
    stream = self.abspath(framefile).open()
    try:
        etree = ElementTree.parse(stream).getroot()
    finally:
        # The tree is fully built by parse(), so the stream can be released
        # right away instead of being left open.
        stream.close()

    for roleset in etree.findall('predicate/roleset'):
        if roleset.attrib['id'] == roleset_id:
            return roleset
    raise ValueError('Roleset %s not found in %s' % (roleset_id, framefile))
Example #22
Source File: nombank.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def _read_instance_block(self, stream, instance_filter=lambda inst: True):
    """Read up to 100 lines from ``stream`` and parse them into instances.

    Lines that are empty after stripping are skipped. Only instances
    accepted by ``instance_filter`` are returned.
    """
    instances = []

    # Read 100 at a time.
    for _ in range(100):
        text = stream.readline().strip()
        if not text:
            continue
        instance = NombankInstance.parse(
            text, self._parse_fileid_xform, self._parse_corpus)
        if instance_filter(instance):
            instances.append(instance)

    return instances

######################################################################
#{ Nombank Instance & related datatypes
######################################################################
Example #23
Source File: nombank.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def parse(s):
    """Parse a pointer string into a tree-pointer object.

    ``xx*yy*zz`` gives a chain pointer and ``xx,yy,zz`` a split pointer,
    with each part parsed on its own. Otherwise the string must have the
    form ``<int>:<int>``.

    Raises ValueError if a plain pointer does not consist of exactly two
    ``:``-separated fields.
    """
    # Deal with chains (xx*yy*zz)
    chain_parts = s.split('*')
    if len(chain_parts) > 1:
        return NombankChainTreePointer(
            [NombankTreePointer.parse(part) for part in chain_parts])

    # Deal with split args (xx,yy,zz)
    split_parts = s.split(',')
    if len(split_parts) > 1:
        return NombankSplitTreePointer(
            [NombankTreePointer.parse(part) for part in split_parts])

    # Deal with normal pointers.
    fields = s.split(':')
    if len(fields) == 2:
        return NombankTreePointer(int(fields[0]), int(fields[1]))
    raise ValueError('bad nombank pointer %r' % s)
Example #24
Source File: propbank.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def roleset(self, roleset_id):
    """
    :param roleset_id: roleset identifier; the part before the first ``.``
        selects the frame file ``frames/<baseform>.xml``.
    :return: the xml description for the given roleset.
    :raise ValueError: if the frame file is not in ``self._framefiles`` or
        holds no roleset with this id.
    """
    baseform = roleset_id.split('.')[0]
    framefile = 'frames/%s.xml' % baseform
    if framefile not in self._framefiles:
        raise ValueError('Frameset file for %s not found' % roleset_id)

    # n.b.: The encoding for XML fileids is specified by the file
    # itself; so we ignore self._encoding here.
    stream = self.abspath(framefile).open()
    try:
        etree = ElementTree.parse(stream).getroot()
    finally:
        # parse() has consumed the stream; close it rather than leaking it.
        stream.close()

    for roleset in etree.findall('predicate/roleset'):
        if roleset.attrib['id'] == roleset_id:
            return roleset
    raise ValueError('Roleset %s not found in %s' % (roleset_id, framefile))
Example #25
Source File: propbank.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def rolesets(self, baseform=None):
    """
    :param baseform: if given, only ``frames/<baseform>.xml`` is read;
        otherwise every file in ``self._framefiles`` is read.
    :return: list of xml descriptions for rolesets.
    :raise ValueError: if ``baseform`` is given and its frame file is not in
        ``self._framefiles``.
    """
    if baseform is not None:
        framefile = 'frames/%s.xml' % baseform
        if framefile not in self._framefiles:
            raise ValueError('Frameset file for %s not found' % baseform)
        framefiles = [framefile]
    else:
        framefiles = self._framefiles

    rsets = []
    for framefile in framefiles:
        # n.b.: The encoding for XML fileids is specified by the file
        # itself; so we ignore self._encoding here.
        stream = self.abspath(framefile).open()
        try:
            etree = ElementTree.parse(stream).getroot()
        finally:
            # One stream is opened per frame file; close each as soon as it
            # has been parsed so they do not accumulate.
            stream.close()
        rsets.append(etree.findall('predicate/roleset'))
    return LazyConcatenation(rsets)
Example #26
Source File: propbank.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def _read_instance_block(self, stream, instance_filter=lambda inst: True):
    """Parse the next block of up to 100 lines of ``stream`` into instances.

    Blank lines are ignored, and an instance is kept only when
    ``instance_filter`` returns a true value for it.
    """
    kept = []

    # Read 100 at a time.
    remaining = 100
    while remaining > 0:
        remaining -= 1
        raw = stream.readline().strip()
        if not raw:
            continue
        parsed = PropbankInstance.parse(
            raw, self._parse_fileid_xform, self._parse_corpus)
        if instance_filter(parsed):
            kept.append(parsed)

    return kept

######################################################################
#{ Propbank Instance & related datatypes
######################################################################
Example #27
Source File: input_definition.py From misp42splunk with GNU Lesser General Public License v3.0 | 6 votes |
def parse(stream):
    """Read XML from a stream and turn it into an ``InputDefinition``.

    :param stream: stream containing XML to parse.
    :return: definition: an ``InputDefinition`` object.
    """
    parsed = InputDefinition()

    document_root = ET.parse(stream).getroot()
    for element in document_root:
        is_configuration = element.tag == "configuration"
        if is_configuration:
            # The configuration element holds one stanza per input.
            parsed.inputs = parse_xml_data(element, "stanza")
        else:
            # Everything else is kept as metadata under its tag name.
            parsed.metadata[element.tag] = element.text

    return parsed
Example #28
Source File: voc_eval.py From Collaborative-Learning-for-Weakly-Supervised-Object-Detection with MIT License | 6 votes |
def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Args:
        filename: annotation file handed to ``ET.parse``.

    Returns:
        list[dict]: for each ``<object>`` element, a dict with ``name`` and
        ``pose`` (element text), ``truncated`` and ``difficult`` (ints) and
        ``bbox`` (``[xmin, ymin, xmax, ymax]`` as ints).
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {
            'name': obj.find('name').text,
            'pose': obj.find('pose').text,
            'truncated': int(obj.find('truncated').text),
            'difficult': int(obj.find('difficult').text),
        }
        bbox = obj.find('bndbox')
        # Float-formatted coordinates ("48.0") make a bare int() raise
        # ValueError, so convert through float() and truncate.
        obj_struct['bbox'] = [int(float(bbox.find(tag).text))
                              for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        objects.append(obj_struct)
    return objects
Example #29
Source File: nombank.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def parse(s, parse_fileid_xform=None, parse_corpus=None):
    """Parse one whitespace-separated nombank line into a ``NombankInstance``.

    :param s: the line: fileid, sentence number, word number, baseform and
        sense number, followed by ``<pointer>-<id>`` entries, exactly one of
        which contains ``-rel`` and marks the predicate.
    :param parse_fileid_xform: optional callable applied to the fileid.
    :param parse_corpus: passed through to ``NombankInstance``.
    :raise ValueError: if the line has fewer than six fields or does not
        contain exactly one ``-rel`` entry.
    """
    pieces = s.split()
    if len(pieces) < 6:
        raise ValueError('Badly formatted nombank line: %r' % s)

    # Divide the line into its basic pieces.
    (fileid, sentnum, wordnum, baseform, sensenumber) = pieces[:5]

    # Split the remaining entries into the predicate ("-rel") entries and
    # the arguments. Two new lists are built instead of popping from the
    # list being enumerated: popping skipped the entry after each removed
    # one, so a second, adjacent "-rel" entry went undetected.
    rel = [p for p in pieces[5:] if '-rel' in p]
    args = [p for p in pieces[5:] if '-rel' not in p]
    if len(rel) != 1:
        raise ValueError('Badly formatted nombank line: %r' % s)

    # Apply the fileid selector, if any.
    if parse_fileid_xform is not None:
        fileid = parse_fileid_xform(fileid)

    # Convert sentence & word numbers to ints.
    sentnum = int(sentnum)
    wordnum = int(wordnum)

    # Parse the predicate location.
    predloc, predid = rel[0].split('-', 1)
    predicate = NombankTreePointer.parse(predloc)

    # Parse the arguments.
    arguments = []
    for arg in args:
        argloc, argid = arg.split('-', 1)
        arguments.append((NombankTreePointer.parse(argloc), argid))

    # Put it all together.
    return NombankInstance(fileid, sentnum, wordnum, baseform, sensenumber,
                           predicate, predid, arguments, parse_corpus)
Example #30
Source File: nombank.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def __init__(self, fileid, sentnum, wordnum, baseform, sensenumber,
             predicate, predid, arguments, parse_corpus=None):
    """Store the fields of one nombank instance on the object."""
    # The name of the file containing the parse tree for this
    # instance's sentence.
    self.fileid = fileid

    # The sentence number of this sentence within ``fileid``.
    # Indexing starts from zero.
    self.sentnum = sentnum

    # The word number of this instance's predicate within its
    # containing sentence. Word numbers are indexed starting from
    # zero, and include traces and other empty parse elements.
    self.wordnum = wordnum

    # The baseform of the predicate.
    self.baseform = baseform

    # The sense number of the predicate.
    self.sensenumber = sensenumber

    # A ``NombankTreePointer`` indicating the position of this
    # instance's predicate within its containing sentence.
    self.predicate = predicate

    # Identifier of the predicate.
    self.predid = predid

    # A tuple of (argloc, argid) pairs, specifying the location and
    # identifier for each of the predicate's arguments in the
    # containing sentence. Argument identifiers are strings such as
    # ``'ARG0'`` or ``'ARGM-TMP'``. It does *not* contain the predicate.
    self.arguments = tuple(arguments)

    # A corpus reader for the parse trees corresponding to the
    # instances in this nombank corpus.
    self.parse_corpus = parse_corpus