Python Examples of xml.etree.ElementTree.parse

Source File: input_definition.py From misp42splunk with GNU Lesser General Public License v3.0

6 votes

def parse(stream):
    """Build an ``InputDefinition`` from a stream of XML.

    Every direct child of the document root is visited.  A child tagged
    ``configuration`` is passed to ``parse_xml_data`` together with the
    name ``"stanza"`` and the result becomes ``definition.inputs``; any
    other child is recorded in ``definition.metadata`` as
    ``tag -> text``.

    :param stream: stream containing XML to parse.
    :return: definition: an ``InputDefinition`` object.
    """
    definition = InputDefinition()

    for child in ET.parse(stream).getroot():
        if child.tag != "configuration":
            # Plain metadata entry: keep the element text under its tag.
            definition.metadata[child.tag] = child.text
            continue
        # The configuration element carries the per-stanza settings.
        definition.inputs = parse_xml_data(child, "stanza")

    return definition

Source File: eval_voc.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def parse_voc_rec(filename):
    """
    Read the ``object`` entries of a pascal voc record.

    Each returned dict has the keys ``name`` (element text),
    ``difficult`` (int) and ``bbox`` (``[xmin, ymin, xmax, ymax]`` as
    ints, read from the ``bndbox`` child).

    :param filename: xml file path
    :return: list of dict
    """
    import xml.etree.ElementTree as ET

    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    records = []
    for node in ET.parse(filename).findall('object'):
        record = {
            'name': node.find('name').text,
            'difficult': int(node.find('difficult').text),
        }
        box = node.find('bndbox')
        record['bbox'] = [int(box.find(tag).text) for tag in corner_tags]
        records.append(record)
    return records

Source File: xml_style.py From mmdetection with Apache License 2.0

6 votes

def get_cat_ids(self, idx):
    """Collect the category labels annotated for one image.

    The annotation file is ``<img_prefix>/Annotations/<id>.xml`` where
    ``id`` comes from ``self.data_infos[idx]``.  Objects whose name is
    not in ``self.CLASSES`` are skipped; the remaining names are mapped
    through ``self.cat2label`` in document order (duplicates kept).

    Args:
        idx (int): Index of data.

    Returns:
        list[int]: All categories in the image of specified index.
    """
    image_id = self.data_infos[idx]['id']
    annotation_root = ET.parse(
        osp.join(self.img_prefix, 'Annotations', f'{image_id}.xml')
    ).getroot()

    # Lazily yield names so lookup and filtering stay interleaved.
    object_names = (node.find('name').text
                    for node in annotation_root.findall('object'))
    return [self.cat2label[name]
            for name in object_names
            if name in self.CLASSES]

Source File: xml_style.py From mmdetection with Apache License 2.0

6 votes

def get_subset_by_classes(self):
    """Filter imgs by user-defined categories.

    Returns the entries of ``self.data_infos`` (original order) whose
    annotation file ``<img_prefix>/Annotations/<id>.xml`` contains at
    least one object named in ``self.CLASSES``.
    """
    selected = []
    for info in self.data_infos:
        image_id = info['id']
        annotation_root = ET.parse(
            osp.join(self.img_prefix, 'Annotations', f'{image_id}.xml')
        ).getroot()
        # any() stops at the first matching object, like the explicit break.
        has_wanted_class = any(
            node.find('name').text in self.CLASSES
            for node in annotation_root.findall('object')
        )
        if has_wanted_class:
            selected.append(info)

    return selected

Source File: dis_eval.py From Collaborative-Learning-for-Weakly-Supervised-Object-Detection with MIT License

6 votes

def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Returns one dict per ``object`` element with the keys ``name`` and
    ``pose`` (element text), ``truncated`` and ``difficult`` (ints) and
    ``bbox`` (``[xmin, ymin, xmax, ymax]`` as ints from ``bndbox``).
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    annotations = []
    for node in ET.parse(filename).findall('object'):
        entry = {
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int(node.find('truncated').text),
            'difficult': int(node.find('difficult').text),
        }
        box = node.find('bndbox')
        entry['bbox'] = [int(box.find(tag).text) for tag in corner_tags]
        annotations.append(entry)

    return annotations

Source File: pascal_voc.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def _parse_voc_anno(filename):
    """Read image size and object annotations from a VOC xml file.

    Returns ``(height, width, objects)``.  ``height`` and ``width`` are
    ints taken from the ``size`` element; ``objects`` is a list of dicts
    with keys ``name``, ``difficult`` (int) and ``bbox``
    (``[xmin, ymin, xmax, ymax]``).  Box coordinates are parsed as
    floats first and then truncated to int.
    """
    import xml.etree.ElementTree as ET

    tree = ET.parse(filename)
    size = tree.find('size')
    height = int(size.find('height').text)
    width = int(size.find('width').text)

    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    objects = []
    for node in tree.findall('object'):
        entry = {
            'name': node.find('name').text,
            'difficult': int(node.find('difficult').text),
        }
        box = node.find('bndbox')
        entry['bbox'] = [int(float(box.find(tag).text))
                         for tag in corner_tags]
        objects.append(entry)
    return height, width, objects

Source File: utils.py From DOTA_models with Apache License 2.0

6 votes

def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Every ``object`` element becomes a dict holding ``name`` and
    ``pose`` as text, ``truncated`` and ``difficult`` as ints, and
    ``bbox`` as ``[xmin, ymin, xmax, ymax]`` (ints).  The dicts are
    returned in document order.
    """
    def int_field(parent, tag):
        # Text of the named child element, converted to int.
        return int(parent.find(tag).text)

    parsed = []
    for element in ET.parse(filename).findall('object'):
        item = {}
        item['name'] = element.find('name').text
        item['pose'] = element.find('pose').text
        item['truncated'] = int_field(element, 'truncated')
        item['difficult'] = int_field(element, 'difficult')
        bndbox = element.find('bndbox')
        item['bbox'] = [int_field(bndbox, corner)
                        for corner in ('xmin', 'ymin', 'xmax', 'ymax')]
        parsed.append(item)
    return parsed

Source File: utils.py From DOTA_models with Apache License 2.0

6 votes

def parse_labelme_poly(filename):
    """Parse a labelme xml file.

    Returns one dict per ``object`` element.  ``name``, ``deleted``,
    ``occluded`` and ``attributes`` hold the element text, ``verified``
    is converted to int, and ``polygon`` is a flat list
    ``[x0, y0, x1, y1, ...]`` of the coordinate *strings* of every
    ``pt`` under ``polygon`` (they are not converted to numbers).
    """
    records = []
    for node in ET.parse(filename).findall('object'):
        record = {
            'name': node.find('name').text,
            'deleted': node.find('deleted').text,
            'verified': int(node.find('verified').text),
            'occluded': node.find('occluded').text,
            'attributes': node.find('attributes').text,
        }
        vertices = node.find('polygon').findall('pt')
        # Flatten the (x, y) pairs into a single list, point by point.
        record['polygon'] = [
            coordinate
            for vertex in vertices
            for coordinate in (vertex.find('x').text, vertex.find('y').text)
        ]
        records.append(record)
    return records

Source File: base.py From robosuite with MIT License

6 votes

def __init__(self, fname):
    """
    Loads a mujoco xml from file.

    Stores the path, its directory, the parsed tree and its root, and
    the root's ``model`` attribute as ``name``.  Then one attribute per
    standard section is filled via ``create_default_element`` and
    ``resolve_asset_dependency`` is invoked last.

    Args:
        fname (str): path to the MJCF xml file.
    """
    self.file = fname
    self.folder = os.path.dirname(fname)

    tree = ET.parse(fname)
    root = tree.getroot()
    self.tree, self.root = tree, root
    self.name = root.get("model")

    # Sections are created in this fixed order; each is exposed as an
    # attribute carrying the same name as the section.
    section_names = (
        "worldbody",
        "actuator",
        "asset",
        "equality",
        "contact",
        "default",
    )
    for section in section_names:
        setattr(self, section, self.create_default_element(section))

    self.resolve_asset_dependency()

Source File: voc_eval.py From cascade-rcnn_Pytorch with MIT License

6 votes

def parse_rec(filename):
  """Parse a PASCAL VOC xml file.

  Returns a list with one dict per ``object`` element: ``name`` and
  ``pose`` are the element text, ``truncated`` and ``difficult`` are
  ints, and ``bbox`` is ``[xmin, ymin, xmax, ymax]`` as ints.
  """
  corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
  results = []
  for node in ET.parse(filename).findall('object'):
    fields = {
      'name': node.find('name').text,
      'pose': node.find('pose').text,
      'truncated': int(node.find('truncated').text),
      'difficult': int(node.find('difficult').text),
    }
    box = node.find('bndbox')
    fields['bbox'] = [int(box.find(tag).text) for tag in corner_tags]
    results.append(fields)

  return results

Source File: voc_eval.py From easy-faster-rcnn.pytorch with MIT License

6 votes

def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    The result is a list of dicts, one per ``object`` element, with the
    text of ``name`` and ``pose``, the int values of ``truncated`` and
    ``difficult``, and ``bbox`` = ``[xmin, ymin, xmax, ymax]`` (ints).
    """
    objects = []
    for element in ET.parse(filename).findall('object'):
        text_of = lambda tag, parent=element: parent.find(tag).text
        described = {
            'name': text_of('name'),
            'pose': text_of('pose'),
            'truncated': int(text_of('truncated')),
            'difficult': int(text_of('difficult')),
        }
        bndbox = element.find('bndbox')
        described['bbox'] = [int(text_of(corner, bndbox))
                             for corner in ('xmin', 'ymin', 'xmax', 'ymax')]
        objects.append(described)

    return objects

Source File: voc_dataset.py From Yolo-v2-pytorch with MIT License

6 votes

def __getitem__(self, item):
    """Load one image and its annotated boxes.

    The image is read from ``<data_path>/JPEGImages/<id>.jpg`` and
    converted with ``cv2.COLOR_BGR2RGB``; boxes come from
    ``<data_path>/Annotations/<id>.xml``.  Each box is
    ``[xmin, ymin, xmax, ymax, label]`` with 1 subtracted from every
    coordinate and ``label`` the index of the lower-cased, stripped
    object name in ``self.classes``.

    Returns a pair of float32 arrays: the transformed image with its
    last axis moved to the front, and the transformed objects.
    """
    sample_id = self.ids[item]
    image_path = os.path.join(self.data_path, "JPEGImages", "{}.jpg".format(sample_id))
    image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    annotation = ET.parse(
        os.path.join(self.data_path, "Annotations", "{}.xml".format(sample_id)))

    objects = []
    for node in annotation.findall('object'):
        # Note the tag order here (x pair first, then y pair); the
        # stored row is re-ordered to xmin, ymin, xmax, ymax below.
        xmin, xmax, ymin, ymax = (
            int(node.find('bndbox').find(tag).text) - 1
            for tag in ("xmin", "xmax", "ymin", "ymax")
        )
        class_name = node.find('name').text.lower().strip()
        objects.append([xmin, ymin, xmax, ymax, self.classes.index(class_name)])

    if self.is_training:
        steps = [HSVAdjust(), VerticalFlip(), Crop(), Resize(self.image_size)]
    else:
        steps = [Resize(self.image_size)]
    image, objects = Compose(steps)((image, objects))

    image_array = np.transpose(np.array(image, dtype=np.float32), (2, 0, 1))
    return image_array, np.array(objects, dtype=np.float32)

Source File: voc0712.py From CSD-SSD with MIT License

6 votes

def pull_item(self, index):
    """Load the image and annotation stored at ``index``.

    The annotation root is parsed from ``self._annopath % img_id`` and
    the image is read with ``cv2.imread`` from ``self._imgpath % img_id``.
    ``target_transform`` (if set) is applied to the annotation with the
    image width and height; ``transform`` (if set) is applied to the
    image, the first four target columns (boxes) and the fifth (labels).

    Returns ``(image_tensor, target, height, width)`` where the tensor is
    the image permuted with ``(2, 0, 1)`` and ``height``/``width`` are
    the dimensions of the image as loaded.
    """
    img_id = self.ids[index]

    target = ET.parse(self._annopath % img_id).getroot()
    image = cv2.imread(self._imgpath % img_id)
    height, width, _ = image.shape

    if self.target_transform is not None:
        target = self.target_transform(target, width, height)

    if self.transform is not None:
        target = np.array(target)
        box_columns, label_column = target[:, :4], target[:, 4]
        image, boxes, labels = self.transform(image, box_columns, label_column)
        # Reverse the channel order (the original comment: "to rgb").
        image = image[:, :, (2, 1, 0)]
        label_column = np.expand_dims(labels, axis=1)
        target = np.hstack((boxes, label_column))

    image_tensor = torch.from_numpy(image).permute(2, 0, 1)
    return image_tensor, target, height, width

Source File: voc0712.py From CSD-SSD with MIT License

6 votes

def pull_anno(self, index):
    '''Return the untransformed annotation of the image at ``index``.

    Note: ``self.__getitem__()`` is deliberately not used, as any
    transformations passed in could mess up this functionality.  The
    annotation is run through ``self.target_transform`` with width and
    height both set to 1.

    Argument:
        index (int): index of img to get annotation of
    Return:
        tuple: (img_id[1], transformed annotation)
            eg: ('001718', [('dog', (96, 13, 438, 332))])
    '''
    img_id = self.ids[index]
    annotation_root = ET.parse(self._annopath % img_id).getroot()
    ground_truth = self.target_transform(annotation_root, 1, 1)
    # Only the second component of the id is reported back.
    return img_id[1], ground_truth

Source File: voc07_consistency.py From CSD-SSD with MIT License

6 votes

def pull_anno(self, index):
    '''Fetch the original annotation for the image at ``index``.

    Note: this bypasses ``self.__getitem__()`` on purpose, because any
    transformations passed in could mess up this functionality.
    ``self.target_transform`` is called with the parsed annotation root
    and a width and height of 1.

    Argument:
        index (int): index of img to get annotation of
    Return:
        tuple: (img_id[1], transformed annotation)
            eg: ('001718', [('dog', (96, 13, 438, 332))])
    '''
    img_id = self.ids[index]
    xml_root = ET.parse(self._annopath % img_id).getroot()
    transformed = self.target_transform(xml_root, 1, 1)
    name = img_id[1]
    return name, transformed

Source File: voc07_consistency_init.py From CSD-SSD with MIT License

6 votes

def pull_item(self, index):
    """Load image, target and a labelled/unlabelled flag for ``index``.

    Works like a plain annotation/image loader (annotation root from
    ``self._annopath % img_id``, image via ``cv2.imread`` from
    ``self._imgpath % img_id``, optional ``target_transform`` and
    ``transform``), plus a ``semi`` marker: when the first component of
    the id ends in ``'VOC2007'`` the marker is ``array([1])`` and the
    target is kept; otherwise the marker is ``array([0])`` and the
    target is replaced by a ``(1, 5)`` array of zeros.

    Returns ``(image_tensor, target, height, width, semi)``.
    """
    img_id = self.ids[index]

    target = ET.parse(self._annopath % img_id).getroot()
    image = cv2.imread(self._imgpath % img_id)
    height, width, _ = image.shape

    if self.target_transform is not None:
        target = self.target_transform(target, width, height)

    if self.transform is not None:
        target = np.array(target)
        box_columns, label_column = target[:, :4], target[:, 4]
        image, boxes, labels = self.transform(image, box_columns, label_column)
        # Reverse the channel order (the original comment: "to rgb").
        image = image[:, :, (2, 1, 0)]
        target = np.hstack((boxes, np.expand_dims(labels, axis=1)))

    # Compare the last seven characters of the id's first component.
    from_voc2007 = img_id[0][-7:] == 'VOC2007'
    if from_voc2007:
        semi = np.array([1])
    else:
        semi = np.array([0])
        target = np.zeros([1, 5])

    image_tensor = torch.from_numpy(image).permute(2, 0, 1)
    return image_tensor, target, height, width, semi

Source File: eval512.py From CSD-SSD with MIT License

6 votes

def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Returns a list of dicts, one per ``object`` element, with ``name``
    and ``pose`` as text, ``truncated`` and ``difficult`` as ints, and
    ``bbox`` as ``[xmin, ymin, xmax, ymax]`` where every coordinate has
    1 subtracted from the value stored in the file.
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    annotations = []
    for node in ET.parse(filename).findall('object'):
        entry = {
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int(node.find('truncated').text),
            'difficult': int(node.find('difficult').text),
        }
        box = node.find('bndbox')
        # Shift every coordinate down by one.
        entry['bbox'] = [int(box.find(tag).text) - 1 for tag in corner_tags]
        annotations.append(entry)

    return annotations

Source File: eval.py From CSD-SSD with MIT License

6 votes

def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    One dict is produced per ``object`` element: ``name`` / ``pose``
    hold the element text, ``truncated`` / ``difficult`` are ints, and
    ``bbox`` is ``[xmin - 1, ymin - 1, xmax - 1, ymax - 1]`` computed
    from the integer values in ``bndbox``.
    """
    def shifted(parent, tag):
        # Integer value of the child element, moved down by one.
        return int(parent.find(tag).text) - 1

    parsed = []
    for element in ET.parse(filename).findall('object'):
        item = {}
        item['name'] = element.find('name').text
        item['pose'] = element.find('pose').text
        item['truncated'] = int(element.find('truncated').text)
        item['difficult'] = int(element.find('difficult').text)
        bndbox = element.find('bndbox')
        item['bbox'] = [shifted(bndbox, corner)
                        for corner in ('xmin', 'ymin', 'xmax', 'ymax')]
        parsed.append(item)

    return parsed

Source File: named_entity.py From razzy-spinner with GNU General Public License v3.0

6 votes

def _tagged_to_parse(self, tagged_tokens):
    """
    Convert a list of tagged tokens to a chunk-parse tree.

    Tokens tagged ``'O'`` are added directly to the ``'S'`` tree.  A
    ``'B-X'`` tag always opens a new ``X`` subtree.  An ``'I-X'`` tag
    extends the last child when that child is an ``X`` subtree and
    otherwise opens a new ``X`` subtree.  Tokens with any other tag are
    dropped.
    """
    sent = Tree('S', [])

    for token, tag in tagged_tokens:
        if tag == 'O':
            sent.append(token)
            continue

        prefix, label = tag[:2], tag[2:]
        if prefix not in ('B-', 'I-'):
            # Unknown tag scheme: the token is not added to the tree.
            continue

        extends_last_chunk = (
            prefix == 'I-'
            and sent
            and isinstance(sent[-1], Tree)
            and sent[-1].label() == label
        )
        if extends_last_chunk:
            sent[-1].append(token)
        else:
            sent.append(Tree(label, [token]))
    return sent

Source File: named_entity.py From razzy-spinner with GNU General Public License v3.0

6 votes

def _parse_to_tagged(sent):
    """
    Convert a chunk-parse tree to a list of tagged tokens.

    Children that are not ``Tree`` instances are tagged ``'O'``.  For a
    ``Tree`` child, its first token is tagged ``'B-<label>'`` and the
    remaining tokens ``'I-<label>'``.  An empty ``Tree`` child prints a
    warning and contributes nothing.
    """
    tagged = []
    for child in sent:
        if not isinstance(child, Tree):
            tagged.append((child, 'O'))
            continue
        if len(child) == 0:
            print("Warning -- empty chunk in sentence")
            continue
        label = child.label()
        tagged.append((child[0], 'B-%s' % label))
        tagged.extend((tok, 'I-%s' % label) for tok in child[1:])
    return tagged

Source File: nombank.py From razzy-spinner with GNU General Public License v3.0

6 votes

def roleset(self, roleset_id):
    """
    Look up the XML description of one roleset.

    The text before the first ``.`` in ``roleset_id`` selects the
    frameset file ``frames/<baseform>.xml`` (after substituting the
    escaped spellings ``perc-sign`` and ``oneslashonezero``).

    :param roleset_id: identifier of the roleset to find.
    :return: the xml description for the given roleset.
    :raise ValueError: if the frameset file is not listed in
        ``self._framefiles``, or no ``predicate/roleset`` element in
        it has a matching ``id`` attribute.
    """
    baseform = roleset_id.split('.')[0]
    baseform = baseform.replace('perc-sign','%')
    baseform = baseform.replace('oneslashonezero', '1/10').replace('1/10','1-slash-10')
    framefile = 'frames/%s.xml' % baseform
    if framefile not in self._framefiles:
        raise ValueError('Frameset file for %s not found' %
                         roleset_id)

    # n.b.: The encoding for XML fileids is specified by the file
    # itself; so we ignore self._encoding here.
    # The stream is closed as soon as parsing finishes; the parsed
    # elements stay usable because the tree is held fully in memory.
    with self.abspath(framefile).open() as fp:
        etree = ElementTree.parse(fp).getroot()

    for roleset in etree.findall('predicate/roleset'):
        if roleset.attrib['id'] == roleset_id:
            return roleset
    raise ValueError('Roleset %s not found in %s' %
                     (roleset_id, framefile))

Source File: nombank.py From razzy-spinner with GNU General Public License v3.0

6 votes

def _read_instance_block(self, stream, instance_filter=lambda inst: True):
    """Read up to 100 lines from ``stream`` and parse them as instances.

    Exactly 100 ``readline`` calls are made.  Lines that are empty after
    stripping are skipped; every other line is parsed with
    ``NombankInstance.parse`` and kept when ``instance_filter`` returns
    a true value for it.

    :return: list of the accepted instances, in reading order.
    """
    accepted = []

    # Read 100 at a time.
    for _ in range(100):
        line = stream.readline().strip()
        if not line:
            continue
        instance = NombankInstance.parse(
            line, self._parse_fileid_xform, self._parse_corpus)
        if instance_filter(instance):
            accepted.append(instance)

    return accepted

######################################################################
#{ Nombank Instance & related datatypes
######################################################################

Source File: nombank.py From razzy-spinner with GNU General Public License v3.0

6 votes

def parse(s):
    """Parse a nombank pointer string.

    A string containing ``*`` is a chain: each ``*``-separated piece is
    parsed recursively and wrapped in ``NombankChainTreePointer``.
    Otherwise a string containing ``,`` is a split argument, handled the
    same way with ``NombankSplitTreePointer``.  Anything else must be of
    the form ``<int>:<int>`` and yields a ``NombankTreePointer``.

    :raise ValueError: if a plain pointer does not consist of exactly
        two ``:``-separated fields (or they are not integers).
    """
    # Deal with chains (xx*yy*zz)
    if '*' in s:
        return NombankChainTreePointer(
            [NombankTreePointer.parse(part) for part in s.split('*')])

    # Deal with split args (xx,yy,zz)
    if ',' in s:
        return NombankSplitTreePointer(
            [NombankTreePointer.parse(part) for part in s.split(',')])

    # Deal with normal pointers.
    fields = s.split(':')
    if len(fields) != 2:
        raise ValueError('bad nombank pointer %r' % s)
    first, second = fields
    return NombankTreePointer(int(first), int(second))

Source File: propbank.py From razzy-spinner with GNU General Public License v3.0

6 votes

def roleset(self, roleset_id):
    """
    Look up the XML description of one roleset.

    The text before the first ``.`` in ``roleset_id`` selects the
    frameset file ``frames/<baseform>.xml``.

    :param roleset_id: identifier of the roleset to find.
    :return: the xml description for the given roleset.
    :raise ValueError: if the frameset file is not listed in
        ``self._framefiles``, or no ``predicate/roleset`` element in
        it has a matching ``id`` attribute.
    """
    baseform = roleset_id.split('.')[0]
    framefile = 'frames/%s.xml' % baseform
    if framefile not in self._framefiles:
        raise ValueError('Frameset file for %s not found' %
                         roleset_id)

    # n.b.: The encoding for XML fileids is specified by the file
    # itself; so we ignore self._encoding here.
    # The stream is closed as soon as parsing finishes; the parsed
    # elements stay usable because the tree is held fully in memory.
    with self.abspath(framefile).open() as fp:
        etree = ElementTree.parse(fp).getroot()

    for roleset in etree.findall('predicate/roleset'):
        if roleset.attrib['id'] == roleset_id:
            return roleset
    raise ValueError('Roleset %s not found in %s' %
                     (roleset_id, framefile))

Source File: propbank.py From razzy-spinner with GNU General Public License v3.0

6 votes

def rolesets(self, baseform=None):
    """
    Collect the roleset descriptions of one or all frameset files.

    :param baseform: if given, only ``frames/<baseform>.xml`` is read;
        otherwise every file in ``self._framefiles`` is read.
    :return: list of xml descriptions for rolesets (a
        ``LazyConcatenation`` over the per-file ``predicate/roleset``
        lists).
    :raise ValueError: if ``baseform`` is given but its frameset file
        is not listed in ``self._framefiles``.
    """
    if baseform is not None:
        framefile = 'frames/%s.xml' % baseform
        if framefile not in self._framefiles:
            raise ValueError('Frameset file for %s not found' %
                             baseform)
        framefiles = [framefile]
    else:
        framefiles = self._framefiles

    rsets = []
    for framefile in framefiles:
        # n.b.: The encoding for XML fileids is specified by the file
        # itself; so we ignore self._encoding here.
        # Close each stream right after parsing so that reading many
        # frame files does not accumulate open file handles.
        with self.abspath(framefile).open() as fp:
            etree = ElementTree.parse(fp).getroot()
        rsets.append(etree.findall('predicate/roleset'))
    return LazyConcatenation(rsets)

Source File: propbank.py From razzy-spinner with GNU General Public License v3.0

6 votes

def _read_instance_block(self, stream, instance_filter=lambda inst: True):
    """Read up to 100 lines from ``stream`` and parse them as instances.

    Exactly 100 ``readline`` calls are made.  Lines that are empty after
    stripping are skipped; every other line is parsed with
    ``PropbankInstance.parse`` and kept when ``instance_filter`` returns
    a true value for it.

    :return: list of the accepted instances, in reading order.
    """
    accepted = []

    # Read 100 at a time.
    for _ in range(100):
        line = stream.readline().strip()
        if not line:
            continue
        instance = PropbankInstance.parse(
            line, self._parse_fileid_xform, self._parse_corpus)
        if instance_filter(instance):
            accepted.append(instance)

    return accepted

######################################################################
#{ Propbank Instance & related datatypes
######################################################################

Source File: input_definition.py From misp42splunk with GNU Lesser General Public License v3.0

6 votes

def parse(stream):
    """Turn an XML stream into an ``InputDefinition``.

    The children of the document root are processed in order: the
    ``configuration`` child is converted with
    ``parse_xml_data(node, "stanza")`` and stored as ``inputs``; each
    remaining child contributes a ``metadata`` entry keyed by its tag
    with the element text as value.

    :param stream: stream containing XML to parse.
    :return: definition: an ``InputDefinition`` object.
    """
    definition = InputDefinition()

    document_root = ET.parse(stream).getroot()
    for element in document_root:
        is_configuration = element.tag == "configuration"
        if not is_configuration:
            definition.metadata[element.tag] = element.text
            continue
        # get config for each stanza
        definition.inputs = parse_xml_data(element, "stanza")

    return definition

Source File: voc_eval.py From Collaborative-Learning-for-Weakly-Supervised-Object-Detection with MIT License

6 votes

def parse_rec(filename):
  """Parse a PASCAL VOC xml file.

  Each ``object`` element is turned into a dict with the text of
  ``name`` and ``pose``, the int values of ``truncated`` and
  ``difficult``, and ``bbox`` = ``[xmin, ymin, xmax, ymax]`` (ints).
  The dicts are returned as a list in document order.
  """
  def int_field(parent, tag):
    # Text of the named child element, converted to int.
    return int(parent.find(tag).text)

  parsed = []
  for element in ET.parse(filename).findall('object'):
    item = {}
    item['name'] = element.find('name').text
    item['pose'] = element.find('pose').text
    item['truncated'] = int_field(element, 'truncated')
    item['difficult'] = int_field(element, 'difficult')
    bndbox = element.find('bndbox')
    item['bbox'] = [int_field(bndbox, corner)
                    for corner in ('xmin', 'ymin', 'xmax', 'ymax')]
    parsed.append(item)

  return parsed

Source File: nombank.py From razzy-spinner with GNU General Public License v3.0

5 votes

def parse(s, parse_fileid_xform=None, parse_corpus=None):
    """Parse one line of a nombank file into a ``NombankInstance``.

    The line must hold at least six whitespace-separated fields:
    ``fileid sentnum wordnum baseform sensenumber`` followed by one or
    more ``<pointer>-<id>`` fields.  Exactly one of those trailing
    fields must contain ``-rel``; it names the predicate.  All other
    trailing fields are arguments.

    :param s: the line to parse.
    :param parse_fileid_xform: optional callable applied to the fileid.
    :param parse_corpus: passed through to ``NombankInstance``.
    :raise ValueError: if the line has fewer than six fields or does
        not contain exactly one ``-rel`` field.
    """
    pieces = s.split()
    if len(pieces) < 6:
        raise ValueError('Badly formatted nombank line: %r' % s)

    # Divide the line into its basic pieces.
    (fileid, sentnum, wordnum,
      baseform, sensenumber) = pieces[:5]

    # Separate the predicate ("-rel") field from the arguments without
    # mutating the list being scanned: popping while enumerating skips
    # the element after each match, so a second "-rel" directly after
    # the first used to slip through as an ordinary argument.
    trailing = pieces[5:]
    rel = [p for p in trailing if '-rel' in p]
    args = [p for p in trailing if '-rel' not in p]
    if len(rel) != 1:
        raise ValueError('Badly formatted nombank line: %r' % s)

    # Apply the fileid selector, if any.
    if parse_fileid_xform is not None:
        fileid = parse_fileid_xform(fileid)

    # Convert sentence & word numbers to ints.
    sentnum = int(sentnum)
    wordnum = int(wordnum)

    # Parse the predicate location.
    predloc, predid = rel[0].split('-', 1)
    predicate = NombankTreePointer.parse(predloc)

    # Parse the arguments.
    arguments = []
    for arg in args:
        argloc, argid = arg.split('-', 1)
        arguments.append( (NombankTreePointer.parse(argloc), argid) )

    # Put it all together.
    return NombankInstance(fileid, sentnum, wordnum, baseform, sensenumber,
                           predicate, predid, arguments, parse_corpus)

Source File: nombank.py From razzy-spinner with GNU General Public License v3.0

5 votes

def __init__(self, fileid, sentnum, wordnum, baseform, sensenumber,
             predicate, predid, arguments, parse_corpus=None):
    """Store the fields describing a single nombank instance.

    Every parameter is stored unchanged under an attribute of the same
    name, except ``arguments``, which is converted to a tuple.
    """
    #: The name of the file containing the parse tree for this
    #: instance's sentence.
    self.fileid = fileid

    #: The sentence number of this sentence within ``fileid``.
    #: Indexing starts from zero.
    self.sentnum = sentnum

    #: The word number of this instance's predicate within its
    #: containing sentence.  Word numbers are indexed starting from
    #: zero, and include traces and other empty parse elements.
    self.wordnum = wordnum

    #: The baseform of the predicate.
    self.baseform = baseform

    #: The sense number of the predicate.
    self.sensenumber = sensenumber

    #: A ``NombankTreePointer`` indicating the position of this
    #: instance's predicate within its containing sentence.
    self.predicate = predicate

    #: Identifier of the predicate.
    self.predid = predid

    #: A tuple of pairs (argloc, argid), specifying the location
    #: and identifier for each of the predicate's argument in the
    #: containing sentence.  Argument identifiers are strings such as
    #: ``'ARG0'`` or ``'ARGM-TMP'``.  This tuple does *not* contain
    #: the predicate.
    self.arguments = tuple(arguments)

    #: A corpus reader for the parse trees corresponding to the
    #: instances in this nombank corpus.
    self.parse_corpus = parse_corpus

Python xml.etree.ElementTree.parse() Examples