Python six.ensure_text() Examples
The following are 30 code examples of six.ensure_text().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module six, or try the search function.
Example #1
Source File: checkpoint_hooks.py From tensor2robot with Apache License 2.0 | 6 votes |
def _copy_savedmodel(self, source_dir, destination):
    """Copies a SavedModel directory underneath `destination`.

    The copy is placed at `destination/<basename of source_dir>`; the transfer
    itself is delegated to `copy_fn`.

    Args:
      source_dir: Source directory, should be a path to a SavedModel directory.
      destination: Base directory that receives the copy.

    Returns:
      Destination path of the copied model.
    """
    source_text = six.ensure_text(source_dir)
    target_root = six.ensure_text(destination)
    copied_model_dir = os.path.join(target_root, os.path.basename(source_text))
    copy_fn(source_text, copied_model_dir)
    return copied_model_dir
Example #2
Source File: tfjs_rewriter.py From tfx with Apache License 2.0 | 6 votes |
def _rewrite(self, original_model: rewriter.ModelDescription,
             rewritten_model: rewriter.ModelDescription):
    """Rewrites the provided model into the TFJS format.

    Args:
      original_model: A `ModelDescription` specifying the original model to be
        rewritten.
      rewritten_model: A `ModelDescription` specifying the format and location
        of the rewritten model.

    Raises:
      ValueError: If the model could not be successfully rewritten, i.e. the
        requested target type is neither TFJS_MODEL nor ANY_MODEL.
    """
    supported_types = (
        rewriter.ModelType.TFJS_MODEL,
        rewriter.ModelType.ANY_MODEL,
    )
    if rewritten_model.model_type not in supported_types:
        raise ValueError('TFJSConverter can only convert to the TFJS format.')

    source_path = six.ensure_text(original_model.path)
    target_path = six.ensure_text(rewritten_model.path)
    _convert_tfjs_model(source_path, target_path)
Example #3
Source File: print_tf_records.py From lingvo with Apache License 2.0 | 6 votes |
def _CustomShortDebugString(tf_example):
    """Renders the features of `tf_example` as sorted `name: value` lines.

    Args:
      tf_example: Proto exposing `features.feature`, a map from feature name to
        a value that may hold a `bytes_list`, `float_list` or `int64_list`.

    Returns:
      One `name: value` line per feature, ordered by feature name and joined
      with newlines.
    """
    text = []
    for name, value in sorted(six.iteritems(tf_example.features.feature)):
        if value.HasField('bytes_list'):
            if FLAGS.bytes_as_utf8:
                utf8_values = [
                    six.ensure_text(v, 'utf-8') for v in value.bytes_list.value
                ]
                value_string = _ListDebugString(utf8_values)
            else:
                value_string = _ListDebugString(value.bytes_list.value)
        elif value.HasField('float_list'):
            value_string = _ListDebugString(value.float_list.value)
        elif value.HasField('int64_list'):
            value_string = _ListDebugString(value.int64_list.value, to_string=repr)
        else:
            # None of the three lists is set. Without this branch the name
            # `value_string` would be unbound on the first feature, or would
            # silently repeat the previous feature's value on later ones.
            value_string = _ListDebugString([])
        text.append('%s: %s' % (name, value_string))
    return '\n'.join(text)
Example #4
Source File: albert_tokenization.py From bert-for-tf2 with MIT License | 6 votes |
def printable_text(text):
    """Returns `text` as the native `str` type for print or `tf.logging`.

    Native `str` is a Unicode string on Python 3 and a byte string on Python 2,
    so the conversion that is applied depends on the running interpreter.

    Raises:
      ValueError: If `text` is not a supported string type, or the interpreter
        is neither Python 2 nor Python 3.
    """
    if six.PY3:
        if isinstance(text, str):
            return text
        if isinstance(text, bytes):
            # Undecodable bytes are dropped ("ignore") instead of raising.
            return six.ensure_text(text, "utf-8", "ignore")
    elif six.PY2:
        if isinstance(text, str):
            return text
        if isinstance(text, six.text_type):
            return six.ensure_binary(text, "utf-8")
    else:
        raise ValueError("Not running on Python2 or Python 3?")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example #5
Source File: wpm_encoder.py From lingvo with Apache License 2.0 | 6 votes |
def __init__(self, wpm_filepath, merge_prob=1.):
    """Create a WPM encoder.

    Args:
      wpm_filepath: a path to the file containing the vocabulary.
      merge_prob: the probability of merging tokens while encoding.
    """

    def _first_column(raw_line):
        # Lines may arrive as bytes; decode them before splitting on tabs.
        if isinstance(raw_line, six.binary_type):
            raw_line = six.ensure_text(raw_line, 'utf-8')
        return raw_line.strip().split('\t')[0]

    # Load vocabulary file: the piece is the first tab-separated field.
    self._pieces = [
        _first_column(raw_line)
        for raw_line in py_utils.ReadFileLines(wpm_filepath)
    ]
    self._merge_prob = merge_prob
Example #6
Source File: albert_tokenization.py From bert-for-tf2 with MIT License | 6 votes |
def convert_to_unicode(text):
    """Converts `text` to Unicode (if it's not already), assuming utf-8 input.

    Raises:
      ValueError: If `text` is neither a text nor a byte string, or the
        interpreter is neither Python 2 nor Python 3.
    """
    if not (six.PY3 or six.PY2):
        raise ValueError("Not running on Python2 or Python 3?")
    if isinstance(text, six.text_type):
        return text
    if isinstance(text, six.binary_type):
        # Undecodable bytes are dropped ("ignore") instead of raising.
        return six.ensure_text(text, "utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example #7
Source File: testutils.py From pythonfinder with MIT License | 6 votes |
def normalize_path(path):
    # type: (AnyStr) -> AnyStr
    """
    Return a case-normalized absolute variable-expanded path.

    On Windows, an existing path is resolved to its long form through
    ``GetLongPathNameW``. If that call fails, the expanded absolute path is
    returned unchanged instead of an empty string.

    :param str path: The non-normalized path
    :return: A normalized, expanded, case-normalized path
    :rtype: str
    """
    path = os.path.abspath(os.path.expandvars(os.path.expanduser(str(path))))
    if os.name == "nt" and os.path.exists(path):
        try:
            from ctypes import create_unicode_buffer, windll
        except ImportError:
            path = os.path.normpath(os.path.normcase(path))
        else:
            get_long_path_name = windll.kernel32.GetLongPathNameW
            text_path = six.ensure_text(path)
            size = 500
            buffer = create_unicode_buffer(size)
            written = get_long_path_name(text_path, buffer, size)
            if written > size:
                # Buffer too small: the call reported the required size
                # (including the terminating null), so retry with that size.
                size = written
                buffer = create_unicode_buffer(size)
                written = get_long_path_name(text_path, buffer, size)
            # 0 signals failure; on success the count excludes the null and is
            # therefore strictly smaller than the buffer size.
            if 0 < written < size:
                path = buffer.value
        return path
    return os.path.normpath(os.path.normcase(path))
Example #8
Source File: tokenization.py From albert with Apache License 2.0 | 6 votes |
def preprocess_text(inputs, remove_space=True, lower=False):
    """Normalizes `inputs` by squeezing whitespace and stripping accents.

    Args:
      inputs: The string to preprocess.
      remove_space: If true, strip the ends and collapse whitespace runs into
        single spaces.
      lower: If true, lowercase the result.

    Returns:
      The NFKD-normalized text with combining characters removed.
    """
    outputs = " ".join(inputs.strip().split()) if remove_space else inputs

    if six.PY2 and isinstance(outputs, str):
        # Python 2 byte strings: try utf-8 first, then fall back to latin-1.
        try:
            outputs = six.ensure_text(outputs, "utf-8")
        except UnicodeDecodeError:
            outputs = six.ensure_text(outputs, "latin-1")

    decomposed = unicodedata.normalize("NFKD", outputs)
    outputs = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    return outputs.lower() if lower else outputs
Example #9
Source File: albert_tokenization.py From bert-for-tf2 with MIT License | 6 votes |
def preprocess_text(inputs, remove_space=True, lower=False):
    """Normalizes `inputs` by squeezing whitespace and stripping accents.

    Args:
      inputs: The string to preprocess.
      remove_space: If true, strip the ends and collapse whitespace runs into
        single spaces.
      lower: If true, lowercase the result.

    Returns:
      The NFKD-normalized text with combining characters removed.
    """
    outputs = " ".join(inputs.strip().split()) if remove_space else inputs

    if six.PY2 and isinstance(outputs, str):
        # Python 2 byte strings: try utf-8 first, then fall back to latin-1.
        try:
            outputs = six.ensure_text(outputs, "utf-8")
        except UnicodeDecodeError:
            outputs = six.ensure_text(outputs, "latin-1")

    decomposed = unicodedata.normalize("NFKD", outputs)
    outputs = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    return outputs.lower() if lower else outputs
Example #10
Source File: os_utilities.py From luci-py with Apache License 2.0 | 6 votes |
def get_hostname():
    """Returns the machine's hostname as text.

    On GCE (outside of a Docker container) the hostname from the instance
    metadata is preferred; otherwise the lower-cased FQDN is used, falling back
    to the plain hostname when the FQDN is a reversed-IP '.in-addr.arpa' name.
    """
    if platforms.is_gce() and not os.path.isfile('/.dockerenv'):
        # When running on GCE, always use the hostname as defined by GCE. It's
        # possible the VM hadn't learned about it yet. We ignore GCE hostname when
        # running inside a Docker container and instead use its own hostname.
        meta = platforms.gce.get_metadata() or {}
        hostname = meta.get('instance', {}).get('hostname')
        if hostname:
            # ensure_text accepts bytes and text alike; a bare .decode() raises
            # AttributeError on Python 3 when the metadata value is already text.
            return six.ensure_text(hostname, 'utf-8')

    # Windows enjoys putting random case in there. Enforces lower case for sanity.
    hostname = socket.getfqdn().lower()
    if hostname.endswith('.in-addr.arpa'):
        # When OSX fails to get the FQDN, it returns as the base name the IPv4
        # address reversed, which is not useful. Get the base hostname as defined by
        # the host itself instead of the FQDN since the returned FQDN is useless.
        hostname = socket.gethostname()
    return six.ensure_text(hostname)
Example #11
Source File: tokenization.py From albert with Apache License 2.0 | 6 votes |
def printable_text(text):
    """Returns `text` as the native `str` type for print or `tf.logging`.

    Native `str` is a Unicode string on Python 3 and a byte string on Python 2,
    so the conversion that is applied depends on the running interpreter.

    Raises:
      ValueError: If `text` is not a supported string type, or the interpreter
        is neither Python 2 nor Python 3.
    """
    if six.PY3:
        if isinstance(text, str):
            return text
        if isinstance(text, bytes):
            # Undecodable bytes are dropped ("ignore") instead of raising.
            return six.ensure_text(text, "utf-8", "ignore")
    elif six.PY2:
        if isinstance(text, str):
            return text
        if isinstance(text, six.text_type):
            return six.ensure_binary(text, "utf-8")
    else:
        raise ValueError("Not running on Python2 or Python 3?")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example #12
Source File: tokenization.py From albert with Apache License 2.0 | 6 votes |
def convert_to_unicode(text):
    """Converts `text` to Unicode (if it's not already), assuming utf-8 input.

    Raises:
      ValueError: If `text` is neither a text nor a byte string, or the
        interpreter is neither Python 2 nor Python 3.
    """
    if not (six.PY3 or six.PY2):
        raise ValueError("Not running on Python2 or Python 3?")
    if isinstance(text, six.text_type):
        return text
    if isinstance(text, six.binary_type):
        # Undecodable bytes are dropped ("ignore") instead of raising.
        return six.ensure_text(text, "utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example #13
Source File: onsets_frames_transcription_transcribe.py From magenta with Apache License 2.0 | 6 votes |
def create_example(filename, sample_rate, load_audio_with_librosa):
    """Processes an audio file into an Example proto.

    Args:
      filename: Path of the audio file; also used (decoded as utf-8) as the
        example id.
      sample_rate: Forwarded to `audio_label_data_utils.process_record`.
      load_audio_with_librosa: Forwarded to
        `audio_label_data_utils.process_record`.

    Returns:
      The single resulting example, serialized to a string.
    """
    # Read through a context manager so the handle is closed even when read()
    # raises; the previous one-liner left the file open.
    with tf.gfile.Open(filename, 'rb') as audio_file:
        wav_data = audio_file.read()

    example_list = list(
        audio_label_data_utils.process_record(
            wav_data=wav_data,
            sample_rate=sample_rate,
            ns=music_pb2.NoteSequence(),
            # decode to handle filenames with extended characters.
            example_id=six.ensure_text(filename, 'utf-8'),
            min_length=0,
            max_length=-1,
            allow_empty_notesequence=True,
            load_audio_with_librosa=load_audio_with_librosa))
    assert len(example_list) == 1
    return example_list[0].SerializeToString()
Example #14
Source File: tokenization.py From embedding-as-service with MIT License | 6 votes |
def preprocess_text(inputs, remove_space=True, lower=False):
    """Normalizes `inputs` by squeezing whitespace and stripping accents.

    Args:
      inputs: The string to preprocess.
      remove_space: If true, strip the ends and collapse whitespace runs into
        single spaces.
      lower: If true, lowercase the result.

    Returns:
      The NFKD-normalized text with combining characters removed.
    """
    outputs = " ".join(inputs.strip().split()) if remove_space else inputs

    if six.PY2 and isinstance(outputs, str):
        # Python 2 byte strings: try utf-8 first, then fall back to latin-1.
        try:
            outputs = six.ensure_text(outputs, "utf-8")
        except UnicodeDecodeError:
            outputs = six.ensure_text(outputs, "latin-1")

    decomposed = unicodedata.normalize("NFKD", outputs)
    outputs = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    return outputs.lower() if lower else outputs
Example #15
Source File: plugin.py From fairness-indicators with Apache License 2.0 | 6 votes |
def _get_evaluation_result(self, request):
    """Responds with the slicing metrics of the requested run as JSON.

    Args:
      request: The HTTP request; its 'run' argument selects the run.

    Returns:
      The result of `http_util.Respond`; the payload is an empty list when the
      evaluation data cannot be fetched.
    """
    run = request.args.get('run')
    try:
        run = six.ensure_text(run)
    except (UnicodeDecodeError, AttributeError, TypeError):
        # Best effort: six.ensure_text raises TypeError for values that are
        # neither bytes nor text (presumably None when 'run' is missing), so it
        # has to be swallowed here too; the lookup below then handles the raw
        # value.
        pass

    data = []
    try:
        eval_result_output_dir = six.ensure_text(
            self._multiplexer.Tensors(run, FairnessIndicatorsPlugin.plugin_name)
            [0].tensor_proto.string_val[0])
        eval_result = tfma.load_eval_result(output_path=eval_result_output_dir)
        # TODO(b/141283811): Allow users to choose different model output names
        # and class keys in case of multi-output and multi-class model.
        data = widget_view.convert_slicing_metrics_to_ui_input(
            eval_result.slicing_metrics)
    except (KeyError, json_format.ParseError) as error:
        logging.info('Error while fetching evaluation data, %s', error)
    return http_util.Respond(request, data, content_type='application/json')
Example #16
Source File: tokenization.py From embedding-as-service with MIT License | 6 votes |
def convert_to_unicode(text):
    """Converts `text` to Unicode (if it's not already), assuming utf-8 input.

    Raises:
      ValueError: If `text` is neither a text nor a byte string, or the
        interpreter is neither Python 2 nor Python 3.
    """
    if not (six.PY3 or six.PY2):
        raise ValueError("Not running on Python2 or Python 3?")
    if isinstance(text, six.text_type):
        return text
    if isinstance(text, six.binary_type):
        # Undecodable bytes are dropped ("ignore") instead of raising.
        return six.ensure_text(text, "utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example #17
Source File: tokenization.py From embedding-as-service with MIT License | 6 votes |
def printable_text(text):
    """Returns `text` as the native `str` type for print or `tf.logging`.

    Native `str` is a Unicode string on Python 3 and a byte string on Python 2,
    so the conversion that is applied depends on the running interpreter.

    Raises:
      ValueError: If `text` is not a supported string type, or the interpreter
        is neither Python 2 nor Python 3.
    """
    if six.PY3:
        if isinstance(text, str):
            return text
        if isinstance(text, bytes):
            # Undecodable bytes are dropped ("ignore") instead of raising.
            return six.ensure_text(text, "utf-8", "ignore")
    elif six.PY2:
        if isinstance(text, str):
            return text
        if isinstance(text, six.text_type):
            return six.ensure_binary(text, "utf-8")
    else:
        raise ValueError("Not running on Python2 or Python 3?")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example #18
Source File: test_json_serialization.py From scalyr-agent-2 with Apache License 2.0 | 6 votes |
def _test_json_encode(benchmark, json_lib, log_tuple):
    """
    Benchmark ``json_encode`` on fixture data with the given JSON library.

    The decoded fixture text is cached in ``CACHED_TEST_DATA["encode"]`` under
    ``log_tuple`` so that it is read only once.

    :param benchmark: Object whose ``pedantic`` method runs the benchmark.
    :param json_lib: JSON library to use.
    :param log_tuple: Tuple with (log_filename, log_bytes_to_use).
    """
    set_json_lib(json_lib)

    file_name, bytes_to_read = log_tuple
    encode_cache = CACHED_TEST_DATA["encode"]
    if log_tuple not in encode_cache:
        raw_data = read_bytes_from_log_fixture_file(file_name, bytes_to_read)
        encode_cache[log_tuple] = six.ensure_text(raw_data)
    data = encode_cache[log_tuple]

    def run_benchmark():
        return json_encode(data)

    result = benchmark.pedantic(run_benchmark, iterations=20, rounds=50)

    assert get_json_lib() == json_lib
    assert isinstance(result, six.text_type)
    # assert json.dumps(data) == result
Example #19
Source File: plugin.py From fairness-indicators with Apache License 2.0 | 6 votes |
def _get_evaluation_result_from_remote_path(self, request):
    """Responds with the slicing metrics stored at a caller-supplied path.

    Args:
      request: The HTTP request; its 'evaluation_output_path' argument names
        the evaluation output file.

    Returns:
      The result of `http_util.Respond`; the payload is an empty list when the
      evaluation data cannot be fetched.
    """
    evaluation_output_path = request.args.get('evaluation_output_path')
    try:
        evaluation_output_path = six.ensure_text(evaluation_output_path)
    except (UnicodeDecodeError, AttributeError):
        pass
    except TypeError as error:
        # six.ensure_text raises TypeError for values that are neither bytes
        # nor text (presumably None when the argument is missing). Such a value
        # cannot be used as a path, so report it like any other fetch error.
        logging.info('Error while fetching evaluation data, %s', error)
        return http_util.Respond(request, [], content_type='application/json')

    try:
        eval_result = tfma.load_eval_result(
            os.path.dirname(evaluation_output_path),
            output_file_format=self._get_output_file_format(
                evaluation_output_path))
        data = widget_view.convert_slicing_metrics_to_ui_input(
            eval_result.slicing_metrics)
    except (KeyError, json_format.ParseError) as error:
        logging.info('Error while fetching evaluation data, %s', error)
        data = []
    return http_util.Respond(request, data, content_type='application/json')
Example #20
Source File: visualization.py From tensor2robot with Apache License 2.0 | 6 votes |
def tf_put_text(imgs, texts, text_size=1, text_pos=(0, 30), text_color=(0, 0, 1)):
    """Adds text to an image tensor.

    Args:
      imgs: Image tensor; entry `i` along the first axis receives `texts[i]`.
      texts: The texts to render, one per image.
      text_size: Font scale handed to `cv2.putText`.
      text_pos: Position handed to `cv2.putText`.
      text_color: Color handed to `cv2.putText`.

    Returns:
      The `tf.py_func` op producing the annotated images with dtype
      `imgs.dtype`.
    """

    def _put_text(img_batch, labels):
        """Python function that renders text onto a image."""
        rendered = np.empty_like(img_batch)
        for idx in range(img_batch.shape[0]):
            label = labels[idx]
            if isinstance(label, bytes):
                label = six.ensure_text(label)
            # You may need to adjust text size and position and size.
            # If your images are in [0, 255] range replace (0, 0, 1) with (0, 0, 255)
            rendered[idx, :, :, :] = cv2.putText(
                img_batch[idx, :, :, :], str(label), text_pos,
                cv2.FONT_HERSHEY_COMPLEX, text_size, text_color, 1)
        return rendered

    return tf.py_func(_put_text, [imgs, texts], Tout=imgs.dtype)
Example #21
Source File: util.py From scalyr-agent-2 with Apache License 2.0 | 6 votes |
def json_encode(obj, output=None, binary=False):
    """Encodes an object into a JSON string.

    @param obj: The object to serialize
    @param output: If not None, a file-like object to which the serialization should be written.
    @param binary: If True return binary string, otherwise text string.
    @type obj: dict|list|six.text_type
    @type binary: bool
    """
    # 2->TODO encode json according to 'binary' flag.
    if not binary:
        # NOTE(review): `output` is handed straight to _json_encode here; what
        # that helper returns when writing to a file is not visible - confirm.
        return six.ensure_text(_json_encode(obj, output))

    encoded = six.ensure_binary(_json_encode(obj, None))
    if not output:
        return encoded
    # With an output object the bytes are written and nothing is returned.
    output.write(encoded)
    return None
Example #22
Source File: scalyr_logging.py From scalyr-agent-2 with Apache License 2.0 | 6 votes |
def formatException(self, ei):
    """Formats the exception with every stack trace line indented.

    We just want to indent the stack trace to make it easier to write a parsing
    rule to detect it.
    """
    # 2->TODO 'logging.Formatter.formatException' returns binary data (str) in python2,
    # so it will not work with io.StringIO here.
    exception_string = six.ensure_text(
        logging.Formatter.formatException(self, ei)
    )
    with io.StringIO() as output:
        # splitlines(True) keeps the line endings, so only the indent is added.
        output.writelines(
            " " + line for line in exception_string.splitlines(True)
        )
        return output.getvalue()
Example #23
Source File: loghandler.py From jellyfin-kodi with GNU General Public License v3.0 | 6 votes |
def formatException(self, exc_info):
    """Formats a traceback with plugin file paths made relative.

    Every frame line whose file resolves to a location under the plugin
    directory has that file name replaced by its path relative to the plugin
    directory; all other lines are passed through unchanged.

    Args:
      exc_info: The exception tuple, unpacked into
        `traceback.format_exception`.

    Returns:
      The formatted traceback as a single string.
    """
    _pluginpath_real = os.path.realpath(__pluginpath__)
    # traceback.format_exception indents frame lines with two spaces; a
    # one-space prefix never matches, which disabled the shortening below.
    frame_prefix = '  File "'
    res = []

    for o in traceback.format_exception(*exc_info):
        o = ensure_text(o, get_filesystem_encoding())
        if o.startswith(frame_prefix):
            # If this split can't handle your file names, you should seriously consider renaming your files.
            fn = o.split(frame_prefix, 2)[1].split('", line ', 1)[0]
            rfn = os.path.realpath(fn)
            if rfn.startswith(_pluginpath_real):
                o = o.replace(fn, os.path.relpath(rfn, _pluginpath_real))
        res.append(o)

    return ''.join(res)
Example #24
Source File: checkpoint_hooks.py From tensor2robot with Apache License 2.0 | 6 votes |
def __init__(self, export_fn, export_dir, num_versions=None):
    """Initializes a `CheckpointExportListener`.

    Args:
      export_fn: function which exports the model.
      export_dir: directory to export models
      num_versions: number of exports to keep. If unset, keep all exports.
    """
    self._export_fn = export_fn
    self._export_dir = six.ensure_text(export_dir)
    tf.io.gfile.makedirs(self._export_dir)
    self._gc = None
    if num_versions:
        self._gc = _DirectoryVersionGC(num_versions)
        # Use the same tf.io.gfile API as makedirs above rather than the
        # legacy tf.gfile alias. Existing exports are registered with the GC.
        export_dir_contents = sorted(tf.io.gfile.listdir(self._export_dir))
        self._gc.observe_multiple([
            os.path.join(self._export_dir, filename)
            for filename in export_dir_contents
        ])
Example #25
Source File: convert_stix.py From cti-stix-elevator with BSD 3-Clause "New" or "Revised" License | 5 votes |
def convert_test_mechanism(indicator, indicator_instance):
    """Copies an indicator's alternative test mechanism into the converted indicator.

    At most one pattern is stored. If `indicator_instance` already has a
    pattern, it is kept and a warning is issued; otherwise the first YARA,
    Snort or OpenIOC test mechanism becomes the pattern and every further one
    is reported as dropped.

    Args:
      indicator: Source indicator; its `test_mechanisms` attribute is read.
      indicator_instance: Mapping for the converted indicator; its "id" key is
        read and its "pattern" / "pattern_type" keys are written.
    """
    if indicator.test_mechanisms is None:
        return

    if not _ALLOW_YARA_AND_SNORT_PATTENS and get_option_value("spec_version") == "2.0":
        warn("YARA/SNORT/IOC or other patterns are not supported in STIX 2.0. See %s", 504, indicator_instance["id"])
        return

    # indicator_instance is used as a mapping throughout, so an existing
    # pattern must be detected with a key test; hasattr() looks at attributes
    # and never sees keys, which made both guards below dead code.
    if "pattern" in indicator_instance:
        # TODO: maybe put in description
        warn("Only one type pattern can be specified in %s - using 'stix'", 712, indicator_instance["id"])
        return

    for tm in indicator.test_mechanisms:
        if "pattern" in indicator_instance:
            msg = "Only one alternative test mechanism allowed for %s in STIX 2.1 - used %s, dropped %s"
            warn(msg, 506, indicator_instance["id"], indicator_instance["pattern_type"], determine_pattern_type(tm))
        elif isinstance(tm, YaraTestMechanism):
            indicator_instance["pattern"] = text_type(tm.rule.value)
            indicator_instance["pattern_type"] = "yara"
        elif isinstance(tm, SnortTestMechanism):
            indicator_instance["pattern"] = ", ".join(
                [text_type(rule.value) for rule in tm.rules]
            )
            indicator_instance["pattern_type"] = "snort"
        elif isinstance(tm, OpenIOCTestMechanism):
            warn("IOC indicator in %s cannot be converted to a STIX pattern", 410, indicator_instance["id"])
            indicator_instance["pattern"] = ensure_text(etree.tostring(tm.ioc))
            indicator_instance["pattern_type"] = "openioc"
Example #26
Source File: agent_runner.py From scalyr-agent-2 with Apache License 2.0 | 5 votes |
def write_line(self, path, data):
    # type: (Path, six.text_type) -> None
    """Writes `data`, followed by a newline, to `path` via `write_to_file`."""
    line = "{0}\n".format(six.ensure_text(data))
    self.write_to_file(path, line)
Example #27
Source File: _inspector.py From colabtools with Apache License 2.0 | 5 votes |
def _getdef(self, obj, oname=''):
    """Safe variant of oinspect.Inspector._getdef.

    The upstream _getdef method includes the full string representation of all
    default arguments, which may run arbitrary code. We intercede to apply our
    custom getargspec wrapper, which uses _safe_repr.

    Args:
      obj: function whose definition we want to format.
      oname: (optional) If provided, prefix the definition with this name.

    Returns:
      A formatted definition or None.
    """
    try:
        argspec = _getargspec(obj)
        if argspec is None:
            return None
        signature = inspect.formatargspec(
            *argspec, formatvalue=lambda value: '=' + _safe_repr(value))
        return six.ensure_text(oname + signature)
    except:  # pylint: disable=bare-except
        # Any failure is logged and reported to the caller as None.
        logging.exception('Exception raised in ColabInspector._getdef')
    return None
Example #28
Source File: _inspector.py From colabtools with Apache License 2.0 | 5 votes |
def _getdoc(obj):
    """Custom wrapper for inspect.getdoc.

    IPython.core.oinspect.getdoc wraps Python's inspect.getdoc to catch
    exceptions and allow for objects with a custom getdoc() method. However,
    there are two problems:
     * inspect.getdoc already catches any exceptions
     * it then calls get_encoding, which calls inspect.getfile, which may call
       repr(obj) (to use in an error string, which oinspect.getdoc throws away).

    We replace this with our own wrapper which still allows for custom getdoc()
    methods, but avoids calling inspect.getfile.

    Args:
      obj: an object to fetch a docstring for

    Returns:
      A docstring or ''.
    """
    if hasattr(obj, 'getdoc'):
        try:
            custom_doc = obj.getdoc()
        except Exception:  # pylint: disable=broad-except
            custom_doc = None
        # Only a string result from the custom hook is trusted; anything else
        # falls through to inspect.getdoc below.
        if isinstance(custom_doc, six.string_types):
            return custom_doc

    fallback_doc = inspect.getdoc(obj) or ''
    # In principle, we want to find the file associated with obj, and use that
    # encoding here. However, attempting to find the file may lead to calling
    # repr(obj), so we instead assume UTF8 and replace non-UTF8 characters.
    return six.ensure_text(fallback_doc, errors='backslashreplace')
Example #29
Source File: utils.py From django-drf-filepond with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _get_file_id():
    """Returns a newly generated `shortuuid` identifier as text."""
    return six.ensure_text(shortuuid.uuid())
Example #30
Source File: jsonrpc.py From monero-python with BSD 3-Clause "New" or "Revised" License | 5 votes |
def send_transaction(self, blob, relay=True):
    """Submits a raw transaction through the '/sendrawtransaction' endpoint.

    Args:
      blob: The raw transaction bytes; sent hex-encoded.
      relay: If false, the request sets 'do_not_relay'.

    Returns:
      The response of the request when its status is 'OK'.

    Raises:
      exceptions.TransactionBroadcastError: If the status is anything else.
    """
    payload = {
        'tx_as_hex': six.ensure_text(binascii.hexlify(blob)),
        'do_not_relay': not relay,
    }
    res = self.raw_request('/sendrawtransaction', payload)
    if res['status'] != 'OK':
        raise exceptions.TransactionBroadcastError(
            "{status}: {reason}".format(**res),
            details=res)
    return res