Python pandas.read_msgpack() Examples
The following are 11 code examples of pandas.read_msgpack().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module pandas, or try the search function.
Example #1
Source File: columnarStructure.py From mmtf-pyspark with Apache License 2.0 | 6 votes |
def get_chain_to_entity_index(self):
    '''Returns an array that maps a chain index to an entity index

    Returns
    -------
    :obj:`array <numpy.ndarray>`
       index that maps chain index to an entity index
    '''
    if self.entityChainIndex is None:
        # Lazily build the mapping once and cache it on the instance.
        self.entityChainIndex = np.empty(self.structure.num_chains, dtype=np.int32)
        for i, entity in enumerate(self.structure.entity_list):
            chainIndexList = entity['chainIndexList']
            # pd.read_msgpack returns a tuple, msgpack-python returns a list.
            # NumPy would treat a tuple as a multi-dimensional index, so
            # normalize to a list before fancy-indexing.
            if not isinstance(chainIndexList, list):
                chainIndexList = list(chainIndexList)
            self.entityChainIndex[chainIndexList] = i
    return self.entityChainIndex
Example #2
Source File: mmtfStructure.py From mmtf-pyspark with Apache License 2.0 | 6 votes |
def chain_to_entity_index(self):
    '''Builds and caches an array that maps a chain index to an entity index.

    The mapping is stored in ``self.entityChainIndex``; nothing is returned.
    Chain indices at or beyond ``self.num_chains`` (possible after the
    structure has been truncated) are skipped.
    '''
    if self.entityChainIndex is None:
        self.entityChainIndex = np.empty(self.num_chains, dtype=np.int32)
        # TODO: entity_list may need updating when self.truncate is set;
        # until then out-of-range chain indices are simply ignored below.
        for i, entity in enumerate(self.entity_list):
            for index in entity['chainIndexList']:
                if index < self.num_chains:
                    self.entityChainIndex[index] = i
Example #3
Source File: mmtfReader.py From mmtf-pyspark with Apache License 2.0 | 6 votes |
def _call_mmtf(f, first_model=False):
    '''Call function for mmtf files'''
    # The entry name is the file's base name, upper-cased.
    name = f.split('/')[-1].split('.')[0].upper()
    if ".mmtf.gz" in f:
        # Gzipped file: hand an open binary stream to the msgpack reader.
        stream = gzip.open(f, 'rb')
        structure = MmtfStructure(pd.read_msgpack(stream), first_model)
        return (name, structure)
    if ".mmtf" in f:
        # Plain file: pandas reads the path directly.
        structure = MmtfStructure(pd.read_msgpack(f), first_model)
        return (name, structure)
Example #4
Source File: cache.py From catalyst with Apache License 2.0 | 6 votes |
def __init__(self, path=None, lock=None, clean_on_failure=True, serialization='msgpack'):
    """Set up the cache directory, locking, and (de)serialization hooks.

    ``serialization`` is either ``'msgpack'`` or ``'pickle'`` with an
    optional protocol suffix, e.g. ``'pickle:4'``.
    """
    if path is None:
        path = mkdtemp()
    self.path = path
    if lock is None:
        lock = nop_context
    self.lock = lock
    self.clean_on_failure = clean_on_failure

    if serialization == 'msgpack':
        self._protocol = None
        self.serialize = pd.DataFrame.to_msgpack
        self.deserialize = pd.read_msgpack
    else:
        parts = serialization.split(':', 1)
        if parts[0] != 'pickle':
            raise ValueError(
                "'serialization' must be either 'msgpack' or 'pickle[:n]'",
            )
        self._protocol = None if len(parts) == 1 else int(parts[1])
        self.serialize = self._serialize_pickle
        self.deserialize = pickle.load

    ensure_directory(self.path)
Example #5
Source File: message.py From timeflux with MIT License | 6 votes |
def msgpack_deserialize(message):
    """Decode a two-element message into ``[topic, DataFrame]``."""
    # TODO: handle meta and cases where data is None
    topic, payload = message[0], message[1]
    return [topic.decode("utf-8"), pd.read_msgpack(payload)]

# def arrow_serialize(message):
#     topic = message[0].decode('utf-8')
#     df = message[1]
#     return [topic, pa.serialize(df).to_buffer()]

# def arrow_deserialize(message):
#     topic = message[0]
#     data = message[1]
#     return [topic, pa.deserialize(data)]
Example #6
Source File: cache.py From git-pandas with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get(self, orik):
    """Return the cached DataFrame stored under ``orik``.

    Raises ``CacheMissException`` — after dropping any stale entry from
    the key list — when the key is absent from the backing cache.
    """
    key = self.prefix + orik
    if not self.exists(orik):
        # Keep the bookkeeping list consistent with the backing store.
        try:
            self._key_list.remove(key)
        except ValueError:
            pass
        raise CacheMissException(key)
    return pd.read_msgpack(self._cache.get(key))
Example #7
Source File: run_pandas.py From recipy with Apache License 2.0 | 5 votes |
def read_msgpack(self):
    """Load dataframe.mpack from the data directory with pandas.read_msgpack."""
    mpack_path = os.path.join(self.data_dir, "dataframe.mpack")
    pd.read_msgpack(mpack_path)
Example #8
Source File: mmtfReader.py From mmtf-pyspark with Apache License 2.0 | 5 votes |
def _get_structure(pdbId, reduced, first_model):
    '''Download and decode a list of structure from a list of PDBid

    Parameters
    ----------
    pdbID : list
       List of structures to download

    Returns
    -------
    tuple
       pdbID and decoder
    '''
    try:
        #unpack = default_api.get_raw_data_from_url(pdbId, reduced)
        url = default_api.get_url(pdbId, reduced)
        # NOTE(review): `urllib2` here presumably aliases urllib.request
        # (imported elsewhere in the file) since py3 features are used below
        # — confirm against the file's imports.
        request = urllib2.Request(url)
        # Ask the server for a gzipped payload to cut transfer size.
        request.add_header('Accept-encoding', 'gzip')
        response = urllib2.urlopen(request)
        # Only decompress when the server actually honored the request.
        if response.info().get('Content-Encoding') == 'gzip':
            data = gzip.decompress(response.read())
        else:
            data = response.read()
        # NOTE(review): pd.read_msgpack was deprecated in pandas 0.25 and
        # removed in 1.0 — verify the pinned pandas version.
        unpack = pd.read_msgpack(data)
        decoder = MmtfStructure(unpack, first_model)
        return (pdbId, decoder)
    except urllib.error.HTTPError:
        # Unknown/invalid PDB id: report it and fall through, returning None.
        print(f"ERROR: {pdbId} is not a valid pdbId")
Example #9
Source File: mmtfReader.py From mmtf-pyspark with Apache License 2.0 | 5 votes |
def _call_sequence_file(t, first_model):
    '''Call function for hadoop sequence files'''
    # TODO: check if all sequence files are gzipped
    # data = default_api.ungzip_data(t[1])
    # unpack = msgpack.unpackb(data.read(), raw=False)
    # decoder = MmtfStructure(unpack)
    # return (str(t[0]), decoder)
    key, raw = t[0], t[1]
    payload = gzip.decompress(raw)
    structure = MmtfStructure(pd.read_msgpack(payload), first_model)
    return (key, structure)
Example #10
Source File: dataframe_bytes_storage.py From pyABC with BSD 3-Clause "New" or "Revised" License | 5 votes |
def df_from_bytes_msgpack_(bytes_: bytes) -> pd.DataFrame:
    """Deserialize msgpack bytes into a DataFrame, validating the result."""
    try:
        loaded = pd.read_msgpack(BytesIO(bytes_))
    except UnicodeDecodeError:
        raise DataFrameLoadException("Not a DataFrame")
    if isinstance(loaded, pd.DataFrame):
        return loaded
    raise DataFrameLoadException("Not a DataFrame")
Example #11
Source File: stock_resampler.py From QUANTAXIS_RealtimeCollector with MIT License | 4 votes |
def on_message_callback(self, channel, method, properties, body): context = pd.read_msgpack(body) # merge update if self.market_data is None: # self.market_data = context pass else: logger.info("Before market_data, concat and update start, 合并市场数据") cur_time = datetime.datetime.now() self.market_data.update(context) end_time = datetime.datetime.now() cost_time = (end_time - cur_time).total_seconds() logger.info("Before market_data, concat and update end, 合并市场数据, 耗时,cost: %s s" % cost_time) logger.info(self.market_data.to_csv(float_format='%.3f')) filename = get_file_name_by_date('stock.market.%s.csv', self.log_dir) # 不追加,复写 logging_csv(self.market_data, filename, index=True, mode='w') # group by code and resample try: cur_time = datetime.datetime.now() bar_data: pd.DataFrame = tdx_stock_bar_resample_parallel( self.market_data[self.market_data.close > 0], self.frequency, jobs=self.cpu_count ) end_time = datetime.datetime.now() cost_time = (end_time - cur_time).total_seconds() logger.info("数据重采样耗时,cost: %s" % cost_time) logger.info("发送重采样数据中start") self.publish_msg(bar_data.to_msgpack()) logger.info("发送重采样数据完毕end") logger.info(bar_data.to_csv(float_format='%.3f')) filename = get_file_name_by_date('stock.bar.%s.csv', self.log_dir) # 不追加,复写 logging_csv(bar_data, filename, index=True, mode='w') del bar_data except Exception as e: logger.error("failure股票重采样数据. " + e.__str__()) finally: logger.info("重采样计数 count : %s" % self.count) self.count += 1 del context