gate.util.Files Java Examples
The following examples show how to use
gate.util.Files.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CorpusExporter.java From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 | 6 votes |
/**
 * Builds a CorpusExporter for the given Exporter and fills in its settings.
 *
 * The concrete class is taken from {@code exporter.getCorpusExporterClass()}
 * and instantiated through its no-argument constructor. Once all fields have
 * been assigned, {@code initWhenCreating()} is invoked on the new instance.
 *
 * @param exporter exporter
 * @param parms parameters
 * @param featureInfo feature info
 * @param instanceType instance type
 * @param datadir data directory
 * @return CorpusExporter instance
 * @throws GateRuntimeException if the instance cannot be created reflectively
 */
public static CorpusExporter create(Exporter exporter, String parms, FeatureInfo featureInfo,
    String instanceType, URL datadir) {
  final CorpusExporter created;
  try {
    @SuppressWarnings("unchecked")
    Constructor<?> noArgCtor = exporter.getCorpusExporterClass().getDeclaredConstructor();
    created = (CorpusExporter) noArgCtor.newInstance();
  } catch (IllegalAccessException | IllegalArgumentException | InstantiationException
      | NoSuchMethodException | SecurityException | InvocationTargetException ex) {
    throw new GateRuntimeException(
        "Error creating CorpusExporter instance for "+exporter.getCorpusExporterClass(),ex);
  }

  // Settings that come from the exporter itself.
  created.exporter = exporter;
  created.targetType = exporter.getTargetType();

  // Settings passed straight through from the caller.
  created.parms = parms;
  created.featureInfo = featureInfo;
  created.instanceType = instanceType;

  // The data directory is kept both as the URL and as a File.
  created.datadir = datadir;
  created.dataDirFile = Files.fileFromURL(datadir);

  created.initWhenCreating();
  return created;
}
Example #2
Source File: SerialDataStore.java From gate-core with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Deletes this data store: removes the storage directory via
 * {@code Files.rmdir} and then takes this store out of the GATE data store
 * register.
 *
 * @throws PersistenceException if no storage directory is set or
 *           {@code Files.rmdir} reports failure
 */
@Override
public void delete() throws PersistenceException {
  boolean removed = storageDir != null && Files.rmdir(storageDir);
  if(!removed) {
    throw new PersistenceException("couldn't delete " + storageDir);
  }
  Gate.getDataStoreRegister().remove(this);
}
Example #3
Source File: GappModel.java From gate-core with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Write out the (possibly modified) GAPP file to its new location.
 *
 * {@code finish()} is called first; the document is then serialised in raw
 * format to the file denoted by {@code gappFileURL}. The output streams are
 * closed (and therefore flushed) before this method returns, whether or not
 * writing succeeded.
 *
 * @throws IOException if an I/O error occurs.
 */
public void write() throws IOException {
  finish();
  File newGappFile = Files.fileFromURL(gappFileURL);
  XMLOutputter outputter = new XMLOutputter(Format.getRawFormat());
  // try-with-resources guarantees the file handle is released even when
  // the outputter throws part-way through.
  try (FileOutputStream fos = new FileOutputStream(newGappFile);
       BufferedOutputStream out = new BufferedOutputStream(fos)) {
    outputter.output(gappDocument, out);
  }
}
Example #4
Source File: EngineMBSklearnBase.java From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 | 5 votes |
@Override protected void loadModel(URL directoryURL, String parms) { ArrayList<String> finalCommand = new ArrayList<>(); // Instead of loading a model, this establishes a connection with the // external sklearn process. if(!"file".equals(directoryURL.getProtocol())) { throw new GateRuntimeException("The dataDirectory URL must be a file: URL for sklearn"); } File directory = Files.fileFromURL(directoryURL); File commandFile = findWrapperCommand(directory, true); String modelFileName = new File(directory,MODEL_BASENAME).getAbsolutePath(); finalCommand.add(commandFile.getAbsolutePath()); finalCommand.add(modelFileName); // if we have a shell command prepend that, and if we have shell parms too, include them if(shellcmd != null) { finalCommand.add(0,shellcmd); if(shellparms != null) { String[] sps = shellparms.trim().split("\\s+"); int i=0; for(String sp : sps) { finalCommand.add(++i,sp); } } } //System.err.println("Running: "+finalCommand); // Create a fake Model jsut to make LF_Apply... happy which checks if this is null model = MODEL_INSTANCE; Map<String,String> env = new HashMap<>(); env.put(ENV_WRAPPER_HOME, wrapperhome); process = Process4JsonStream.create(directory,env,finalCommand); }
Example #5
Source File: EngineDVFileJson.java From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 | 5 votes |
@Override protected void initWhenCreating(URL directory, Algorithm algorithm, String parms, FeatureInfo featureInfo, TargetType targetType) { dataDir = Files.fileFromURL(directory); this.featureInfo = featureInfo; corpusRepresentation = new CorpusRepresentationVolatileDense2JsonStream(dataDir, featureInfo); corpusRepresentation.startAdding(); this.featureInfo = featureInfo; // NOTE: we are copying the wrapper code only when starting training, not // here. This allows the user to copy their own code while the PR is running // but creating the corpus has not yet finished. }
Example #6
Source File: EngineDVFileJson.java From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 | 5 votes |
@Override protected void loadAndSetCorpusRepresentation(URL directory) { //System.err.println("DEBUG EngineDVFileJson: running loadAndSetCorpusRepresentation "+directory); // this does not actually need to load anything but the featureInfo ... // this is needed to convert our instance data to JSON, which is then sent // off to the script or server which is responsible to use any other saved // model info (the model itself, scaling info, vocab info, embeddings etc) dataDir = Files.fileFromURL(directory); featureInfo = FeatureInfo.load(directory); corpusRepresentation = new CorpusRepresentationVolatileDense2JsonStream(dataDir, featureInfo); }
Example #7
Source File: EngineMBLibSVM.java From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 | 5 votes |
@Override public void loadModel(URL directory, String parms) { if(!"file".equals(directory.getProtocol())) { throw new GateRuntimeException("The dataDirectory URL must be a file: URL for LibSVM"); } try { File directoryFile = Files.fileFromURL(directory); svm_model svmModel = svm.svm_load_model(new File(directoryFile, FILENAME_MODEL).getAbsolutePath()); // System.out.println("Loaded LIBSVM model, nrclasses=" + svmModel.nr_class); model = svmModel; } catch (IOException | IllegalArgumentException ex) { throw new GateRuntimeException("Error loading the LIBSVM model from directory "+directory, ex); } }
Example #8
Source File: ApplicationPDF.java From wandora with GNU General Public License v3.0 | 5 votes |
/**
 * Wires the input listeners and popup menu onto the PDF panel, reads the PDF
 * bytes from {@code source} and shows the first page.
 *
 * The bytes come from the data URL itself, from the local file for file:
 * URLs, or from {@code IObox.fetchUrl} for any other URL.
 *
 * @throws Exception if no data could be read from the source or the PDF has
 *           no pages
 */
private void initialize() throws FileNotFoundException, IOException, MalformedURLException, URISyntaxException, Exception {
  pdfPanel.addMouseListener(actionListener);
  pdfPanel.addMouseMotionListener(actionListener);
  pdfPanel.addKeyListener(actionListener);
  pdfPanel.setComponentPopupMenu(menu);

  final byte[] pdfBytes;
  if(DataURL.isDataURL(source)) {
    pdfBytes = new DataURL(source).getData();
  }
  else {
    URL sourceURL = new URL(source);
    boolean isLocalFile = "file".equalsIgnoreCase(sourceURL.getProtocol());
    if(isLocalFile) {
      pdfBytes = Files.getByteArray(new File(sourceURL.toURI()));
    }
    else {
      pdfBytes = IObox.fetchUrl(sourceURL);
    }
  }

  if(pdfBytes == null) {
    throw new Exception("Can't read data out of locator resource.");
  }

  pdfFile = new PDFFile(ByteBuffer.wrap(pdfBytes));
  pageCount = pdfFile.getNumPages();
  if(pageCount <= 0) {
    throw new Exception("PDF has no pages at all. Can't view.");
  }

  setPageText.invoke(1, pageCount);
  pdfPanel.changePage(pdfFile.getPage(0));
}
Example #9
Source File: LuceneDataStoreImpl.java From gate-core with GNU Lesser General Public License v3.0 | 4 votes |
/** Open a connection to the data store. */ @Override public void open() throws PersistenceException { super.open(); /* * check if the storage directory is a valid serial datastore if we * want to support old style: String versionInVersionFile = "1.0"; * (but this means it will open *any* directory) */ try (BufferedReader isr = new BufferedReader(new FileReader(getVersionFile()))) { currentProtocolVersion = isr.readLine(); String indexDirRelativePath = isr.readLine(); if(indexDirRelativePath != null && indexDirRelativePath.trim().length() > 1) { URL storageDirURL = storageDir.toURI().toURL(); URL theIndexURL = new URL(storageDirURL, indexDirRelativePath); // check if index directory exists File indexDir = Files.fileFromURL(theIndexURL); if(!indexDir.exists()) { throw new PersistenceException("Index directory " + indexDirRelativePath + " could not be found for datastore at " + storageDirURL); } indexURL = theIndexURL; this.indexer = new LuceneIndexer(indexURL); this.searcher = new LuceneSearcher(); ((LuceneSearcher)this.searcher).setLuceneDatastore(this); } } catch(IOException e) { throw new PersistenceException("Invalid storage directory: " + e); } if(!isValidProtocolVersion(currentProtocolVersion)) throw new PersistenceException("Invalid protocol version number: " + currentProtocolVersion); // Lets create a separate indexer thread which keeps running in the // background executor = new ScheduledThreadPoolExecutor(1, Executors.defaultThreadFactory()); // set up the executor so it does not execute delayed indexing tasks // that are still waiting when it is shut down. We run these tasks // immediately at shutdown time rather than waiting. executor.setContinueExistingPeriodicTasksAfterShutdownPolicy(false); executor.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); // start listening to Creole events Gate.getCreoleRegister().addCreoleListener(this); }
Example #10
Source File: CorpusImpl.java From gate-core with GNU Lesser General Public License v3.0 | 4 votes |
/** * Fills the provided corpus with documents created on the fly from * selected files in a directory. Uses a {@link FileFilter} to select * which files will be used and which will be ignored. A simple file * filter based on extensions is provided in the Gate distribution ( * {@link gate.util.ExtensionFileFilter}). * * @param corpus the corpus to be populated * @param directory the directory from which the files will be picked. * This parameter is an URL for uniformity. It needs to be a * URL of type file otherwise an InvalidArgumentException * will be thrown. * @param filter the file filter used to select files from the target * directory. If the filter is <tt>null</tt> all the files * will be accepted. * @param encoding the encoding to be used for reading the documents * @param recurseDirectories should the directory be parsed * recursively?. If <tt>true</tt> all the files from the * provided directory and all its children directories (on as * many levels as necessary) will be picked if accepted by * the filter otherwise the children directories will be * ignored. 
* @throws java.io.IOException if a file doesn't exist */ public static void populate(Corpus corpus, URL directory, FileFilter filter, String encoding, String mimeType, boolean recurseDirectories) throws IOException { // check input if(!directory.getProtocol().equalsIgnoreCase("file")) throw new IllegalArgumentException( "The URL provided is not of type \"file:\"!"); File dir = Files.fileFromURL(directory); if(!dir.exists()) throw new FileNotFoundException(dir.toString()); if(!dir.isDirectory()) throw new IllegalArgumentException(dir.getAbsolutePath() + " is not a directory!"); File[] files; // populate the corpus if(recurseDirectories) { files = Files.listFilesRecursively(dir, filter); } else { files = dir.listFiles(filter); } if(files == null) { return; } // sort the files alphabetically regardless of their paths Arrays.sort(files, new Comparator<File>() { @Override public int compare(File f1, File f2) { return f1.getName().compareTo(f2.getName()); } }); // create the GATE documents for(File file : files) { if(file.isDirectory()) { continue; } StatusListener sListener = (StatusListener)Gate.getListeners().get( "gate.event.StatusListener"); if(sListener != null) sListener.statusChanged("Reading: " + file.getName()); String docName = file.getName() + "_" + Gate.genSym(); FeatureMap params = Factory.newFeatureMap(); params.put(Document.DOCUMENT_URL_PARAMETER_NAME, file.toURI().toURL()); if(encoding != null) params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding); if(mimeType != null) params.put(Document.DOCUMENT_MIME_TYPE_PARAMETER_NAME, mimeType); try { Document doc = (Document)Factory.createResource(DocumentImpl.class .getName(), params, null, docName); corpus.add(doc); if(corpus.getLRPersistenceId() != null) { // persistent corpus -> unload the document corpus.unloadDocument(doc); Factory.deleteResource(doc); } } catch(Throwable t) { String nl = Strings.getNl(); Err.prln("WARNING: Corpus.populate could not instantiate document" + nl + " Document name was: 
" + docName + nl + " Exception was: " + t + nl + nl); t.printStackTrace(); } if(sListener != null) sListener.statusChanged(file.getName() + " read"); } }
Example #11
Source File: TestXml.java From gate-core with GNU Lesser General Public License v3.0 | 4 votes |
private void runCompleteTestWithAFormat(URL url, String urlDescription) throws Exception{ // Load the xml Key Document and unpack it gate.Document keyDocument = null; FeatureMap params = Factory.newFeatureMap(); params.put(Document.DOCUMENT_URL_PARAMETER_NAME, url); params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false"); keyDocument = (Document)Factory.createResource("gate.corpora.DocumentImpl", params); assertTrue("Coudn't create a GATE document instance for " + url.toString() + " Can't continue." , keyDocument != null); gate.DocumentFormat keyDocFormat = null; keyDocFormat = gate.DocumentFormat.getDocumentFormat( keyDocument, keyDocument.getSourceUrl() ); assertTrue("Fail to recognize " + url.toString() + " as being " + urlDescription + " !", keyDocFormat != null); // Unpack the markup keyDocFormat.unpackMarkup(keyDocument); // Verfy if all annotations from the default annotation set are consistent gate.corpora.TestDocument.verifyNodeIdConsistency(keyDocument); // Verifies if the maximum annotation ID on the GATE doc is less than the // Annotation ID generator of the document. verifyAnnotationIDGenerator(keyDocument); // Save the size of the document and the number of annotations long keyDocumentSize = keyDocument.getContent().size().longValue(); int keyDocumentAnnotationSetSize = keyDocument.getAnnotations().size(); // Export the Gate document called keyDocument as XML, into a temp file, // using the working encoding File xmlFile = null; xmlFile = Files.writeTempFile(keyDocument.toXml(), workingEncoding ); assertTrue("The temp GATE XML file is null. Can't continue.",xmlFile != null); // Load the XML Gate document form the tmp file into memory gate.Document gateDoc = null; gateDoc = gate.Factory.newDocument(xmlFile.toURI().toURL(), workingEncoding); assertTrue("Coudn't create a GATE document instance for " + xmlFile.toURI().toURL().toString() + " Can't continue." 
, gateDoc != null); gate.DocumentFormat gateDocFormat = null; gateDocFormat = DocumentFormat.getDocumentFormat(gateDoc,gateDoc.getSourceUrl()); assertTrue("Fail to recognize " + xmlFile.toURI().toURL().toString() + " as being a GATE XML document !", gateDocFormat != null); gateDocFormat.unpackMarkup(gateDoc); // Verfy if all annotations from the default annotation set are consistent gate.corpora.TestDocument.verifyNodeIdConsistency(gateDoc); // Save the size of the document snd the number of annotations long gateDocSize = keyDocument.getContent().size().longValue(); int gateDocAnnotationSetSize = keyDocument.getAnnotations().size(); assertTrue("Exporting as GATE XML resulted in document content size lost." + " Something went wrong.", keyDocumentSize == gateDocSize); assertTrue("Exporting as GATE XML resulted in annotation lost." + " No. of annotations missing = " + Math.abs(keyDocumentAnnotationSetSize - gateDocAnnotationSetSize), keyDocumentAnnotationSetSize == gateDocAnnotationSetSize); // Verifies if the maximum annotation ID on the GATE doc is less than the // Annotation ID generator of the document. verifyAnnotationIDGenerator(gateDoc); //Don't need tmp Gate XML file. xmlFile.delete(); }
Example #12
Source File: TestXml.java From gate-core with GNU Lesser General Public License v3.0 | 4 votes |
public void testAnnotationConsistencyForSaveAsXml()throws Exception{ // Load a document from the test repository //Document origDoc = gate.Factory.newDocument(Gate.getUrl("tests/xml/gateTestSaveAsXML.xml")); String testDoc = gate.util.Files.getGateResourceAsString("gate.ac.uk/tests/xml/gateTestSaveAsXML.xml"); Document origDoc = gate.Factory.newDocument(testDoc); // Verifies if the maximum annotation ID on the origDoc is less than the // Annotation ID generator of the document. verifyAnnotationIDGenerator(origDoc); //create a couple of annotations with features we can look at after a round trip to disc Integer ann1ID = origDoc.getAnnotations().add(0L,10L,"Test",Factory.newFeatureMap()); Integer ann2ID = origDoc.getAnnotations().add(15L,20L,"Test",Factory.newFeatureMap()); origDoc.getAnnotations().get(ann1ID).getFeatures().put("matches", Arrays.asList(new Integer[]{ann2ID})); origDoc.getAnnotations().get(ann2ID).getFeatures().put("matches", Arrays.asList(new Integer[]{ann1ID})); // SaveAS XML and reload the document into another GATE doc // Export the Gate document called origDoc as XML, into a temp file, // using the working encoding File xmlFile = Files.writeTempFile(origDoc.toXml(),workingEncoding); System.out.println("Saved to temp file :" + xmlFile.toURI().toURL()); Document reloadedDoc = gate.Factory.newDocument(xmlFile.toURI().toURL(), workingEncoding); // Verifies if the maximum annotation ID on the origDoc is less than the // Annotation ID generator of the document. verifyAnnotationIDGenerator(reloadedDoc); // Verify if the annotations are identical in the two docs. 
Map<Integer,Annotation> origAnnotMap = buildID2AnnotMap(origDoc); Map<Integer,Annotation> reloadedAnnMap = buildID2AnnotMap(reloadedDoc); //Verifies if the reloaded annotations are the same as the original ones verifyIDConsistency(origAnnotMap, reloadedAnnMap); // Build the original Matches map // ID -> List of IDs Map<Integer,List<Integer>> origMatchesMap = buildMatchesMap(origDoc); // Verify the consistency of matches // Compare every orig annotation pointed by the MatchesMap with the reloadedAnnot // extracted from the reloadedMAp for(Iterator<Integer> it = origMatchesMap.keySet().iterator(); it.hasNext();){ Integer id = it.next(); Annotation origAnnot = origAnnotMap.get(id); assertTrue("Couldn't find an original annot with ID=" + id, origAnnot != null); Annotation reloadedAnnot = reloadedAnnMap.get(id); assertTrue("Couldn't find a reloaded annot with ID=" + id, reloadedAnnot != null); compareAnnot(origAnnot,reloadedAnnot); // Iterate through the matches list and repeat the comparison List<Integer> matchesList = origMatchesMap.get(id); for (Iterator<Integer> itList = matchesList.iterator(); itList.hasNext();){ Integer matchId = itList.next(); Annotation origA = origAnnotMap.get(matchId); assertTrue("Couldn't find an original annot with ID=" + matchId, origA != null); Annotation reloadedA = reloadedAnnMap.get(matchId); assertTrue("Couldn't find a reloaded annot with ID=" + matchId, reloadedA != null); compareAnnot(origA, reloadedA); }// End for }// End for // Clean up the XMl file xmlFile.delete(); }
Example #13
Source File: EngineMBWekaWrapper.java From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 | 4 votes |
@Override protected void loadModel(URL directoryURL, String parms) { ArrayList<String> finalCommand = new ArrayList<>(); // TODO: for now, we only allow URLs which are file: URLs here. // This is because the script wrapping Weka is currently not able to access // the model from any other location. Also, we need to export the // data and currently this is done into the directoryURL. // At some later point, we may be able to e.g. copy the model into // a temporary directory and use the demporary directory also to store // the data! File directoryFile = null; if("file".equals(directoryURL.getProtocol())) { directoryFile = Files.fileFromURL(directoryURL); } else { throw new GateRuntimeException("The dataDirectory for WekaWrapper must be a file: URL"); } // Instead of loading a model, this establishes a connection with the // external weka process. For this, we expect an additional file in the // directory, weka.yaml, which describes how to run the weka wrapper File commandFile = findWrapperCommand(directoryFile, true); // If the directoryURL String modelFileName = new File(directoryFile,FILENAME_MODEL).getAbsolutePath(); if(!new File(modelFileName).exists()) { throw new GateRuntimeException("File not found: "+modelFileName); } String header = new File(directoryFile,"header.arff").getAbsolutePath(); if(!new File(header).exists()) { throw new GateRuntimeException("File not found: "+header); } if(shellcmd != null) { finalCommand.add(shellcmd); if(shellparms != null) { String[] sps = shellparms.trim().split("\\s+"); for(String sp : sps) { finalCommand.add(sp); } } } finalCommand.add(commandFile.getAbsolutePath()); finalCommand.add(modelFileName); finalCommand.add(header); //System.err.println("Running: "+finalCommand); // Create a fake Model jsut to make LF_Apply... 
happy which checks if this is null model = "ExternalWekaWrapperModel"; Map<String,String> env = new HashMap<>(); env.put(ENV_WRAPPER_HOME,wrapperhome); // NOTE: if the directoryFile is null, the current Java process' directory is used process = Process4ObjectStream.create(directoryFile,env,finalCommand); }
Example #14
Source File: EngineMBPythonNetworksBase.java From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 | 4 votes |
@Override protected void loadModel(URL directoryURL, String parms) { File directory = null; if("file".equals(directoryURL.getProtocol())) { directory = Files.fileFromURL(directoryURL); } else { throw new GateRuntimeException("The dataDirectory for WekaWrapper must be a file: URL not "+directoryURL); } ArrayList<String> finalCommand = new ArrayList<>(); // we need the corpus representation here! Normally this is done from loadEngine and after // load model, but we do it here. The load crm method only loads anything if it is still // null, so we will do this only once anyway. loadAndSetCorpusRepresentation(directoryURL); CorpusRepresentationMalletTarget data = (CorpusRepresentationMalletTarget)corpusRepresentation; SimpleEntry<String,Integer> modeAndNrC = findOutMode(data); String mode = modeAndNrC.getKey(); Integer nrClasses = modeAndNrC.getValue(); // Instead of loading a model, this establishes a connection with the // external wrapper process. File commandFile = findWrapperCommand(directory, true); String modelFileName = new File(directory,MODEL_BASENAME).getAbsolutePath(); finalCommand.add(commandFile.getAbsolutePath()); finalCommand.add(modelFileName); finalCommand.add(mode); finalCommand.add(nrClasses.toString()); // if we have a shell command prepend that, and if we have shell parms too, include them if(shellcmd != null) { finalCommand.add(0,shellcmd); if(shellparms != null) { String[] sps = shellparms.trim().split("\\s+"); int i=0; for(String sp : sps) { finalCommand.add(++i,sp); } } } //System.err.println("Running: "+finalCommand); // Create a fake Model jsut to make LF_Apply... happy which checks if this is null model = MODEL_INSTANCE; Map<String,String> env = new HashMap<>(); env.put(ENV_WRAPPER_HOME, wrapperhome); process = Process4JsonStream.create(directory,env,finalCommand); }