View Javadoc

1   /*******************************************************************************
2    *  Imixs Workflow 
3    *  Copyright (C) 2001, 2011 Imixs Software Solutions GmbH,  
4    *  http://www.imixs.com
5    *  
6    *  This program is free software; you can redistribute it and/or 
7    *  modify it under the terms of the GNU General Public License 
8    *  as published by the Free Software Foundation; either version 2 
9    *  of the License, or (at your option) any later version.
10   *  
11   *  This program is distributed in the hope that it will be useful, 
12   *  but WITHOUT ANY WARRANTY; without even the implied warranty of 
13   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
14   *  General Public License for more details.
15   *  
16   *  You can receive a copy of the GNU General Public
17   *  License at http://www.gnu.org/licenses/gpl.html
18   *  
19   *  Project: 
20   *  	http://www.imixs.org
21   *  	http://java.net/projects/imixs-workflow
22   *  
23   *  Contributors:  
24   *  	Imixs Software Solutions GmbH - initial API and implementation
25   *  	Ralph Soika - Software Developer
26   *******************************************************************************/
27  
28  package org.imixs.workflow.plugins.jee.extended;
29  
30  import java.io.File;
31  import java.text.SimpleDateFormat;
32  import java.util.ArrayList;
33  import java.util.Calendar;
34  import java.util.Collection;
35  import java.util.Date;
36  import java.util.List;
37  import java.util.Properties;
38  import java.util.StringTokenizer;
39  import java.util.logging.Logger;
40  
41  import org.apache.lucene.analysis.Analyzer;
42  import org.apache.lucene.analysis.standard.StandardAnalyzer;
43  import org.apache.lucene.document.Document;
44  import org.apache.lucene.document.Field;
45  import org.apache.lucene.index.IndexWriter;
46  import org.apache.lucene.index.IndexWriterConfig;
47  import org.apache.lucene.index.Term;
48  import org.apache.lucene.queryParser.QueryParser;
49  import org.apache.lucene.search.IndexSearcher;
50  import org.apache.lucene.search.ScoreDoc;
51  import org.apache.lucene.search.TopDocs;
52  import org.apache.lucene.store.Directory;
53  import org.apache.lucene.store.FSDirectory;
54  import org.apache.lucene.util.Version;
55  import org.imixs.workflow.ItemCollection;
56  import org.imixs.workflow.Plugin;
57  import org.imixs.workflow.WorkflowContext;
58  import org.imixs.workflow.exceptions.PluginException;
59  import org.imixs.workflow.jee.ejb.EntityService;
60  import org.imixs.workflow.jee.ejb.WorkflowService;
61  import org.imixs.workflow.plugins.AbstractPlugin;
62  
63  /**
64   * This Plugin add workitems to a lucene search index. The Plugin provides a set
65   * of static methods which can be used also outside the workflowManager to index
66   * single workitems or collections of workitems.
67   * 
68   * With the method addWorkitem() a ItemCollection can be added to a lucene
69   * search index. The Plugin reads the property file 'imixs-search.properties'
70   * from the current classpath to determine the configuration.
71   * 
72   * <ul>
73   * <li>The property "IndexDir" defines the location of the lucene index
74   * <li>The property "FulltextFieldList" lists all fields which should be
75   * searchable after a workitem was updated
76   * <li>The property "IndexFieldList" lists all fields which should be indexed as
77   * keywords by the lucene search engine
78   * 
79   * If the plugin is used as worflow pugin in the model definition, the plugin
80   * should be run last to be sure that newly computed values like the worklfow
81   * status or the wokflowSummary are indexed correctly
82   * 
83   * @author rsoika
84   * 
85   */
86  public class LucenePlugin extends AbstractPlugin {
87  	// Properties properties = null;
88  	IndexWriter writer = null;
89  	static List<String> searchFieldList = null;
90  	static List<String> indexFieldListAnalyse = null;
91  	static List<String> indexFieldListNoAnalyse = null;
92  	private static Logger logger = Logger.getLogger("org.imixs.workflow");
93  
94  	@Override
95  	public void init(WorkflowContext actx) throws PluginException {
96  		super.init(actx);
97  	}
98  
99  	/**
100 	 * This method adds the current workitem to the search index by calling the
101 	 * method addWorkitem. The method computes temporarily the field $processid
102 	 * based on the numnextprocessid from teh activty entity. This will ensure
103 	 * that the workitem is indexed correctly on the $processid the workitem
104 	 * will hold after the process step is completed.
105 	 * 
106 	 * If and how the workitem will be added to the search index is fully
107 	 * controlled by the method addWorkitem.
108 	 */
109 	public int run(ItemCollection documentContext, ItemCollection activity)
110 			throws PluginException {
111 
112 		// compute next $processid to be added correctly into the search index
113 		int nextProcessID = activity.getItemValueInteger("numnextprocessid");
114 		int currentProcessID = documentContext
115 				.getItemValueInteger("$processid");
116 		// temporarily replace the $processid
117 		try {
118 			documentContext.replaceItemValue("$processid", nextProcessID);
119 
120 			// add the current Worktitem to the search index
121 			addWorkitem(documentContext);
122 			// restore $processid
123 			documentContext.replaceItemValue("$processid", currentProcessID);
124 
125 		} catch (Exception e) {
126 			throw new PluginException(e.getMessage());
127 		}
128 		return Plugin.PLUGIN_OK;
129 	}
130 
131 	public void close(int status) throws PluginException {
132 
133 	}
134 
135 	/**
136 	 * This method adds a single workitem into the search index. The adds the
137 	 * workitem into a empty Collection and calls teh method addWorklist.
138 	 * 
139 	 * @param documentContext
140 	 * @return
141 	 * @throws Exception
142 	 */
143 	public static boolean addWorkitem(ItemCollection documentContext)
144 			throws Exception {
145 		List<ItemCollection> workitems = new ArrayList<ItemCollection>();
146 
147 		workitems.add(documentContext);
148 
149 		addWorklist(workitems);
150 
151 		return true;
152 	}
153 
154 	/**
155 	 * This method adds a collection of workitems into the search index. The
156 	 * method loads the imixs-search.properties file from the classpath. If no
157 	 * properties are defined the method terminates. For each workitem the
158 	 * method test if it did match the conditions to be added into the search
159 	 * index. If the workitem did not match the conditions the workitem will be
160 	 * removed from the index.
161 	 * 
162 	 * 
163 	 * @param worklist
164 	 * @return
165 	 * @throws Exception
166 	 */
167 	public static boolean addWorklist(Collection<ItemCollection> worklist)
168 			throws Exception {
169 		// try loading imixs-search properties
170 		Properties prop = loadProperties();
171 		if (prop.isEmpty())
172 			return false;
173 
174 		IndexWriter awriter = createIndexWriter(prop);
175 
176 		// add workitem to search index....
177 		try {
178 
179 			for (ItemCollection workitem : worklist) {
180 				// create term
181 				Term term = new Term("$uniqueid",
182 						workitem.getItemValueString("$uniqueid"));
183 				// test if document should be indexed or not
184 				if (matchConditions(prop, workitem))
185 					awriter.updateDocument(term, createDocument(prop, workitem));
186 				else
187 					awriter.deleteDocuments(term);
188 
189 			}
190 		} catch (Exception luceneEx) {
191 			// close writer!
192 			logger.warning(" Lucene Exception : " + luceneEx.getMessage());
193 			throw luceneEx;
194 		} finally {
195 			logger.fine(" close writer");
196 			awriter.optimize();
197 			awriter.close();
198 		}
199 
200 		logger.fine(" update worklist successfull");
201 		return true;
202 	}
203 
204 	/**
205 	 * test if the workitem matches the conditions to be added into the search
206 	 * index. The Property keys MatchingType and MatchingProcessID can provide
207 	 * regular expressions
208 	 * 
209 	 * @param aworktiem
210 	 * @return
211 	 */
212 	public static boolean matchConditions(Properties prop,
213 			ItemCollection aworktiem) {
214 
215 		String typePattern = prop.getProperty("MatchingType");
216 		String processIDPattern = prop.getProperty("MatchingProcessID");
217 
218 		String type = aworktiem.getItemValueString("Type");
219 		String sPid = aworktiem.getItemValueInteger("$Processid") + "";
220 
221 		// test type pattern
222 		if (typePattern != null && !"".equals(typePattern)
223 				&& !type.matches(typePattern))
224 			return false;
225 
226 		// test $processid pattern
227 		if (processIDPattern != null && !"".equals(processIDPattern)
228 				&& !sPid.matches(processIDPattern))
229 			return false;
230 
231 		return true;
232 	}
233 
234 	/**
235 	 * Returns a ItemCollection List matching the provided search term. The
236 	 * provided search team will we extended with a users roles to test the read
237 	 * access level of each workitem matching the search term. The usernames and
238 	 * user roles will be search lowercase!
239 	 * 
240 	 * @param sSearchTerm
241 	 * @param workflowService
242 	 * @return collection of search result
243 	 * @throws Exception
244 	 */
245 	public static List<ItemCollection> search(String sSearchTerm,
246 			WorkflowService workflowService) throws Exception {
247 
248 		ArrayList<ItemCollection> workitems = new ArrayList<ItemCollection>();
249 
250 		// test if searchtem is provided
251 		if (sSearchTerm == null || "".equals(sSearchTerm))
252 			return workitems;
253 
254 		long ltime = System.currentTimeMillis();
255 		Properties prop = loadProperties();
256 		if (prop.isEmpty())
257 			return workitems;
258 
259 		String sIndexDir = prop.get("IndexDir") + "";
260 		Directory directory;
261 		try {
262 			directory = FSDirectory.open(new File(sIndexDir));
263 
264 			IndexSearcher searcher = new IndexSearcher(directory, true);
265 
266 			// Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
267 			Analyzer analyzer = new org.apache.lucene.analysis.KeywordAnalyzer();
268 			QueryParser parser = new QueryParser(Version.LUCENE_31, "content",
269 					analyzer);
270 
271 			// extend the Search Term
272 			if (!workflowService
273 					.isUserInRole(EntityService.ACCESSLEVEL_MANAGERACCESS)) {
274 				// get user names list
275 				List<String> userNameList = workflowService.getUserNameList();
276 				// create search term
277 				String sAccessTerm = "($readaccess:ANONYMOUS";
278 				for (String aRole : userNameList) {
279 					if (!"".equals(aRole))
280 						sAccessTerm += " $readaccess:\"" + aRole + "\"";
281 				}
282 				sAccessTerm += ") AND ";
283 				sSearchTerm = sAccessTerm + sSearchTerm;
284 			}
285 			logger.info("  lucene search:" + sSearchTerm);
286 
287 			if (!"".equals(sSearchTerm)) {
288 				parser.setAllowLeadingWildcard(true);
289 				// parser.setDefaultOperator(Operator.AND);
290 				TopDocs topDocs = searcher.search(parser.parse(sSearchTerm),
291 						1000);
292 
293 				logger.fine("  total hits=" + topDocs.totalHits);
294 
295 				// Get an array of references to matched documents
296 				ScoreDoc[] scoreDosArray = topDocs.scoreDocs;
297 				for (ScoreDoc scoredoc : scoreDosArray) {
298 					// Retrieve the matched document and show relevant details
299 					Document doc = searcher.doc(scoredoc.doc);
300 
301 					String sID = doc.get("$uniqueid");
302 					logger.fine("  lucene $uniqueid=" + sID);
303 					ItemCollection itemCol = workflowService.getEntityService()
304 							.load(sID);
305 					if (itemCol != null) {
306 						workitems.add(itemCol);
307 					}
308 				}
309 
310 			}
311 
312 			searcher.close();
313 			directory.close();
314 
315 			logger.info(" lucene serach: "
316 					+ (System.currentTimeMillis() - ltime) + " ms");
317 		} catch (Exception e) {
318 			logger.warning("  lucene error!");
319 			e.printStackTrace();
320 		}
321 
322 		return workitems;
323 	}
324 
325 	/**
326 	 * This method creates a lucene document based on a ItemCollection. The
327 	 * Method creates for each field specified in the FieldList a separate index
328 	 * field for the lucene document.
329 	 * 
330 	 * The property 'AnalyzeIndexFields' defines if a indexfield value should by
331 	 * analyzed by the Lucene Analyzer (default=false)
332 	 * 
333 	 * @param aworkitem
334 	 * @return
335 	 */
336 	public static Document createDocument(Properties prop,
337 			ItemCollection aworkitem) {
338 		String sValue = null;
339 		Document doc = new Document();
340 
341 		// combine all search fields from the search field list into one field
342 		// ('content')
343 		// for the lucene document
344 		String sContent = "";
345 		for (String aFieldname : searchFieldList) {
346 			sValue = "";
347 			// check value list - skip empty fields
348 			List vValues = aworkitem.getItemValue(aFieldname);
349 			if (vValues.size() == 0)
350 				continue;
351 			// get all values of a value list field
352 			for (Object o : vValues) {
353 				if (o == null)
354 					// skip null values
355 					continue;
356 
357 				if (o instanceof Calendar || o instanceof Date) {
358 					SimpleDateFormat dateformat = new SimpleDateFormat(
359 							"yyyyMMddHHmm");
360 					// convert calendar to string
361 					String sDateValue;
362 					if (o instanceof Calendar)
363 						sDateValue = dateformat
364 								.format(((Calendar) o).getTime());
365 					else
366 						sDateValue = dateformat.format((Date) o);
367 					sValue += sDateValue + ",";
368 
369 				} else
370 					// simple string representation
371 					sValue += o.toString() + ",";
372 			}
373 			if (sValue != null) {
374 				logger.fine("  add SearchField: " + aFieldname + " = " + sValue);
375 				sContent += sValue + ",";
376 			}
377 		}
378 		logger.fine("  content = " + sContent);
379 		doc.add(new Field("content", sContent, Field.Store.NO,
380 				Field.Index.ANALYZED));
381 
382 		// add each field from the indexFieldList into the lucene document
383 		for (String aFieldname : indexFieldListAnalyse) {
384 			addFieldValue(doc, aworkitem, aFieldname, true);
385 		}
386 
387 		for (String aFieldname : indexFieldListNoAnalyse) {
388 			addFieldValue(doc, aworkitem, aFieldname, false);
389 		}
390 
391 		// add default value $uniqueid
392 		doc.add(new Field("$uniqueid", aworkitem
393 				.getItemValueString("$uniqueid"), Field.Store.YES,
394 				Field.Index.NOT_ANALYZED));
395 
396 		// add default values $readAccess
397 		List<String> vReadAccess = aworkitem.getItemValue("$readAccess");
398 		if (vReadAccess.size() == 0
399 				|| (vReadAccess.size() == 1 && "".equals(vReadAccess.get(0)
400 						.toString()))) {
401 			sValue = "ANONYMOUS";
402 			doc.add(new Field("$readaccess", sValue, Field.Store.NO,
403 					Field.Index.NOT_ANALYZED_NO_NORMS));
404 		} else {
405 			sValue = "";
406 			// add each role / username as a single field value
407 			for (String sReader : vReadAccess)
408 				doc.add(new Field("$readaccess", sReader, Field.Store.NO,
409 						Field.Index.NOT_ANALYZED_NO_NORMS));
410 
411 		}
412 		return doc;
413 	}
414 
415 	/**
416 	 * adds a field value into a lucene document
417 	 * 
418 	 * @param doc
419 	 *            an existing lucene document
420 	 * @param aworkitem
421 	 *            the workitem containg the values
422 	 * @param aFieldname
423 	 *            the Fieldname inside the workitem
424 	 * @param analyzeValue
425 	 *            indicates if the value should be parsed by the analyzer
426 	 */
427 	private static void addFieldValue(Document doc, ItemCollection aworkitem,
428 			String aFieldname, boolean analyzeValue) {
429 		String sValue = null;
430 		List vValues = aworkitem.getItemValue(aFieldname);
431 		if (vValues.size() == 0)
432 			return;
433 		if (vValues.get(0) == null)
434 			return;
435 
436 		for (Object singleValue : vValues) {
437 
438 			// Object o = vValues.firstElement();
439 			if (singleValue instanceof Calendar || singleValue instanceof Date) {
440 				SimpleDateFormat dateformat = new SimpleDateFormat(
441 						"yyyyMMddHHmm");
442 
443 				// convert calendar to string
444 				String sDateValue;
445 				if (singleValue instanceof Calendar)
446 					sDateValue = dateformat.format(((Calendar) singleValue)
447 							.getTime());
448 				else
449 					sDateValue = dateformat.format((Date) singleValue);
450 				sValue = sDateValue;
451 
452 			} else
453 				// simple string representation
454 				sValue = singleValue.toString();
455 
456 			logger.fine("  add IndexField (analyse=" + analyzeValue + "): "
457 					+ aFieldname + " = " + sValue);
458 			if (analyzeValue)
459 				doc.add(new Field(aFieldname, sValue, Field.Store.NO,
460 						Field.Index.ANALYZED));
461 			else
462 				// do not nalyse content of index fields!
463 				doc.add(new Field(aFieldname, sValue, Field.Store.NO,
464 						Field.Index.NOT_ANALYZED));
465 
466 		}
467 
468 	}
469 
470 	/**
471 	 * This method creates a new instance of a lucene IndexWriter. The timeout
472 	 * to wait for a write lock is set to 10 seconds.
473 	 * 
474 	 * @return
475 	 * @throws Exception
476 	 */
477 	public static IndexWriter createIndexWriter(Properties prop)
478 			throws Exception {
479 
480 		String sIndexDir = prop.get("IndexDir") + "";
481 		String sFulltextFieldList = prop.get("FulltextFieldList") + "";
482 		String sIndexFieldListAnalyse = prop.get("IndexFieldListAnalyze") + "";
483 		String sIndexFieldListNoAnalyse = prop.get("IndexFieldListNoAnalyze")
484 				+ "";
485 
486 		logger.fine("IndexDir:" + sIndexDir);
487 		logger.fine("FulltextFieldList:" + sFulltextFieldList);
488 		logger.fine("IndexFieldListAnalyse:" + sIndexFieldListAnalyse);
489 		logger.fine("IndexFieldListNoAnalyse:" + sIndexFieldListNoAnalyse);
490 		// compute search field list
491 		StringTokenizer st = new StringTokenizer(sFulltextFieldList, ",");
492 		searchFieldList = new ArrayList<String>();
493 		while (st.hasMoreElements()) {
494 			String sName = st.nextToken().toLowerCase();
495 			// do not add internal fields
496 			if (!"$uniqueid".equals(sName) && !"$readaccess".equals(sName))
497 				searchFieldList.add(sName);
498 		}
499 
500 		// compute Index field list (Analyze)
501 		st = new StringTokenizer(sIndexFieldListAnalyse, ",");
502 		indexFieldListAnalyse = new ArrayList<String>();
503 		while (st.hasMoreElements()) {
504 			String sName = st.nextToken().toLowerCase();
505 			// do not add internal fields
506 			if (!"$uniqueid".equals(sName) && !"$readaccess".equals(sName))
507 				indexFieldListAnalyse.add(sName);
508 		}
509 
510 		// compute Index field list (Analyze)
511 		st = new StringTokenizer(sIndexFieldListNoAnalyse, ",");
512 		indexFieldListNoAnalyse = new ArrayList<String>();
513 		while (st.hasMoreElements()) {
514 			String sName = st.nextToken().toLowerCase();
515 			// do not add internal fields
516 			if (!"$uniqueid".equals(sName) && !"$readaccess".equals(sName))
517 				indexFieldListNoAnalyse.add(sName);
518 		}
519 
520 		// initialize lucene index writer
521 		// Directory indexDir = new SimpleFSDirectory(new File(sIndexDir));
522 		Directory indexDir = FSDirectory.open(new File(sIndexDir));
523 
524 		// KeywordAnalyzer StandardAnalyzer
525 		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
526 		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
527 				analyzer);
528 		// set the WriteLockTimeout to wait for a write lock (in milliseconds)
529 		// for this instance.
530 		// 10 seconds!
531 		iwc.setWriteLockTimeout(10000);
532 
533 		// there is no need to unlock the index if we set the timeout to 10
534 		// seconds
535 		// if (IndexWriter.isLocked(indexDir)) {
536 		// logger.warning("Lucene IndexWriter was locked! - try to unlock....");
537 		// IndexWriter.unlock(indexDir);
538 		// }
539 		return new IndexWriter(indexDir, iwc);
540 
541 	}
542 
543 	public static IndexSearcher createIndexSearcher() throws Exception {
544 		Directory directory;
545 
546 		Properties properties = LucenePlugin.loadProperties();
547 		String sIndexDir = properties.get("IndexDir") + "";
548 
549 		directory = FSDirectory.open(new File(sIndexDir));
550 
551 		return new IndexSearcher(directory, true);
552 	}
553 
554 	/**
555 	 * loads a imixs-search.property file
556 	 * 
557 	 * @return
558 	 * @throws Exception
559 	 */
560 	public static Properties loadProperties() throws Exception {
561 		// try loading imixs-search properties
562 		Properties prop = new Properties();
563 		try {
564 			prop.load(Thread.currentThread().getContextClassLoader()
565 					.getResource("imixs-search.properties").openStream());
566 		} catch (Exception ep) {
567 			// no properties found
568 		}
569 		return prop;
570 	}
571 
572 }