Index: etc/database_schema.sql =================================================================== RCS file: /cvsroot/dspace/dspace/etc/database_schema.sql,v retrieving revision 1.43 diff -b -w -u -r1.43 database_schema.sql --- etc/database_schema.sql 6 Apr 2006 06:06:54 -0000 1.43 +++ etc/database_schema.sql 4 Jan 2007 08:08:33 -0000 @@ -116,6 +116,8 @@ CREATE SEQUENCE group2group_seq; CREATE SEQUENCE group2groupcache_seq; +CREATE SEQUENCE InternalAIP_seq; +CREATE SEQUENCE InternalAIP2Bitstream_seq; ------------------------------------------------------- -- BitstreamFormatRegistry table @@ -860,6 +862,20 @@ 'Bitstream marked deleted in bitstream table' ); +------------------------------------------------------- +-- Internal AIP tables +------------------------------------------------------- +CREATE TABLE InternalAIP +( + internalaip_id INTEGER NOT NULL PRIMARY KEY, + handle_id INTEGER UNIQUE REFERENCES Handle(handle_id), + aip_bitstream_id INTEGER REFERENCES Bitstream(bitstream_id), + updated TIMESTAMP WITH TIME ZONE +); - - +CREATE TABLE InternalAIP2Bitstream +( + internalaip2bitstream_id INTEGER NOT NULL PRIMARY KEY, + internalaip_id INTEGER REFERENCES InternalAIP(internalaip_id), + bitstream_id INTEGER REFERENCES Bitstream(bitstream_id) +); Index: etc/update-sequences.sql =================================================================== RCS file: /cvsroot/dspace/dspace/etc/update-sequences.sql,v retrieving revision 1.9 diff -b -w -u -r1.9 update-sequences.sql --- etc/update-sequences.sql 20 Dec 2005 13:49:24 -0000 1.9 +++ etc/update-sequences.sql 4 Jan 2007 08:08:33 -0000 @@ -92,3 +92,5 @@ SELECT setval('metadatafieldregistry_seq', max(metadata_field_id)) FROM metadatafieldregistry; SELECT setval('metadatavalue_seq', max(metadata_value_id)) FROM metadatavalue; SELECT setval('metadataschemaregistry_seq', max(metadata_schema_id)) FROM metadataschemaregistry; \ No newline at end of file +SELECT setval('internalaip_seq', max(internalaip_id)) FROM internalaip; 
+SELECT setval('internalaip2bitstream_seq', max(internalaip2bitstream_id)) FROM internalaip2bitstream; Index: src/org/dspace/content/BitstreamFormat.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/BitstreamFormat.java,v retrieving revision 1.24 diff -b -w -u -r1.24 BitstreamFormat.java --- src/org/dspace/content/BitstreamFormat.java 10 Nov 2006 19:39:17 -0000 1.24 +++ src/org/dspace/content/BitstreamFormat.java 4 Jan 2007 08:08:33 -0000 @@ -85,6 +85,13 @@ */ public static final int SUPPORTED = 2; + + /** translate support-level ID to string. MUST keep this table in sync + * with support level definitions above. + */ + public static final String supportLevelText[] = + { "UNKNOWN", "KNOWN", "SUPPORTED" }; + /** Our context */ private Context bfContext; @@ -92,7 +99,7 @@ private TableRow bfRow; /** File extensions for this format */ - private List extensions; + private List extensions; /** * Class constructor for creating a BitstreamFormat object based on the @@ -108,7 +115,7 @@ { bfContext = context; bfRow = row; - extensions = new ArrayList(); + extensions = new ArrayList(); TableRowIterator tri = DatabaseManager.query(context, "SELECT * FROM fileextension WHERE bitstream_format_id= ? 
", @@ -293,7 +300,7 @@ public static BitstreamFormat[] findAll(Context context) throws SQLException { - List formats = new ArrayList(); + List formats = new ArrayList(); TableRowIterator tri = DatabaseManager.queryTable(context, "bitstreamformatregistry", "SELECT * FROM bitstreamformatregistry ORDER BY bitstream_format_id"); @@ -340,7 +347,7 @@ public static BitstreamFormat[] findNonInternal(Context context) throws SQLException { - List formats = new ArrayList(); + List formats = new ArrayList(); String myQuery = "SELECT * FROM bitstreamformatregistry WHERE internal='0' " + "AND short_description NOT LIKE 'Unknown' " @@ -653,11 +660,34 @@ */ public void setExtensions(String[] exts) { - extensions = new ArrayList(); + extensions = new ArrayList(); for (int i = 0; i < exts.length; i++) { extensions.add(exts[i]); } } + + /** + * If you know the support level string, look up the corresponding type ID + * constant. + * + * @param action + * String with the name of the action (must be exact match) + * + * @return the corresponding action ID, or -1 if the action + * string is unknown + */ + public static int getSupportLevelID(String slevel) + { + for (int i = 0; i < supportLevelText.length; i++) + { + if (supportLevelText[i].equals(slevel)) + { + return i; + } + } + + return -1; + } } Index: src/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java,v retrieving revision 1.2 diff -b -w -u -r1.2 MODSDisseminationCrosswalk.java --- src/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java 27 Mar 2006 02:57:09 -0000 1.2 +++ src/org/dspace/content/crosswalk/MODSDisseminationCrosswalk.java 4 Jan 2007 08:08:33 -0000 @@ -1,9 +1,9 @@ /* * MODSDisseminationCrosswalk.java * - * Version: $Revision: 1.2 $ + * Version: $Revision: 1.1 $ * - * Date: $Date: 2006/03/27 02:57:09 $ + * Date: $Date: 2006/03/28 
23:12:56 $ * * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts * Institute of Technology. All rights reserved. @@ -40,7 +40,6 @@ package org.dspace.content.crosswalk; -import java.io.InputStream; import java.io.IOException; import java.sql.SQLException; import java.util.Iterator; @@ -49,18 +48,16 @@ import java.util.HashMap; import java.util.Properties; import java.util.Enumeration; -import java.io.OutputStream; import java.io.StringReader; import java.io.File; import java.io.FileInputStream; -import java.sql.SQLException; import org.apache.log4j.Logger; -import org.dspace.core.Context; import org.dspace.core.Constants; +import org.dspace.content.Collection; +import org.dspace.content.Community; import org.dspace.content.Item; -import org.dspace.content.DCDate; import org.dspace.content.DCValue; import org.dspace.content.DSpaceObject; import org.dspace.authorize.AuthorizeException; @@ -71,7 +68,6 @@ import org.jdom.output.XMLOutputter; import org.jdom.output.Format; import org.jdom.input.SAXBuilder; -import org.jdom.input.JDOMParseException; import org.jdom.xpath.XPath; /** @@ -106,7 +102,8 @@ * by default. * * @author Larry Stone - * @version $Revision: 1.2 $ + * @author Scott Phillips + * @version $Revision: 1.1 $ */ public class MODSDisseminationCrosswalk extends SelfNamedPlugin implements DisseminationCrosswalk @@ -158,7 +155,6 @@ MODS_NS.getURI()+" "+MODS_XSD; private static XMLOutputter outputUgly = new XMLOutputter(); - private static XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat()); private static SAXBuilder builder = new SAXBuilder(); private HashMap modsMap = null; @@ -293,11 +289,17 @@ } } + /** + * Return the MODS namespace + */ public Namespace[] getNamespaces() { return namespaces; } + /** + * Return the MODS schema + */ public String getSchemaLocation() { return schemaLocation; @@ -313,6 +315,9 @@ return disseminateListInternal(dso, true); } + /** + * Disseminate an Item, Collection, or Community to MODS. 
+ */ public Element disseminateElement(DSpaceObject dso) throws CrosswalkException, IOException, SQLException, AuthorizeException @@ -324,23 +329,38 @@ } private List disseminateListInternal(DSpaceObject dso, boolean addSchema) - throws CrosswalkException, - IOException, SQLException, AuthorizeException + throws CrosswalkException, IOException, SQLException, AuthorizeException + { + DCValue[] dcvs = null; + if (dso.getType() == Constants.ITEM) { - if (dso.getType() != Constants.ITEM) - throw new CrosswalkObjectNotSupported("MODSDisseminationCrosswalk can only crosswalk an Item."); - Item item = (Item)dso; + dcvs = item2Metadata((Item) dso); + } + else if (dso.getType() == Constants.COLLECTION) + { + dcvs = collection2Metadata((Collection) dso); + } + else if (dso.getType() == Constants.COMMUNITY) + { + dcvs = community2Metadata((Community) dso); + } + else + { + throw new CrosswalkObjectNotSupported( + "MODSDisseminationCrosswalk can only crosswalk Items, Collections, or Communities"); + } initMap(); - DCValue[] dc = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY); - List result = new ArrayList(dc.length); - for (int i = 0; i < dc.length; i++) - { - // Compose qualified DC name - schema.element[.qualifier] - // e.g. "dc.title", "dc.subject.lcc", "lom.Classification.Keyword" - String qdc = dc[i].schema+"."+ - ((dc[i].qualifier == null) ? dc[i].element - : (dc[i].element + "." + dc[i].qualifier)); + List result = new ArrayList(dcvs.length); + + for(int i=0; i < dcvs.length; i++) + { + String qdc = dcvs[i].schema + "." + dcvs[i].element; + if (dcvs[i].qualifier != null) + { + qdc += "." 
+ dcvs[i].qualifier; + } + String value = dcvs[i].value; modsTriple trip = (modsTriple)modsMap.get(qdc); if (trip == null) @@ -362,11 +382,11 @@ { Object what = ni.next(); if (what instanceof Element) - ((Element)what).setText(dc[i].value); + ((Element)what).setText(checkedString(value)); else if (what instanceof Attribute) - ((Attribute)what).setValue(dc[i].value); + ((Attribute)what).setValue(checkedString(value)); else if (what instanceof Text) - ((Text)what).setText(dc[i].value); + ((Text)what).setText(checkedString(value)); else log.warn("Got unknown object from XPath, class="+what.getClass().getName()); } @@ -384,13 +404,161 @@ return result; } + /** + * ModsCrosswalk can disseminate: Items, collections, and Communities. + */ public boolean canDisseminate(DSpaceObject dso) { + if (dso.getType() == Constants.ITEM || + dso.getType() == Constants.COLLECTION || + dso.getType() == Constants.COMMUNITY) return true; + else + return false; } + /** + * ModsCrosswalk prefers element form over list. + */ public boolean preferList() { return false; } + + + /** + * Generate a list of metadata elements for the given DSpace + * community. 
+ * + * @param community + * The community to derive metadata from + */ + protected DCValue[] community2Metadata(Community community) + { + List metadata = new ArrayList(); + + String description = community.getMetadata("introductory_text"); + String description_abstract = community + .getMetadata("short_description"); + String description_table = community.getMetadata("side_bar_text"); + String identifier_uri = "http://hdl.handle.net/" + + community.getHandle(); + String rights = community.getMetadata("copyright_text"); + String title = community.getMetadata("name"); + + if (description != null) + metadata.add(createDCValue("description", null, description)); + + if (description_abstract != null) + metadata.add(createDCValue("description", "abstract", description_abstract)); + + if (description_table != null) + metadata.add(createDCValue("description", "tableofcontents", description_table)); + + if (identifier_uri != null) + metadata.add(createDCValue("identifier.uri", null, identifier_uri)); + + if (rights != null) + metadata.add(createDCValue("rights", null, rights)); + + if (title != null) + metadata.add(createDCValue("title", null, title)); + + return (DCValue[]) metadata.toArray(new DCValue[metadata.size()]); + } + + /** + * Generate a list of metadata elements for the given DSpace + * collection. 
+ * + * @param collection + * The collection to derive metadata from + */ + protected DCValue[] collection2Metadata(Collection collection) + { + List metadata = new ArrayList(); + + String description = collection.getMetadata("introductory_text"); + String description_abstract = collection + .getMetadata("short_description"); + String description_table = collection.getMetadata("side_bar_text"); + String identifier_uri = "http://hdl.handle.net/" + + collection.getHandle(); + String provenance = collection.getMetadata("provenance_description"); + String rights = collection.getMetadata("copyright_text"); + String rights_license = collection.getMetadata("license"); + String title = collection.getMetadata("name"); + + if (description != null) + metadata.add(createDCValue("description",null, description)); + + if (description_abstract != null) + metadata.add(createDCValue("description","abstract",description_abstract)); + + if (description_table != null) + metadata.add(createDCValue("description","tableofcontents",description_table)); + + if (identifier_uri != null) + metadata.add(createDCValue("identifier","uri", identifier_uri)); + + if (provenance != null) + metadata.add(createDCValue("provenance", null, provenance)); + + if (rights != null) + metadata.add(createDCValue("rights", null, rights)); + + if (rights_license != null) + metadata.add(createDCValue("rights.license", null, rights_license)); + + if (title != null) + metadata.add(createDCValue("title", null, title)); + + return (DCValue[]) metadata.toArray(new DCValue[metadata.size()]); + } + + /** + * Generate a list of metadata elements for the given DSpace item. 
+ * + * @param item + * The item to derive metadata from + */ + protected DCValue[] item2Metadata(Item item) + { + DCValue[] dcvs = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, + Item.ANY); + + return dcvs; + } + + private DCValue createDCValue(String element, String qualifier, String value) { + DCValue dcv = new DCValue(); + dcv.schema = "dc"; + dcv.element = element; + dcv.qualifier = qualifier; + dcv.value = value; + return dcv; + } + + // check for non-XML characters + private String checkedString(String value) + { + if (value == null) + return null; + String reason = Verifier.checkCharacterData(value); + if (reason == null) + return value; + else + { + if (log.isDebugEnabled()) + log.debug("Filtering out non-XML characters in string, reason="+reason); + StringBuffer result = new StringBuffer(value.length()); + for (int i = 0; i < value.length(); ++i) + { + char c = value.charAt(i); + if (Verifier.isXMLCharacter((int)c)) + result.append(c); + } + return result.toString(); + } + } } Index: src/org/dspace/content/crosswalk/XSLTCrosswalk.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/crosswalk/XSLTCrosswalk.java,v retrieving revision 1.1 diff -b -w -u -r1.1 XSLTCrosswalk.java --- src/org/dspace/content/crosswalk/XSLTCrosswalk.java 17 Mar 2006 00:04:38 -0000 1.1 +++ src/org/dspace/content/crosswalk/XSLTCrosswalk.java 4 Jan 2007 08:08:33 -0000 @@ -46,6 +46,7 @@ import java.util.Iterator; import java.util.List; import java.util.ArrayList; +import java.util.Map; import java.util.HashMap; import java.util.Properties; import java.util.Enumeration; @@ -151,6 +152,32 @@ private final static String CONFIG_STYLESHEET = ".stylesheet"; + // Map of metadata elements for Communities and Collections + // Format is alternating key/value in a straight array; use this + // to initialize hash tables that convert to and from. 
+ private final static String ccMetadataMap[] = + { + // getMetadata() -> DC element.term + "name", "dc.title", + "introductory_text", "dc.description", + "short_description", "dc.description.abstract", + "side_bar_text", "dc.description.tableofcontents", + "copyright_text", "dc.rights", + "provenance_description", "dc.provenance", + "license", "dc.rights.license" + }; + + protected static Map ccMetatdataToDC = new HashMap(); + protected static Map ccDCToMetadata = new HashMap(); + static + { + for (int i = 0; i < ccMetadataMap.length; i += 2) + { + ccMetatdataToDC.put(ccMetadataMap[i], ccMetadataMap[i+1]); + ccDCToMetadata.put(ccMetadataMap[i+1], ccMetadataMap[i]); + } + } + /** * Derive list of plugin name from DSpace configuration entries * for crosswalks. The direction parameter should be either @@ -165,6 +192,7 @@ List aliasList = new ArrayList(); Enumeration pe = ConfigurationManager.propertyNames(); + if (log.isDebugEnabled()) log.debug("XSLTCrosswalk: Looking for config prefix = "+prefix); while (pe.hasMoreElements()) { @@ -220,6 +248,7 @@ { try { + if (log.isDebugEnabled()) log.debug((transformer == null ? 
"Loading " : "Reloading")+ getPluginInstanceName()+" XSLT stylesheet from "+transformerFile.toString()); transformer = new XSLTransformer(transformerFile); Index: src/org/dspace/content/crosswalk/XSLTDisseminationCrosswalk.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/crosswalk/XSLTDisseminationCrosswalk.java,v retrieving revision 1.1 diff -b -w -u -r1.1 XSLTDisseminationCrosswalk.java --- src/org/dspace/content/crosswalk/XSLTDisseminationCrosswalk.java 17 Mar 2006 00:04:38 -0000 1.1 +++ src/org/dspace/content/crosswalk/XSLTDisseminationCrosswalk.java 4 Jan 2007 08:08:33 -0000 @@ -1,9 +1,9 @@ /* * XSLTDisseminationCrosswalk.java * - * Version: $Revision: 1.1 $ + * Version: $Revision: 1.2 $ * - * Date: $Date: 2006/03/17 00:04:38 $ + * Date: $Date: 2006/03/20 22:41:52 $ * * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts * Institute of Technology. All rights reserved. @@ -40,39 +40,24 @@ package org.dspace.content.crosswalk; -import java.io.InputStream; import java.io.IOException; import java.sql.SQLException; -import java.util.Iterator; import java.util.List; import java.util.ArrayList; -import java.util.HashMap; -import java.util.Properties; import java.util.Enumeration; -import java.io.OutputStream; -import java.io.StringReader; -import java.io.File; -import java.io.FileInputStream; -import java.sql.SQLException; import org.apache.log4j.Logger; -import org.dspace.core.Context; import org.dspace.core.Constants; +import org.dspace.content.Collection; +import org.dspace.content.Community; import org.dspace.content.Item; -import org.dspace.content.DCDate; import org.dspace.content.DCValue; import org.dspace.content.DSpaceObject; import org.dspace.authorize.AuthorizeException; import org.dspace.core.ConfigurationManager; -import org.dspace.core.SelfNamedPlugin; import org.jdom.*; -import org.jdom.output.XMLOutputter; -import org.jdom.output.Format; -import 
org.jdom.input.SAXBuilder; -import org.jdom.input.JDOMParseException; -import org.jdom.xpath.XPath; import org.jdom.transform.XSLTransformer; import org.jdom.transform.XSLTransformException; @@ -100,8 +85,9 @@ * * * @author Larry Stone - * @version $Revision: 1.1 $ - * @see XSLTCrosswalk + * @author Scott Phillips + * @version $Revision: 1.2 $ + * @see XSLTCrosswalk */ public class XSLTDisseminationCrosswalk extends XSLTCrosswalk @@ -112,8 +98,6 @@ private final static String DIRECTION = "dissemination"; - private static XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat()); - private static String aliases[] = makeAliases(DIRECTION); public static String[] getPluginNames() @@ -127,8 +111,6 @@ private Namespace namespaces[] = null; - private String rootName = null; - private boolean preferList = false; // load the namespace and schema from config @@ -176,6 +158,11 @@ preferList = ConfigurationManager.getBooleanProperty(prefix+"preferList", false); } + /** + * Return the namespace used by this crosswalk. + * + * @see DisseminationCrosswalk + */ public Namespace[] getNamespaces() { try @@ -189,6 +176,11 @@ return namespaces; } + /** + * Return the schema location used by this crosswalk. + * + * @see DisseminationCrosswalk + */ public String getSchemaLocation() { try @@ -202,24 +194,34 @@ return schemaLocation; } + /** + * Disseminate the DSpace item, collection, or community. 
+ * + * @see DisseminationCrosswalk + */ public Element disseminateElement(DSpaceObject dso) throws CrosswalkException, IOException, SQLException, AuthorizeException { + int type = dso.getType(); + if (!(type == Constants.ITEM || + type == Constants.COLLECTION || + type == Constants.COMMUNITY)) + throw new CrosswalkObjectNotSupported("XSLTDisseminationCrosswalk can only crosswalk items, collections, and communities."); + init(); - if (dso.getType() != Constants.ITEM) - throw new CrosswalkObjectNotSupported("XSLTDisseminationCrosswalk can only crosswalk an Item."); - Item item = (Item)dso; XSLTransformer xform = getTransformer(DIRECTION); if (xform == null) throw new CrosswalkInternalException("Failed to initialize transformer, probably error loading stylesheet."); try { - Document ddim = new Document(getDim(item)); + Document ddim = new Document(createDIM(dso)); Document result = xform.transform(ddim); - return result.getRootElement(); + Element root = result.getRootElement(); + root.detach(); + return root; } catch (XSLTransformException e) { @@ -228,22 +230,30 @@ } } + /** + * Disseminate the DSpace item, collection, or community. 
+ * + * @see DisseminationCrosswalk + */ public List disseminateList(DSpaceObject dso) throws CrosswalkException, IOException, SQLException, AuthorizeException { + int type = dso.getType(); + if (!(type == Constants.ITEM || + type == Constants.COLLECTION || + type == Constants.COMMUNITY)) + throw new CrosswalkObjectNotSupported("XSLTDisseminationCrosswalk can only crosswalk a items, collections, and communities."); + init(); - if (dso.getType() != Constants.ITEM) - throw new CrosswalkObjectNotSupported("XSLTDisseminationCrosswalk can only crosswalk an Item."); - Item item = (Item)dso; XSLTransformer xform = getTransformer(DIRECTION); if (xform == null) throw new CrosswalkInternalException("Failed to initialize transformer, probably error loading stylesheet."); try { - return xform.transform(getDim(item).getChildren()); + return xform.transform(createDIM(dso).getChildren()); } catch (XSLTransformException e) { @@ -252,33 +262,22 @@ } } - - // build DIM expression of Item's metadata. - private Element getDim(Item item) - { - DCValue[] dc = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY); - Element dim = new Element("dim", DIM_NS); - for (int i = 0; i < dc.length; i++) - { - Element field = new Element("field", DIM_NS); - field.setAttribute("mdschema", dc[i].schema); - field.setAttribute("element", dc[i].element); - if (dc[i].qualifier != null) - field.setAttribute("qualifier", dc[i].qualifier); - if (dc[i].language != null) - field.setAttribute("lang", dc[i].language); - if (dc[i].value != null) - field.setText(dc[i].value); - dim.addContent(field); - } - return dim; - } - + /** + * Determine if this crosswalk can disseminate the given object. + * + * @see DisseminationCrosswalk + */ public boolean canDisseminate(DSpaceObject dso) { return dso.getType() == Constants.ITEM; } + /** + * Return true if this crosswalk prefers the list form over a single + * element, otherwise false. 
+ * + * @see disseminationcrosswalk + */ public boolean preferList() { try @@ -291,4 +290,137 @@ } return preferList; } + + /** + * Generate an intermediate representation of a DSpace object. + * + * @param dso The dspace object to build a representation of. + */ + public static Element createDIM(DSpaceObject dso, DCValue[] dcvs) + { + Element dim = new Element("dim", DIM_NS); + String type = Constants.typeText[dso.getType()]; + dim.setAttribute("dspaceType",type); + + for (int i = 0; i < dcvs.length; i++) + { + DCValue dcv = dcvs[i]; + Element field = + createField(dcv.schema, dcv.element, dcv.qualifier, + dcv.language, dcv.value); + dim.addContent(field); + } + return dim; + } + + /** + * Generate an intermediate representation of a DSpace object. + * + * @param dso The dspace object to build a representation of. + */ + public static Element createDIM(DSpaceObject dso) + { + if (dso.getType() == Constants.ITEM) + { + Item item = (Item) dso; + return createDIM(dso, item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY)); + } + else + { + Element dim = new Element("dim", DIM_NS); + String type = Constants.typeText[dso.getType()]; + dim.setAttribute("dspaceType",type); + + if (dso.getType() == Constants.COLLECTION) + { + Collection collection = (Collection) dso; + + String description = collection.getMetadata("introductory_text"); + String description_abstract = collection.getMetadata("short_description"); + String description_table = collection.getMetadata("side_bar_text"); + String identifier_uri = "hdl:" + collection.getHandle(); + String provenance = collection.getMetadata("provenance_description"); + String rights = collection.getMetadata("copyright_text"); + String rights_license = collection.getMetadata("license"); + String title = collection.getMetadata("name"); + + dim.addContent(createField("dc","description",null,null,description)); + dim.addContent(createField("dc","description","abstract",null,description_abstract)); + 
dim.addContent(createField("dc","description","tableofcontents",null,description_table)); + dim.addContent(createField("dc","identifier","uri",null,identifier_uri)); + dim.addContent(createField("dc","provenance",null,null,provenance)); + dim.addContent(createField("dc","rights",null,null,rights)); + dim.addContent(createField("dc","rights","license",null,rights_license)); + dim.addContent(createField("dc","title",null,null,title)); + } + else if (dso.getType() == Constants.COMMUNITY) + { + Community community = (Community) dso; + + String description = community.getMetadata("introductory_text"); + String description_abstract = community.getMetadata("short_description"); + String description_table = community.getMetadata("side_bar_text"); + String identifier_uri = "hdl:" + community.getHandle(); + String rights = community.getMetadata("copyright_text"); + String title = community.getMetadata("name"); + + dim.addContent(createField("dc","description",null,null,description)); + dim.addContent(createField("dc","description","abstract",null,description_abstract)); + dim.addContent(createField("dc","description","tableofcontents",null,description_table)); + dim.addContent(createField("dc","identifier","uri",null,identifier_uri)); + dim.addContent(createField("dc","rights",null,null,rights)); + dim.addContent(createField("dc","title",null,null,title)); + } + // XXX FIXME: Nothing to crosswalk for bitstream? + return dim; + } + } + + /** + * Create a new DIM field element with the given attributes. + * + * @param schema The schema the DIM field belongs too. + * @param element The element the DIM field belongs too. + * @param qualifier The qualifier the DIM field belongs too. + * @param language The language the DIM field belongs too. + * @param value The value of the DIM field. 
+ * @return A new DIM field element + */ + private static Element createField(String schema, String element, String qualifier, String language, String value) + { + Element field = new Element("field",DIM_NS); + field.setAttribute("mdschema",schema); + field.setAttribute("element",element); + if (qualifier != null) + field.setAttribute("qualifier",qualifier); + if (language != null) + field.setAttribute("lang",language); + + field.setText(checkedString(value)); + + return field; + } + + // Return string with non-XML characters (i.e. low control chars) excised. + private static String checkedString(String value) + { + if (value == null) + return null; + String reason = Verifier.checkCharacterData(value); + if (reason == null) + return value; + else + { + if (log.isDebugEnabled()) + log.debug("Filtering out non-XML characters in string, reason="+reason); + StringBuffer result = new StringBuffer(value.length()); + for (int i = 0; i < value.length(); ++i) + { + char c = value.charAt(i); + if (Verifier.isXMLCharacter((int)c)) + result.append(c); + } + return result.toString(); + } + } } Index: src/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java,v retrieving revision 1.2 diff -b -w -u -r1.2 XSLTIngestionCrosswalk.java --- src/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java 25 Sep 2006 08:19:25 -0000 1.2 +++ src/org/dspace/content/crosswalk/XSLTIngestionCrosswalk.java 4 Jan 2007 08:08:33 -0000 @@ -60,6 +60,8 @@ import org.dspace.core.Context; import org.dspace.core.Constants; import org.dspace.content.Item; +import org.dspace.content.Collection; +import org.dspace.content.Community; import org.dspace.content.DCDate; import org.dspace.content.DCValue; import org.dspace.content.DSpaceObject; @@ -69,6 +71,7 @@ import org.dspace.core.ConfigurationManager; import org.dspace.core.SelfNamedPlugin; 
import org.dspace.core.PluginManager; +import org.dspace.handle.HandleManager; import org.jdom.*; import org.jdom.output.XMLOutputter; @@ -105,7 +108,7 @@ } // apply metadata values returned in DIM to the target item. - private void applyDim(List dimList, Item item) + private static void applyDim(List dimList, Item item) throws MetadataValidationException { Iterator di = dimList.iterator(); @@ -128,13 +131,18 @@ } // adds the metadata element from one - private void applyDimField(Element field, Item item) + private static void applyDimField(Element field, Item item) { String schema = field.getAttributeValue("mdschema"); String element = field.getAttributeValue("element"); String qualifier = field.getAttributeValue("qualifier"); String lang = field.getAttributeValue("lang"); + // sanity check: some XSL puts an empty string in qualifier, + // change it to null so we match the unqualified DC field: + if (qualifier != null && qualifier.equals("")) + qualifier = null; + item.addMetadata(schema, element, qualifier, lang, field.getText()); } @@ -148,17 +156,13 @@ throws CrosswalkException, IOException, SQLException, AuthorizeException { - if (dso.getType() != Constants.ITEM) - throw new CrosswalkObjectNotSupported("XsltSubmissionionCrosswalk can only crosswalk to an Item."); - Item item = (Item)dso; - XSLTransformer xform = getTransformer(DIRECTION); if (xform == null) throw new CrosswalkInternalException("Failed to initialize transformer, probably error loading stylesheet."); try { List dimList = xform.transform(metadata); - applyDim(dimList, item); + ingestDIM(context, dso, dimList); } catch (XSLTransformException e) { @@ -175,17 +179,14 @@ public void ingest(Context context, DSpaceObject dso, Element root) throws CrosswalkException, IOException, SQLException, AuthorizeException { - if (dso.getType() != Constants.ITEM) - throw new CrosswalkObjectNotSupported("XsltSubmissionionCrosswalk can only crosswalk to an Item."); - Item item = (Item)dso; - XSLTransformer xform = 
getTransformer(DIRECTION); if (xform == null) throw new CrosswalkInternalException("Failed to initialize transformer, probably error loading stylesheet."); try { Document dimDoc = xform.transform(new Document((Element)root.clone())); - applyDim(dimDoc.getRootElement().getChildren(), item); + ingestDIM(context, dso, dimDoc.getRootElement().getChildren()); + } catch (XSLTransformException e) { @@ -195,6 +196,87 @@ } + // return coll/comm "metadata" label corresponding to a DIM field. + private static String getMetadataForDIM(Element field) + { + // make up fieldname, then look for it in xwalk + String element = field.getAttributeValue("element"); + String qualifier = field.getAttributeValue("qualifier"); + String fname = "dc." + element; + if (qualifier != null) + fname += "." + qualifier; + return ((String)ccDCToMetadata.get(fname)); + } + + /** + * Ingest a DIM metadata expression directly, without + * translating some other format into DIM. + * The dim element is expected to be the root of + * a DIM document. + *

+ * Note that this is ONLY implemented for Item, Collection, and + * Community objects. Also only works for the "dc" metadata schema. + *

+ * @param context the context + * @param dso object into which to ingest metadata + * @param dim root of a DIM expression + */ + + public static void ingestDIM(Context context, DSpaceObject dso, Element dim) + throws CrosswalkException, + IOException, SQLException, AuthorizeException + { + ingestDIM(context, dso, dim.getChildren()); + } + + public static void ingestDIM(Context context, DSpaceObject dso, List fields) + throws CrosswalkException, + IOException, SQLException, AuthorizeException + { + int type = dso.getType(); + if (type == Constants.ITEM) + { + Item item = (Item)dso; + applyDim(fields, item); + } + else if (type == Constants.COLLECTION || + type == Constants.COMMUNITY) + { + Iterator di = fields.iterator(); + while (di.hasNext()) + { + Element field = (Element)di.next(); + String schema = field.getAttributeValue("mdschema"); + if (field.getName().equals("dim") && + field.getNamespace().equals(DIM_NS)) + ingestDIM(context, dso, field.getChildren()); + + else if (field.getName().equals("field") && + field.getNamespace().equals(DIM_NS) && + schema != null && schema.equals("dc")) + { + String md = getMetadataForDIM(field); + if (md == null) + log.warn("Cannot map to Coll/Comm metadata field, DIM element="+ + field.getAttributeValue("element")+", qualifier="+field.getAttributeValue("qualifier")); + else + { + if (type == Constants.COLLECTION) + ((Collection)dso).setMetadata(md, field.getText()); + else + ((Community)dso).setMetadata(md, field.getText()); + } + } + + else + log.warn("ignoring unrecognized DIM element: "+field.toString()); + } + } + else + throw new CrosswalkObjectNotSupported("XsltSubmissionionCrosswalk can only crosswalk to an Item."); + + } + /** * Simple command-line rig for testing the DIM output of a stylesheet. 
* Usage: java XSLTIngestionCrosswalk Index: src/org/dspace/content/packager/AbstractMETSDisseminator.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/packager/AbstractMETSDisseminator.java,v retrieving revision 1.1 diff -b -w -u -r1.1 AbstractMETSDisseminator.java --- src/org/dspace/content/packager/AbstractMETSDisseminator.java 17 Mar 2006 00:04:38 -0000 1.1 +++ src/org/dspace/content/packager/AbstractMETSDisseminator.java 4 Jan 2007 08:08:33 -0000 @@ -43,6 +43,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.PipedOutputStream; +import java.io.PipedInputStream; import java.sql.SQLException; import java.util.ArrayList; import java.util.Date; @@ -53,33 +55,17 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; -import org.apache.log4j.Logger; -import org.dspace.authorize.AuthorizeException; -import org.dspace.authorize.AuthorizeManager; -import org.dspace.content.Bitstream; -import org.dspace.content.Bundle; -import org.dspace.content.DSpaceObject; -import org.dspace.content.Item; -import org.dspace.content.crosswalk.CrosswalkException; -import org.dspace.content.crosswalk.DisseminationCrosswalk; -import org.dspace.core.ConfigurationManager; -import org.dspace.core.Constants; -import org.dspace.core.Context; -import org.dspace.core.PluginManager; -import org.dspace.core.Utils; -import org.jdom.Namespace; -import org.jdom.output.Format; -import org.jdom.output.XMLOutputter; - import edu.harvard.hul.ois.mets.Agent; import edu.harvard.hul.ois.mets.AmdSec; import edu.harvard.hul.ois.mets.Checksumtype; import edu.harvard.hul.ois.mets.Div; import edu.harvard.hul.ois.mets.DmdSec; +import edu.harvard.hul.ois.mets.MdRef; import edu.harvard.hul.ois.mets.FLocat; import edu.harvard.hul.ois.mets.FileGrp; import edu.harvard.hul.ois.mets.FileSec; import edu.harvard.hul.ois.mets.Fptr; +import edu.harvard.hul.ois.mets.Mptr; 
import edu.harvard.hul.ois.mets.Loctype; import edu.harvard.hul.ois.mets.MdWrap; import edu.harvard.hul.ois.mets.Mdtype; @@ -89,6 +75,10 @@ import edu.harvard.hul.ois.mets.Role; import edu.harvard.hul.ois.mets.StructMap; import edu.harvard.hul.ois.mets.TechMD; +import edu.harvard.hul.ois.mets.SourceMD; +import edu.harvard.hul.ois.mets.DigiprovMD; +import edu.harvard.hul.ois.mets.RightsMD; +import edu.harvard.hul.ois.mets.helper.MdSec; import edu.harvard.hul.ois.mets.Type; import edu.harvard.hul.ois.mets.XmlData; import edu.harvard.hul.ois.mets.helper.MetsElement; @@ -98,6 +88,31 @@ import edu.harvard.hul.ois.mets.helper.PCData; import edu.harvard.hul.ois.mets.helper.PreformedXML; +import org.apache.log4j.Logger; + +import org.dspace.authorize.AuthorizeException; +import org.dspace.authorize.AuthorizeManager; +import org.dspace.content.Bitstream; +import org.dspace.content.Bundle; +import org.dspace.content.Community; +import org.dspace.content.Collection; +import org.dspace.content.DSpaceObject; +import org.dspace.content.Item; +import org.dspace.content.ItemIterator; +import org.dspace.content.crosswalk.CrosswalkException; +import org.dspace.content.crosswalk.CrosswalkObjectNotSupported; +import org.dspace.content.crosswalk.DisseminationCrosswalk; +import org.dspace.content.crosswalk.StreamDisseminationCrosswalk; +import org.dspace.handle.HandleManager; +import org.dspace.core.Constants; +import org.dspace.core.Context; +import org.dspace.core.PluginManager; +import org.dspace.core.Utils; +import org.jdom.Element; +import org.jdom.Namespace; +import org.jdom.output.Format; +import org.jdom.output.XMLOutputter; + /** * Base class for disseminator of * METS (Metadata Encoding & Transmission Standard) Package.
@@ -114,6 +129,10 @@ * other metadata (such as licenses) will be encoded inline. * Default is false. * + * internal -- if true, generate a standalone XML document + * for an Internal AIP, very similar to the effect of manifestOnly. + * Default is false. + * * unauthorized -- this determines what is done when the * packager encounters a Bundle or Bitstream it is not authorized to * read. By default, it just quits with an AuthorizeException. @@ -132,9 +151,6 @@ /** log4j category */ private static Logger log = Logger.getLogger(AbstractMETSDisseminator.class); - /** Filename of manifest, relative to package toplevel. */ - public static final String MANIFEST_FILE = "mets.xml"; - // JDOM xml output writer - indented format for readability. private static XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat()); @@ -142,26 +158,49 @@ private int idCounter = 1; /** - * Table of files to add to package, such as mdRef'd metadata. - * Key is relative pathname of file, value is InputStream - * with contents to put in it. - * New map is created by disseminate(). + * Wrapper for a table of streams to add to the package, such as + * mdRef'd metadata. Key is relative pathname of file, value is + * InputStream with contents to put in it. Some + * superclasses will put streams in this table when adding an mdRef + * element to e.g. a rightsMD segment. */ - protected Map extraFiles = null; + protected class MdStreamCache + { + private Map extraFiles = new HashMap(); + + public void addStream(MdRef key, InputStream md) + { + extraFiles.put(key, md); + } + + public Map getMap() + { + return extraFiles; + } + + public void close() + throws IOException + { + for (InputStream is : extraFiles.values()) + is.close(); + } + } /** - * Make a new unique ID with specified prefix. + * Make a new unique ID symbol with specified prefix. * @param prefix the prefix of the identifier, constrained to XML ID schema * @return a new string identifier unique in this session (instance). 
*/ - protected String gensym(String prefix) + protected synchronized String gensym(String prefix) { return prefix + "_" + String.valueOf(idCounter++); } public String getMIMEType(PackageParameters params) { - return (params != null && params.getProperty("manifestOnly") != null) ? + return (params != null && + (params.getBooleanProperty("manifestOnly", false) || + params.getBooleanProperty("internal", false))) ? "text/xml" : "application/zip"; } @@ -181,49 +220,101 @@ PackageParameters params, OutputStream pkg) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { + long lmTime = 0; if (dso.getType() == Constants.ITEM) - { - Item item = (Item)dso; - long lmTime = item.getLastModified().getTime(); + lmTime = ((Item)dso).getLastModified().getTime(); // how to handle unauthorized bundle/bitstream: String unauth = (params == null) ? null : params.getProperty("unauthorized"); + MdStreamCache extraStreams = null; + try + { + // Generate a true manifest-only "package", no external data. + if (params != null && params.getBooleanProperty("manifestOnly", false)) + { + Mets manifest = makeManifest(context, dso, params, null); + manifest.validate(new MetsValidator()); + manifest.write(new MetsWriter(pkg)); + } - if (params != null && params.getProperty("manifestOnly") != null) + // for Internal "package", i.e. AIP saved in Asset Store, + // put each "extra" metadata streams in a Bitstream and then + // add the URI of that Bitstream to the PackageParam + // additionalBitstreamURIs so the client can find it. Sneaky! 
+ else if (params != null && params.getBooleanProperty("internal", false)) { - extraFiles = null; - writeManifest(context, item, params, pkg); + extraStreams = new MdStreamCache(); + Mets manifest = makeManifest(context, dso, params, extraStreams); + for (Map.Entry ment : extraStreams.getMap().entrySet()) + { + InputStream is = (InputStream)ment.getValue(); + Bitstream bs = Bitstream.create(context, is); + is.close(); + + String fname = makeBitstreamURL(bs, params); + if (log.isDebugEnabled()) + log.debug("Wrote EXTRA stream to a disconnected bitstream: "+fname); + MdRef ref = (MdRef)ment.getKey(); + ref.setXlinkHref(fname); + params.addProperty("additionalBitstreamURIs", fname); } + + // can only validate now after fixing up extraStreams + manifest.validate(new MetsValidator()); + manifest.write(new MetsWriter(pkg)); + } + + // make a Zip-based package else { - extraFiles = new HashMap(); + // map of extra streams to put in Zip + extraStreams = new MdStreamCache(); ZipOutputStream zip = new ZipOutputStream(pkg); zip.setComment("METS archive created by DSpace METSDisseminationCrosswalk"); + Mets manifest = makeManifest(context, dso, params, extraStreams); - // write manifest first. - ZipEntry me = new ZipEntry(MANIFEST_FILE); - me.setTime(lmTime); - zip.putNextEntry(me); - writeManifest(context, item, params, zip); - zip.closeEntry(); - - // copy extra (meta?) bitstreams into zip - Iterator fi = extraFiles.keySet().iterator(); - while (fi.hasNext()) + // copy extra (meta?) 
bitstreams into zip, update manifest + if (extraStreams != null) { - String fname = (String)fi.next(); + for (Map.Entry ment : extraStreams.getMap().entrySet()) + { + MdRef ref = (MdRef)ment.getKey(); + InputStream is = (InputStream)ment.getValue(); + // hopefully unique filename within the Zip + String fname = gensym("metadata"); + ref.setXlinkHref(fname); + if (log.isDebugEnabled()) + log.debug("Writing EXTRA stream to Zip: "+fname); ZipEntry ze = new ZipEntry(fname); + if (lmTime != 0) ze.setTime(lmTime); zip.putNextEntry(ze); - Utils.copy((InputStream)extraFiles.get(fname), zip); + Utils.copy(is, zip); zip.closeEntry(); + is.close(); + } } + // write manifest after metadata. + ZipEntry me = new ZipEntry(METSManifest.MANIFEST_FILE); + if (lmTime != 0) + me.setTime(lmTime); + zip.putNextEntry(me); + + // can only validate now after fixing up extraStreams + manifest.validate(new MetsValidator()); + manifest.write(new MetsWriter(zip)); + zip.closeEntry(); + // copy all non-meta bitstreams into zip + if (dso.getType() == Constants.ITEM) + { + Item item = (Item)dso; + Bundle bundles[] = item.getBundles(); for (int i = 0; i < bundles.length; i++) { - if (!PackageUtils.isMetaInfoBundle(bundles[i])) + if (includeBundle(bundles[i])) { // unauthorized bundle? if (!AuthorizeManager.authorizeActionBoolean(context, @@ -246,8 +337,12 @@ if (auth || (unauth != null && unauth.equalsIgnoreCase("zero"))) { - ZipEntry ze = new ZipEntry( - makeBitstreamName(bitstreams[k])); + String zname = makeBitstreamURL(bitstreams[k], params); + ZipEntry ze = new ZipEntry(zname); + if (log.isDebugEnabled()) + log.debug("Writing CONTENT stream of bitstream("+String.valueOf(bitstreams[k].getID())+") to Zip: "+zname+ + ", size="+String.valueOf(bitstreams[k].getSize())); + if (lmTime != 0) ze.setTime(lmTime); ze.setSize(auth ? 
bitstreams[k].getSize() : 0); zip.putNextEntry(ze); @@ -269,26 +364,43 @@ } } } - zip.close(); - extraFiles = null; } + // Coll, Comm just add logo bitstream to content if there is one + else if (dso.getType() == Constants.COLLECTION || + dso.getType() == Constants.COMMUNITY) + { + Bitstream logoBs = dso.getType() == Constants.COLLECTION ? + ((Collection)dso).getLogo() : + ((Community)dso).getLogo(); + if (logoBs != null) + { + String zname = makeBitstreamURL(logoBs, params); + ZipEntry ze = new ZipEntry(zname); + if (log.isDebugEnabled()) + log.debug("Writing CONTENT stream of bitstream("+String.valueOf(logoBs.getID())+") to Zip: "+zname+", size="+String.valueOf(logoBs.getSize())); + ze.setSize(logoBs.getSize()); + zip.putNextEntry(ze); + Utils.copy(logoBs.retrieve(), zip); + zip.closeEntry(); } - else - throw new PackageValidationException("Can only disseminate an Item now."); } - - /** - * Create name that bitstream will have in archive. Name must - * be unique and relative to archive top level, e.g. "bitstream_.ext" - */ - private String makeBitstreamName(Bitstream bitstream) + zip.close(); + } + } + catch (MetsException e) { - String base = "bitstream_"+String.valueOf(bitstream.getID()); - String ext[] = bitstream.getFormat().getExtensions(); - return (ext.length > 0) ? base+"."+ext[0] : base; + // We don't pass up a MetsException, so callers don't need to + // know the details of the METS toolkit + log.error("METS error: ",e); + throw new PackageValidationException(e); + } + finally + { + if (extraStreams != null) + extraStreams.close(); + } } - // set metadata type - if Mdtype.parse() gets exception, // that means it's not in the MDTYPE vocabulary, so use OTHER. @@ -305,144 +417,302 @@ } } + // set metadata type - if Mdtype.parse() gets exception, + // that means it's not in the MDTYPE vocabulary, so use OTHER. 
+ private void setMdType(MdRef mdRef, String mdtype) + { + try + { + mdRef.setMDTYPE(Mdtype.parse(mdtype)); + } + catch (MetsException e) + { + mdRef.setMDTYPE(Mdtype.OTHER); + mdRef.setOTHERMDTYPE(mdtype); + } + } + /** - * Write out a METS manifest. - * Mostly lifted from Rob Tansley's METS exporter. + * Create an element wrapped around a metadata reference (either mdWrap + * or mdRef); i.e. dmdSec, techMd, sourceMd, etc. Checks for + * XML-DOM oriented crosswalk first, then if not found looks for + * stream crosswalk of the same name. + * + * @returns mdSec element or null if xwalk returns empty results. */ - private void writeManifest(Context context, Item item, - PackageParameters params, OutputStream out) - throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException - + private MdSec makeMdSec(Context context, DSpaceObject dso, Class mdSecClass, String typeSpec, + MdStreamCache extraStreams) + throws SQLException, PackageValidationException, CrosswalkException, + IOException, AuthorizeException { - try + // for running a stream-oriented xwalk in a background thread. + class BackgroundWriter implements Runnable { - // Create the METS file - Mets mets = new Mets(); + private Context bgContext; + private StreamDisseminationCrosswalk bgSxwalk; + private DSpaceObject bgDso; + private PipedOutputStream os = null; - // Top-level stuff - mets.setID(gensym("mets")); - mets.setOBJID("hdl:" + item.getHandle()); - mets.setLABEL("DSpace Item"); - mets.setPROFILE(getProfile()); + BackgroundWriter(Context c, StreamDisseminationCrosswalk x, DSpaceObject d) + { + super(); + bgContext = c; + bgSxwalk = x; + bgDso = d; + } - // MetsHdr - MetsHdr metsHdr = new MetsHdr(); - metsHdr.setCREATEDATE(new Date()); // FIXME: CREATEDATE is now: - // maybe should be item create - // date? 
- - // Agent - Agent agent = new Agent(); - agent.setROLE(Role.CUSTODIAN); - agent.setTYPE(Type.ORGANIZATION); - Name name = new Name(); - name.getContent() - .add(new PCData(ConfigurationManager - .getProperty("dspace.name"))); - agent.getContent().add(name); - metsHdr.getContent().add(agent); - mets.getContent().add(metsHdr); + // set up pipe, save output-stm end, return input-stm end. + InputStream getInputStream() + throws IOException + { + os = new PipedOutputStream(); + return new PipedInputStream(os); + } - // add DMD sections - // Each type element MAY be either just a MODS-and-crosswalk name, OR - // a combination "MODS-name:crosswalk-name" (e.g. "DC:qDC"). - String dmdTypes[] = getDmdTypes(params); + // pipe the AIP into a bitstream in separate thread. + public void run() + { + try + { + bgSxwalk.disseminate(bgContext, bgDso, os); + os.close(); + } + catch (IOException e) + { + log.error("run(): Got IOException: ",e); + } + catch (SQLException e) + { + log.error("run(): Got SQLException: ",e); + } + catch (CrosswalkException e) + { + log.error("run(): Got SQLException: ",e); + } + catch (AuthorizeException e) + { + log.error("run(): Got SQLException: ",e); + } + } + } /* end class BackgroundWriter */ - // record of ID of each dmdsec to make DMDID in structmap. 
- String dmdGroup = gensym("dmd_group"); - String dmdId[] = new String[dmdTypes.length]; - for (int i = 0; i < dmdTypes.length; ++i) + try { - dmdId[i] = gensym("dmd"); - XmlData xmlData = new XmlData(); + MdSec mdSec = (MdSec) mdSecClass.newInstance(); + mdSec.setID(gensym(mdSec.getLocalName())); + String parts[] = typeSpec.split(":", 2); String xwalkName, metsName; - String parts[] = dmdTypes[i].split(":", 2); if (parts.length > 1) { metsName = parts[0]; xwalkName = parts[1]; } else - xwalkName = metsName = dmdTypes[i]; + xwalkName = metsName = typeSpec; + // first look for DOM-type crosswalk: DisseminationCrosswalk xwalk = (DisseminationCrosswalk) PluginManager.getNamedPlugin(DisseminationCrosswalk.class, xwalkName); - if (xwalk == null) - throw new PackageValidationException("Cannot find "+dmdTypes[i]+" crosswalk plugin!"); - else - crosswalkToMets(xwalk, item, xmlData); - - DmdSec dmdSec = new DmdSec(); - dmdSec.setID(dmdId[i]); - dmdSec.setGROUPID(dmdGroup); + if (xwalk != null) + { + // for wrapping an embedded XML model MdWrap mdWrap = new MdWrap(); setMdType(mdWrap, metsName); + XmlData xmlData = new XmlData(); + if (crosswalkToMetsElement(xwalk, dso, xmlData) != null) + { mdWrap.getContent().add(xmlData); - dmdSec.getContent().add(mdWrap); - mets.getContent().add(dmdSec); + mdSec.getContent().add(mdWrap); + return mdSec; + } + else + return null; } - // Only add license AMD section if there are any licenses. - // Catch authorization failures accessing license bitstreams - // only if we are skipping unauthorized bitstreams. 
- String licenseID = null; - try + // next try looking for stream-oriented crosswalk: + else { - AmdSec amdSec = new AmdSec(); - addRightsMd(context, item, amdSec); - if (amdSec.getContent().size() > 0) - { - licenseID = gensym("license"); - amdSec.setID(licenseID); - mets.getContent().add(amdSec); + StreamDisseminationCrosswalk sxwalk = (StreamDisseminationCrosswalk) + PluginManager.getNamedPlugin(StreamDisseminationCrosswalk.class, xwalkName); + if (sxwalk != null) + { + if (sxwalk.canDisseminate(context, dso)) + { + // start up slave thread to feed crosswalk to input stream + MdRef mdRef = new MdRef(); + BackgroundWriter bgw = new BackgroundWriter(context, sxwalk, dso); + extraStreams.addStream(mdRef, bgw.getInputStream()); + Thread slave = new Thread(bgw); + slave.setDaemon(true); + slave.start(); + + mdRef.setMIMETYPE(sxwalk.getMIMEType()); + setMdType(mdRef, metsName); + mdRef.setLOCTYPE(Loctype.URL); + mdSec.getContent().add(mdRef); + return mdSec; } + else + return null; } - catch (AuthorizeException e) - { - String unauth = (params == null) ? null : params.getProperty("unauthorized"); - if (!(unauth != null && unauth.equalsIgnoreCase("skip"))) - throw e; else - log.warn("Skipping license metadata because of access failure: "+e.toString()); + throw new PackageValidationException("Cannot find "+xwalkName+" crosswalk plugin, either DisseminationCrosswalk or StreamDisseminationCrosswalk"); + } + } + catch (InstantiationException e) + { + throw new PackageValidationException("Error instantiating Mdsec object: "+ e.toString()); + } + catch (IllegalAccessException e) + { + throw new PackageValidationException("Error instantiating Mdsec object: "+ e.toString()); + } } - // FIXME: History data???? Nooooo!!!! + // add either a techMd or sourceMd element to amdSec. + // mdSecClass determines which type. + // mdTypes[] is array of "[metsName:]PluginName" strings, maybe empty. 
+ private void addToAmdSec(AmdSec fAmdSec, String mdTypes[], Class mdSecClass, + Context context, DSpaceObject dso, MdStreamCache extraStreams) + throws SQLException, PackageValidationException, CrosswalkException, + IOException, AuthorizeException + { + for (int i = 0; i < mdTypes.length; ++i) + { + MdSec md = makeMdSec(context, dso, mdSecClass, mdTypes[i], extraStreams); + if (md != null) + fAmdSec.getContent().add(md); + } + } - // fileSec - all non-metadata bundles go into fileGrp, - // and each bitstream therein into a file. - // Create the bitstream-level techMd and div's for structmap - // at the same time so we can connec the IDREFs to IDs. - FileSec fileSec = new FileSec(); + // Create amdSec for any tech md's, return its ID attribute. + private String addAmdSec(Context context, DSpaceObject dso, PackageParameters params, + Mets mets, MdStreamCache extraStreams) + throws SQLException, PackageValidationException, CrosswalkException, + IOException, AuthorizeException + { + String techMdTypes[] = getTechMdTypes(context, dso, params); + String rightsMdTypes[] = getRightsMdTypes(context, dso, params); + String sourceMdTypes[] = getSourceMdTypes(context, dso, params); + String digiprovMdTypes[] = getDigiprovMdTypes(context, dso, params); - String techMdType = getTechMdType(params); - String parts[] = techMdType.split(":", 2); - String xwalkName, metsName; - if (parts.length > 1) + // only bother if there are any sections to add + if ((techMdTypes.length+sourceMdTypes.length+ + digiprovMdTypes.length+rightsMdTypes.length) > 0) { - metsName = parts[0]; - xwalkName = parts[1]; + String result = gensym("amd"); + AmdSec fAmdSec = new AmdSec(); + fAmdSec.setID(result); + addToAmdSec(fAmdSec, techMdTypes, TechMD.class, context, dso, extraStreams); + addToAmdSec(fAmdSec, rightsMdTypes, RightsMD.class, context, dso, extraStreams); + addToAmdSec(fAmdSec, sourceMdTypes, SourceMD.class, context, dso, extraStreams); + addToAmdSec(fAmdSec, digiprovMdTypes, DigiprovMD.class, 
context, dso, extraStreams); + + mets.getContent().add(fAmdSec); + return result; } else - xwalkName = metsName = techMdType; + return null; + } - DisseminationCrosswalk xwalk = (DisseminationCrosswalk) - PluginManager.getNamedPlugin(DisseminationCrosswalk.class, xwalkName); - if (xwalk == null) - throw new PackageValidationException("Cannot find "+xwalkName+" crosswalk plugin!"); + // make the most "persistent" identifier possible, preferably a URN + // based on the Handle. + private String makePersistentID(DSpaceObject dso) + { + String handle = dso.getHandle(); + + // If no Handle, punt to much-less-satisfactory database ID and type.. + if (handle == null) + return "DSpace_DB_"+Constants.typeText[dso.getType()] + "_" + String.valueOf(dso.getID()); + else + return getHandleURN(handle); + } + + /** + * Write out a METS manifest. + * Mostly lifted from Rob Tansley's METS exporter. + */ + private Mets makeManifest(Context context, DSpaceObject dso, + PackageParameters params, + MdStreamCache extraStreams) + throws MetsException, PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException + + { + // Create the METS manifest in memory + Mets mets = new Mets(); + String typeStr = Constants.typeText[dso.getType()]; - // log the primary bitstream for structmap - String primaryBitstreamFileID = null; + // this ID should be globally unique + mets.setID("dspace"+Utils.generateKey()); - // accumulate content DIV items to put in structMap later. - List contentDivs = new ArrayList(); + // identifies the object described by this document + mets.setOBJID(makePersistentID(dso)); + mets.setTYPE("DSpace "+typeStr); + + // this is the signature by which the ingester will recognize + // a document it can expect to interpret. 
+ mets.setPROFILE(getProfile()); + + MetsHdr metsHdr = makeMetsHdr(context, dso, params); + if (metsHdr != null) + mets.getContent().add(metsHdr); + + // add DMD sections + // Each type element MAY be either just a MODS-and-crosswalk name, OR + // a combination "MODS-name:crosswalk-name" (e.g. "DC:qDC"). + String dmdTypes[] = getDmdTypes(context, dso, params); + + // record of ID of each dmdsec to make DMDID in structmap. + String dmdGroup = gensym("dmd_group"); + String dmdId[] = new String[dmdTypes.length]; + for (int i = 0; i < dmdTypes.length; ++i) + { + MdSec dmdSec = makeMdSec(context, dso, DmdSec.class, dmdTypes[i], extraStreams); + if (dmdSec != null) + { + mets.getContent().add(dmdSec); + dmdId[i] = dmdSec.getID(); + } + } + + // add object-wide technical/source MD segments, get ID string: + // Put that ID in ADMID of first div in structmap. + String objectAMDID = addAmdSec(context, dso, params, mets, extraStreams); + + // Create simple structMap: initial div represents the Object's + // contents, its children are e.g. Item bitstreams (content only), + // Collection's members, or Community's members. + StructMap structMap = new StructMap(); + structMap.setID(gensym("struct")); + structMap.setTYPE("LOGICAL"); + structMap.setLABEL("DSpace Object"); + Div div0 = new Div(); + div0.setID(gensym("div")); + div0.setTYPE("DSpace Object Contents"); + structMap.getContent().add(div0); + + // fileSec is optional, let object type create it if needed. + FileSec fileSec = null; + + // Item-specific manifest - license, bitstreams as Files, etc. + if (dso.getType() == Constants.ITEM) + { + // this tags file ID and group identifiers for bitstreams. + String bitstreamIDstart = "bitstream_"; + Item item = (Item)dso; // how to handle unauthorized bundle/bitstream: String unauth = (params == null) ? null : params.getProperty("unauthorized"); + // fileSec - all non-metadata bundles go into fileGrp, + // and each bitstream therein into a file. 
+ // Create the bitstream-level techMd and div's for structmap + // at the same time so we can connec the IDREFs to IDs. + fileSec = new FileSec(); Bundle[] bundles = item.getBundles(); for (int i = 0; i < bundles.length; i++) { - if (PackageUtils.isMetaInfoBundle(bundles[i])) + if (!includeBundle(bundles[i])) continue; // unauthorized bundle? @@ -459,10 +729,8 @@ Bitstream[] bitstreams = bundles[i].getBitstreams(); - // Create a fileGrp + // Create a fileGrp, USE = permuted Bundle name FileGrp fileGrp = new FileGrp(); - - // Bundle name for USE attribute String bName = bundles[i].getName(); if ((bName != null) && !bName.equals("")) fileGrp.setUSE(bundleToFileGrp(bName)); @@ -494,39 +762,30 @@ } String sid = String.valueOf(bitstreams[bits].getSequenceID()); - + String fileID = bitstreamIDstart + sid; edu.harvard.hul.ois.mets.File file = new edu.harvard.hul.ois.mets.File(); - - String xmlIDstart = "bitstream_"; - String fileID = xmlIDstart + sid; - file.setID(fileID); + file.setSEQ(bitstreams[bits].getSequenceID()); + fileGrp.getContent().add(file); - // log primary bitstream for later (structMap) + // set primary bitstream in structMap if (bitstreams[bits].getID() == primaryBitstreamID) - primaryBitstreamFileID = fileID; - - // if this is content, add to structmap too: - if (isContentBundle) { - Div div = new Div(); - div.setID(gensym("div")); - div.setTYPE("DSpace Content Bitstream"); Fptr fptr = new Fptr(); fptr.setFILEID(fileID); - div.getContent().add(fptr); - contentDivs.add(div); + div0.getContent().add(0, fptr); } - file.setSEQ(bitstreams[bits].getSequenceID()); - - String groupID = "GROUP_" + xmlIDstart + sid; + // if this is content, add to structmap too: + if (isContentBundle) + div0.getContent().add(makeFileDiv(fileID, "DSpace Content Bitstream")); /* * If we're in THUMBNAIL or TEXT bundles, the bitstream is * extracted text or a thumbnail, so we use the name to work * out which bitstream to be in the same group as */ + String groupID = "GROUP_" + 
bitstreamIDstart + sid; if ((bundles[i].getName() != null) && (bundles[i].getName().equals("THUMBNAIL") || bundles[i].getName().startsWith("TEXT"))) @@ -535,14 +794,12 @@ // derived bitstream in the same group Bitstream original = findOriginalBitstream(item, bitstreams[bits]); - if (original != null) { - groupID = "GROUP_" + xmlIDstart + groupID = "GROUP_" + bitstreamIDstart + original.getSequenceID(); } } - file.setGROUPID(groupID); file.setMIMETYPE(bitstreams[bits].getFormat().getMIMEType()); @@ -550,7 +807,7 @@ file.setSIZE(auth ? bitstreams[bits].getSize() : 0); - // translate checksum and type to METS, if available. + // FIXME: need to translate checksum and type to METS, if available. String csType = bitstreams[bits].getChecksumAlgorithm(); String cs = bitstreams[bits].getChecksum(); if (auth && cs != null && csType != null) @@ -566,86 +823,169 @@ } } - // FLocat: filename is MD5 checksum + // FLocat: point to internal or external location of bitstream contents. FLocat flocat = new FLocat(); flocat.setLOCTYPE(Loctype.URL); - flocat.setXlinkHref(makeBitstreamName(bitstreams[bits])); + flocat.setXlinkHref(makeBitstreamURL(bitstreams[bits], params)); + file.getContent().add(flocat); - // Make bitstream techMD metadata, add to file. 
- String techID = "techMd_for_bitstream_"+bitstreams[bits].getSequenceID(); - AmdSec fAmdSec = new AmdSec(); - fAmdSec.setID(techID); - TechMD techMd = new TechMD(); - techMd.setID(gensym("tech")); - MdWrap mdWrap = new MdWrap(); - setMdType(mdWrap, metsName); - XmlData xmlData = new XmlData(); - mdWrap.getContent().add(xmlData); - techMd.getContent().add(mdWrap); - fAmdSec.getContent().add(techMd); - mets.getContent().add(fAmdSec); - crosswalkToMets(xwalk, bitstreams[bits], xmlData); + // technical metadata for bitstream + String techID = addAmdSec(context, bitstreams[bits], params, mets, extraStreams); + if (techID != null) file.setADMID(techID); - - // Add FLocat to File, and File to FileGrp - file.getContent().add(flocat); - fileGrp.getContent().add(file); } - - // Add fileGrp to fileSec fileSec.getContent().add(fileGrp); } - - // Add fileSec to document + } + else if (dso.getType() == Constants.COLLECTION) + { + ItemIterator ii = ((Collection)dso).getItems(); + while (ii.hasNext()) + { + Item item = ii.next(); + String hdl = item.getHandle(); + if (hdl == null) + log.warn("Collection packager is skipping Item without handle: "+item.toString()); + else + div0.getContent().add(makeHandleDiv(hdl, "DSpace Item")); + } + Bitstream logoBs = ((Collection)dso).getLogo(); + if (logoBs != null) + { + fileSec = new FileSec(); + addLogoBitstream(logoBs, fileSec, div0, params); + } + } + else if (dso.getType() == Constants.COMMUNITY) + { + // make separate sub-divs for subcomm's and colls: + Div subcommDiv = new Div(); + subcommDiv.setID(gensym("div")); + subcommDiv.setTYPE("SUBCOMMUNITIES"); + div0.getContent().add(subcommDiv); + Div collDiv = new Div(); + collDiv.setID(gensym("div")); + collDiv.setTYPE("COLLECTIONS"); + div0.getContent().add(collDiv); + Community subcomms[] = ((Community)dso).getSubcommunities(); + for (int i = 0; i < subcomms.length; ++i) + { + String hdl = subcomms[i].getHandle(); + if (hdl == null) + log.warn("Collection packager is skipping 
Subcommunity without handle: "+subcomms[i].toString()); + else + subcommDiv.getContent().add(makeHandleDiv(hdl, "DSpace Community")); + } + Collection colls[] = ((Community)dso).getCollections(); + for (int i = 0; i < colls.length; ++i) + { + String hdl = colls[i].getHandle(); + if (hdl == null) + log.warn("Collection packager is skipping Collection without handle: "+colls[i].toString()); + else + collDiv.getContent().add(makeHandleDiv(hdl, "DSpace Collection")); + } + Bitstream logoBs = ((Community)dso).getLogo(); + if (logoBs != null) + { + fileSec = new FileSec(); + addLogoBitstream(logoBs, fileSec, div0, params); + } + } + if (fileSec != null) mets.getContent().add(fileSec); + mets.getContent().add(structMap); - // Create simple structMap: initial div represents the Item, - // and user-visible content bitstreams are in its child divs. + // set links to metadata for object -- after type-specific + // code since that can add to the object metadata. StringBuffer dmdIds = new StringBuffer(); for (int i = 0; i < dmdId.length; ++i) dmdIds.append(" "+dmdId[i]); - StructMap structMap = new StructMap(); - structMap.setID(gensym("struct")); - structMap.setTYPE("LOGICAL"); - structMap.setLABEL("DSpace"); - Div div0 = new Div(); - div0.setID(gensym("div")); - div0.setTYPE("DSpace Item"); div0.setDMDID(dmdIds.substring(1)); - if (licenseID != null) - div0.setADMID(licenseID); - - // if there is a primary bitstream, add FPTR to it. - if (primaryBitstreamFileID != null) - { - Fptr fptr = new Fptr(); - fptr.setFILEID(primaryBitstreamFileID); - div0.getContent().add(fptr); - } - - // add DIV for each content bitstream - div0.getContent().addAll(contentDivs); - - structMap.getContent().add(div0); + if (objectAMDID != null) + div0.setADMID(objectAMDID); // Does subclass have something to add to structMap? 
- addStructMap(context, item, params, mets); + addStructMap(context, dso, params, mets); - mets.getContent().add(structMap); + return mets; + } - mets.validate(new MetsValidator()); + // Install logo bitstream into METS for Community, Collection. + // Add a file element, and refer to it from an fptr in the first div + // of the main structMap. + private void addLogoBitstream(Bitstream logoBs, FileSec fileSec, Div div0, PackageParameters params) + { + edu.harvard.hul.ois.mets.File file = new edu.harvard.hul.ois.mets.File(); + String fileID = gensym("logo"); + file.setID(fileID); + file.setMIMETYPE(logoBs.getFormat().getMIMEType()); + file.setSIZE(logoBs.getSize()); - mets.write(new MetsWriter(out)); + // FIXME: need to translate checksum and type to METS, if available. + String csType = logoBs.getChecksumAlgorithm(); + String cs = logoBs.getChecksum(); + if (cs != null && csType != null) + { + try + { + file.setCHECKSUMTYPE(Checksumtype.parse(csType)); + file.setCHECKSUM(cs); } catch (MetsException e) { - // We don't pass up a MetsException, so callers don't need to - // know the details of the METS toolkit - // e.printStackTrace(); - throw new PackageValidationException(e); + log.warn("Cannot set bitstream checksum type="+csType+" in METS."); + } + } + FLocat flocat = new FLocat(); + flocat.setLOCTYPE(Loctype.URL); + flocat.setXlinkHref(makeBitstreamURL(logoBs, params)); + file.getContent().add(flocat); + FileGrp fileGrp = new FileGrp(); + fileGrp.setUSE("LOGO"); + fileGrp.getContent().add(file); + fileSec.getContent().add(fileGrp); + + // add fptr directly to div0 of structMap + Fptr fptr = new Fptr(); + fptr.setFILEID(fileID); + div0.getContent().add(0, fptr); } + + // create

element pointing to a file + private Div makeFileDiv(String fileID, String type) + { + Div div = new Div(); + div.setID(gensym("div")); + div.setTYPE(type); + Fptr fptr = new Fptr(); + fptr.setFILEID(fileID); + div.getContent().add(fptr); + return div; } + // create
element with mptr pointing to a Handle + private Div makeHandleDiv(String handle, String type) + { + Div div = new Div(); + div.setID(gensym("div")); + div.setTYPE(type); + Mptr mptr = new Mptr(); + mptr.setID(gensym("mptr")); + mptr.setLOCTYPE(Loctype.HANDLE); + mptr.setXlinkHref(handle); + div.getContent().add(mptr); + return div; + } + + // put handle in canonical URN format -- note that HandleManager's + // canonicalize currently returns HTTP URL format. + protected String getHandleURN(String handle) + { + if (handle.startsWith("hdl:")) + return handle; + return "hdl:"+handle; + } /** * For a bitstream that's a thumbnail or extracted text, find the @@ -693,11 +1033,14 @@ // Get result from crosswalk plugin and add it to the document, // including namespaces and schema. - private void crosswalkToMets(DisseminationCrosswalk xwalk, + // returns the new/modified element upon success. + private MetsElement crosswalkToMetsElement(DisseminationCrosswalk xwalk, DSpaceObject dso, MetsElement me) throws CrosswalkException, IOException, SQLException, AuthorizeException { + try + { // add crosswalk's namespaces and schemaLocation to this element: String raw = xwalk.getSchemaLocation(); String sloc[] = raw == null ? null : raw.split("\\s+"); @@ -712,15 +1055,51 @@ } // add result of crosswalk - PreformedXML pXML = - new PreformedXML( - xwalk.preferList() ? - outputter.outputString(xwalk.disseminateList(dso)) : - outputter.outputString(xwalk.disseminateElement(dso))); + PreformedXML pXML = null; + if (xwalk.preferList()) + { + List res = xwalk.disseminateList(dso); + if (!(res == null || res.isEmpty())) + pXML = new PreformedXML(outputter.outputString(res)); + } + else + { + Element res = xwalk.disseminateElement(dso); + if (res != null) + pXML = new PreformedXML(outputter.outputString(res)); + } + if (pXML != null) + { me.getContent().add(pXML); + return me; + } + return null; } + catch (CrosswalkObjectNotSupported e) + { + // ignore this xwalk if object is unsupported. 
+ if (log.isDebugEnabled()) + log.debug("Skipping MDsec because of CrosswalkObjectNotSupported: dso="+dso.toString()+", xwalk="+xwalk.getClass().getName()); + return null; + } + } + + /** + * Return identifier for bitstream in an Item; when making a package, + * this is the archive member name (e.g. in Zip file). In a bare + * manifest, it might be an external URL. The name should be in URL + * format ("file:" may be elided for in-archive filenames). It should + * be deterministic, since this gets called twice for each bitstream + * when building archive. + */ + abstract public String makeBitstreamURL(Bitstream bitstream, PackageParameters params); /** + * Create metsHdr element - separate so subclasses can override. + */ + abstract public MetsHdr makeMetsHdr(Context context, DSpaceObject dso, + PackageParameters params); + /** * Returns name of METS profile to which this package conforms, e.g. * "DSpace METS DIP Profile 1.0" * @return string name of profile. @@ -747,26 +1126,54 @@ * @param params the PackageParameters passed to the disseminator. * @return array of metadata type strings, never null. */ - abstract public String [] getDmdTypes(PackageParameters params) + abstract public String [] getDmdTypes(Context context, DSpaceObject dso, PackageParameters params) throws SQLException, IOException, AuthorizeException; /** * Get the type string of the technical metadata to create for each - * Bitstream in the Item. The type string may be a simple name or - * colon-separated compound as specified for getDmdTypes() above. + * object and each Bitstream in an Item. The type string may be a + * simple name or colon-separated compound as specified for + * getDmdTypes() above. * @param params the PackageParameters passed to the disseminator. * @return array of metadata type strings, never null. 
*/ - abstract public String getTechMdType(PackageParameters params) + abstract public String[] getTechMdTypes(Context context, DSpaceObject dso, PackageParameters params) throws SQLException, IOException, AuthorizeException; /** - * Add Rights metadata for the Item, in the form of - * (rightsMd elements) to the given metadata section. + * Get the type string of the source metadata to create for each + * object and each Bitstream in an Item. The type string may be a + * simple name or colon-separated compound as specified for + * getDmdTypes() above. + * @param params the PackageParameters passed to the disseminator. + * @return array of metadata type strings, never null. + */ + abstract public String[] getSourceMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException; + + /** + * Get the type string of the "digiprov" (digital provenance) + * metadata to create for each object and each Bitstream in an Item. + * The type string may be a simple name or colon-separated compound + * as specified for getDmdTypes() above. * + * @param params the PackageParameters passed to the disseminator. + * @return array of metadata type strings, never null. */ - abstract public void addRightsMd(Context context, Item item, AmdSec amdSec) - throws SQLException, IOException, AuthorizeException, MetsException; + abstract public String[] getDigiprovMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException; + + /** + * Get the type string of the "rights" (permission and/or license) + * metadata to create for each object and each Bitstream in an Item. + * The type string may be a simple name or colon-separated compound + * as specified for getDmdTypes() above. + * + * @param params the PackageParameters passed to the disseminator. + * @return array of metadata type strings, never null. 
+ */ + abstract public String[] getRightsMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException; /** * Add any additional structMap elements to the @@ -775,7 +1182,13 @@ * requirements is already present, so this does not need to do anything. * @param mets the METS document to which to add structMaps */ - abstract public void addStructMap(Context context, Item item, + abstract public void addStructMap(Context context, DSpaceObject dso, PackageParameters params, Mets mets) throws SQLException, IOException, AuthorizeException, MetsException; + + /** + * @return true when this bundle should be included as "content" + * in the package.. e.g. DSpace SIP does not include metadata bundles. + */ + abstract public boolean includeBundle(Bundle bundle); } Index: src/org/dspace/content/packager/AbstractMETSIngester.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/packager/AbstractMETSIngester.java,v retrieving revision 1.1 diff -b -w -u -r1.1 AbstractMETSIngester.java --- src/org/dspace/content/packager/AbstractMETSIngester.java 17 Mar 2006 00:04:38 -0000 1.1 +++ src/org/dspace/content/packager/AbstractMETSIngester.java 4 Jan 2007 08:08:33 -0000 @@ -55,13 +55,16 @@ import org.apache.log4j.Logger; import org.dspace.app.mediafilter.MediaFilter; import org.dspace.authorize.AuthorizeException; +import org.dspace.authorize.AuthorizeManager; import org.dspace.content.Bitstream; import org.dspace.content.BitstreamFormat; import org.dspace.content.Bundle; import org.dspace.content.Collection; +import org.dspace.content.Community; +import org.dspace.content.DSpaceObject; import org.dspace.content.FormatIdentifier; +import org.dspace.content.IngestionWrapper; import org.dspace.content.Item; -import org.dspace.content.WorkspaceItem; import org.dspace.content.crosswalk.CrosswalkException; import 
org.dspace.content.crosswalk.MetadataValidationException; import org.dspace.core.ConfigurationManager; @@ -69,6 +72,7 @@ import org.dspace.core.Context; import org.dspace.core.LogManager; import org.dspace.core.PluginManager; +import org.dspace.handle.HandleManager; import org.jdom.Element; /** @@ -80,13 +84,34 @@ * ingesters for more specific METS "profiles". METS is an * abstract and flexible framework that can encompass many * different kinds of metadata and inner package structures. + * + *

+ * Parameters: + * 1. "validate" -- true/false, attempt to schema-validate the METS manifest (default true). + * 2. "manifestOnly" -- true/false, package consists only of a manifest document. + * "internal" -- like "manifestOnly" except refs are to bitstreams in assetstore. + * 3. "ignoreHandle" -- true/false, ignore AIP's idea of handle when ingesting. + * 4. "ignoreParent" -- true/false, ignore AIP's idea of parent when ingesting. + * *

- * Configuration: - * If the property mets.submission.preserveManifest is true, + * Configuration Properties: + * + * 1. mets.CONFIGNAME.ingest.preserveManifest - if true, * the METS manifest itself is preserved in a bitstream named * mets.xml in the METADATA bundle. If it is * false (the default), the manifest is discarded after ingestion. * + * 2. mets.CONFIGNAME.ingest.manifestBitstreamFormat - short name of + * the bitstream format to apply to the manifest; MUST be specified when + * preserveManifest is true. + * + * 3. mets.default.ingest.crosswalk.MD_SEC_NAME = PLUGIN_NAME + * Establishes a default crosswalk plugin for the given type of metadata + * in a METS mdSec (e.g. "DC", "MODS"). The plugin may be either a stream + * or XML-oriented ingestion crosswalk. Subclasses can override the + * default mapping with their own, substituting their configurationName + * for "default" in the configuration property key above. + * * @author Larry Stone * @version $Revision: 1.1 $ * @see org.dspace.content.packager.METSManifest @@ -97,37 +122,32 @@ /** log4j category */ private static Logger log = Logger.getLogger(AbstractMETSIngester.class); - /** Filename of manifest, relative to package toplevel. */ - public static final String MANIFEST_FILE = "mets.xml"; - - // bitstream format name of magic METS SIP format.. - private static final String MANIFEST_BITSTREAM_FORMAT = - "DSpace METS SIP"; - - // value of mets.submission.preserveManifest config key - private static final boolean preserveManifest = - ConfigurationManager.getBooleanProperty("mets.submission.preserveManifest", false); - /** * An instance of MdrefManager holds the state needed to * retrieve the contents (or bitstream corresponding to) an * external metadata stream referenced by an mdRef * element in the METS manifest. *

+ * Initialize it with the map of package files (name -> Bitstream) holding any * metadata bitstreams. Match an mdRef by finding the bitstream * with the same name. */ protected class MdrefManager implements METSManifest.Mdref { - private Bundle mdBundle = null; - - // constructor initializes metadata bundle. - private MdrefManager(Bundle mdBundle) + private Map packageFiles = null; + private Context context; + private PackageParameters params; + + // constructor initializes metadata map (from package) + private MdrefManager(Context context, + Map packageFiles, + PackageParameters params) { super(); - this.mdBundle = mdBundle; + this.context = context; + this.packageFiles = packageFiles; + this.params = params; } /** @@ -137,12 +157,13 @@ * @return bitstream or null if none found. */ public Bitstream getBitstreamForMdRef(Element mdref) - throws MetadataValidationException, IOException, SQLException, AuthorizeException + throws MetadataValidationException, PackageValidationException, + IOException, SQLException, AuthorizeException { String path = METSManifest.getFileName(mdref); - if (mdBundle == null) - throw new MetadataValidationException("Failed referencing mdRef element, because there were no metadata files."); - return mdBundle.getBitstreamByName(path); + if (packageFiles == null) + throw new MetadataValidationException("Failed referencing mdRef element, because there is no map of package files."); + return resolveBitstream(context, path, mdref, packageFiles, params); } /** @@ -153,7 +174,8 @@ * @return the input stream of its content.
*/ public InputStream getInputStream(Element mdref) - throws MetadataValidationException, IOException, SQLException, AuthorizeException + throws MetadataValidationException, PackageValidationException, + IOException, SQLException, AuthorizeException { Bitstream mdbs = getBitstreamForMdRef(mdref); if (mdbs == null) @@ -163,9 +185,10 @@ } /** - * Create a new DSpace item out of a METS content package. + * Create a new DSpace object out of a METS content package. * All contents are dictated by the METS manifest. - * Package is a ZIP archive, all files relative to top level + * Package is a ZIP archive (or optionally bare manifest XML document). + * In a Zip, all files relative to top level * and the manifest (as per spec) in mets.xml. * * @param context - DSpace context. @@ -176,100 +199,175 @@ * @throws PackageValidationException if package is unacceptable or there is * a fatal error turning it into an Item. */ - public WorkspaceItem ingest(Context context, Collection collection, + public IngestionWrapper ingest(Context context, DSpaceObject parent, InputStream pkg, PackageParameters params, String license) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { - ZipInputStream zip = new ZipInputStream(pkg); - HashMap fileIdToBitstream = new HashMap(); - WorkspaceItem wi = null; + DSpaceObject dso = null; boolean success = false; HashSet packageFiles = new HashSet(); - + Bitstream manifestBitstream = null; + IngestionWrapper result = null; boolean validate = params.getBooleanProperty("validate", true); + METSManifest manifest; + + // map of bitstream name -> bitstream object; delete any unused ones. + HashMap deleteMe = new HashMap(); try { - /* 1. Read all the files in the Zip into bitstreams first, - * because we only get to take one pass through a Zip input - * stream. 
Give them temporary bitstream names corresponding - * to the same names they had in the Zip, since those MUST - * match the URL references in and elements. - */ - METSManifest manifest = null; - wi = WorkspaceItem.create(context, collection, false); - Item item = wi.getItem(); - Bundle contentBundle = item.createBundle(Constants.CONTENT_BUNDLE_NAME); - Bundle mdBundle = null; + int type; + MdrefManager callback; + try + { + // need to bypass privilege checks to + // create and manipulate "unattached" bitstreams + context.setIgnoreAuthorization(true); + + // 1. read "package" stream: it will be either bare Manifest + // or Package contents into bitstreams, depending on params: + if (params.getBooleanProperty("manifestOnly", false) || + params.getBooleanProperty("internal", false)) + { + manifestBitstream = Bitstream.create(context, pkg); + manifestBitstream.setName(METSManifest.MANIFEST_FILE); + manifestBitstream.setSource(METSManifest.MANIFEST_FILE); + deleteMe.put(METSManifest.MANIFEST_FILE, manifestBitstream); + manifestBitstream.update(); + + if (log.isDebugEnabled()) + log.debug("Got standalone manifest, len="+String.valueOf(manifestBitstream.getSize())); + } + else + { + ZipInputStream zip = new ZipInputStream(pkg); ZipEntry ze; while ((ze = zip.getNextEntry()) != null) { if (ze.isDirectory()) continue; - Bitstream bs = null; + Bitstream bs = Bitstream.create(context, new PackageUtils.UnclosableInputStream(zip)); String fname = ze.getName(); - if (fname.equals(MANIFEST_FILE)) - { - if (preserveManifest) - { - mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); - bs = mdBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip)); - bs.setName(fname); + if (fname.equals(METSManifest.MANIFEST_FILE)) + manifestBitstream = bs; + deleteMe.put(fname, bs); bs.setSource(fname); - - // Get magic bitstream format to identify manifest. 
- BitstreamFormat manifestFormat = null; - manifestFormat = PackageUtils.findOrCreateBitstreamFormat(context, - MANIFEST_BITSTREAM_FORMAT, "application/xml", - MANIFEST_BITSTREAM_FORMAT+" package manifest"); - bs.setFormat(manifestFormat); - - manifest = METSManifest.create(bs.retrieve(), validate); + bs.setName(fname); + AuthorizeManager.addPolicy(context, bs, Constants.READ, context.getCurrentUser()); + bs.update(); + packageFiles.add(fname); } - else - { - manifest = METSManifest.create(new PackageUtils.UnclosableInputStream(zip), validate); - continue; + zip.close(); + if (log.isDebugEnabled()) + log.debug("Got Zip, manifestBitstream="+manifestBitstream); } } - else + finally { - bs = contentBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip)); - bs.setSource(fname); - bs.setName(fname); + context.setIgnoreAuthorization(false); } - packageFiles.add(fname); - bs.setSource(fname); - bs.update(); - } - zip.close(); - if (manifest == null) - throw new PackageValidationException("No METS Manifest found (filename="+MANIFEST_FILE+"). Package is unacceptable."); + // 2. parse the manifest and sanity-check it. + if (manifestBitstream == null) + throw new PackageValidationException("No METS Manifest found (filename="+METSManifest.MANIFEST_FILE+"). Package is unacceptable."); + manifest = METSManifest.create(manifestBitstream.retrieve(), + validate, getConfigurationName()); + + // 3. Extract object type and instantiate the new object: + type = getObjectType(manifest); + callback = new MdrefManager(context, deleteMe, params); + result = IngestionWrapper.create(type); + + // 4. crosswalk sourceMD first, for the object to fill in + // parent, submitter, handle, etc if applicable: (as for AIP) + // ..but set a default parent if there is no sourceMD. + if (!manifest.crosswalkObjectSourceMD(context, result, callback)) + result.setParent(parent); - // initial sanity checks on manifest (in subclass) checkManifest(manifest); - /* 2. 
Grovel a file list out of METS Manifest and compare - * it to the files in package, as an integrity test. + if (log.isDebugEnabled()) + log.debug("After object TechMD: parent="+result.getParent().getHandle()+ + ", hdl="+result.getHandle()); + + // 5. create shell of object -- use parent and handle from + // sourceMD above, if any, and if options don't say to ignore them: + if (params.getBooleanProperty("ignoreHandle", false)) + result.setHandle(null); + // check that package included a resolvable parent + if (params.getBooleanProperty("ignoreParent", false)) + result.setParent(parent); + else if (result.getParent() == null) + throw new PackageValidationException("The parent object required to ingest this package could not be resolved. Check manifest for details."); + result = result.createWrappedObject(context); + dso = result.getWrappedObject(); + + // 5 (cont.) crosswalk techMD, digiprovMD, rightsMD now that there + // is a target object. + manifest.crosswalkObjectOtherAdminMD(context, dso, callback); + + // 6. Object-type-specific logic: Ingesting an ITEM + if (type == Constants.ITEM) + { + // XXX FIXME: maybe add an option to apply template Item on ingest + + Item item = (Item)dso; + Bundle contentBundle = item.createBundle(Constants.CONTENT_BUNDLE_NAME); + Bundle mdBundle = null; + + // save manifest bitstream in Item if desired, give it + // "magic" bitstream format to find it later for dissemination. + if (preserveManifest()) + { + mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); + mdBundle.addBitstream(manifestBitstream); + + // Get magic bitstream format to identify manifest.
+ String fmtName = getManifestBitstreamFormat(); + if (fmtName == null) + throw new PackageValidationException("Configuration Error: No Manifest BitstreamFormat configured for METS ingester type="+getConfigurationName()); + BitstreamFormat manifestFormat = PackageUtils.findOrCreateBitstreamFormat(context, + fmtName, "application/xml", + fmtName+" package manifest"); + manifestBitstream.setFormat(manifestFormat); + manifestBitstream.update(); + deleteMe.remove(METSManifest.MANIFEST_FILE); + } + + // -------- Acquire all the bitstreams + /* Grovel a content-file list out of METS Manifest -- + * - check or acquire a bitstream for each one + * - crosswalk any bitstream AMD and cleanup + * - put each bitstream in the appropriate bundle + * - mark Item's primary bitstream */ + String pbsID = null; + Element pbsFile = manifest.getPrimaryOrLogoBitstream(); + if (pbsFile != null) + { + pbsID = pbsFile.getAttributeValue("ID"); + if (log.isDebugEnabled()) + log.debug("Got primary bitstream file ID=\""+pbsID+"\""); + } + List manifestContentFiles = manifest.getContentFiles(); + HashSet missingFiles = new HashSet(); // Compare manifest files with the ones found in package: // a. Start with content files (mentioned in s) - HashSet missingFiles = new HashSet(); + boolean setPBS = false; + BitstreamFormat unknownFormat = BitstreamFormat.findUnknown(context); for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) { - // First locate corresponding Bitstream and make - // map of Bitstream to ID. 
Element mfile = (Element)mi.next(); - String mfileId = mfile.getAttributeValue("ID"); - if (mfileId == null) + String mfileID = mfile.getAttributeValue("ID"); + if (mfileID == null) throw new PackageValidationException("Invalid METS Manifest: file element without ID attribute."); String path = METSManifest.getFileName(mfile); - Bitstream bs = contentBundle.getBitstreamByName(path); + Bitstream bs = resolveBitstream(context, path, mfile, + deleteMe, params); if (bs == null) { log.warn("Cannot find bitstream for filename=\""+path+ @@ -278,32 +376,12 @@ } else { - fileIdToBitstream.put(mfileId, bs); - - // Now that we're done using Name to match to , - // set default bitstream Name to last path element; - // Zip entries all have '/' pathname separators - // NOTE: set default here, hopefully crosswalk of - // a bitstream techMD section will override it. - String fname = bs.getName(); - int lastSlash = fname.lastIndexOf('/'); - if (lastSlash >= 0 && lastSlash+1 < fname.length()) - bs.setName(fname.substring(lastSlash+1)); - - // Set Default bitstream format: - // 1. attempt to guess from MIME type - // 2. if that fails, guess from "name" extension. - String mimeType = mfile.getAttributeValue("MIMETYPE"); - BitstreamFormat bf = (mimeType == null) ? null : - BitstreamFormat.findByMIMEType(context, mimeType); - if (bf == null) - bf = FormatIdentifier.guessFormat(context, bs); - bs.setFormat(bf); + // build compare lists by deleting matches. + if (packageFiles.contains(path)) + packageFiles.remove(path); - // if this bitstream belongs in another Bundle, move it: + // attach bitstream to correct bundle. 
String bundleName = manifest.getBundleName(mfile); - if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) - { Bundle bn; Bundle bns[] = item.getBundles(bundleName); if (bns != null && bns.length > 0) @@ -311,50 +389,69 @@ else bn = item.createBundle(bundleName); bn.addBitstream(bs); - contentBundle.removeBitstream(bs); + + // crosswalk bitstream's AMD + manifest.crosswalkBitstream(context, bs, mfileID, callback); + + // is this the primary bitstream? + if (pbsID != null && mfileID.equals(pbsID)) + { + bn.setPrimaryBitstreamID(bs.getID()); + bn.update(); + setPBS = true; } - // finally, build compare lists by deleting matches. - if (packageFiles.contains(path)) - packageFiles.remove(path); - else - missingFiles.add(path); + // Some optional subclass business to second-guess + // values for details of bitstream: + finishBitstream(context, bs, mfile, manifest, params); + + /** + * Last-ditch attempt to divine the format, if crosswalk failed to set it: + * 1. attempt to guess from MIME type + * 2. if that fails, guess from "name" extension. + */ + if (bs.getFormat().equals(unknownFormat)) + { + if (log.isDebugEnabled()) + log.debug("Guessing format of Bitstream left un-set: "+bs.toString()); + String mimeType = mfile.getAttributeValue("MIMETYPE"); + BitstreamFormat bf = (mimeType == null) ? null : + BitstreamFormat.findByMIMEType(context, mimeType); + if (bf == null) + bf = FormatIdentifier.guessFormat(context, bs); + bs.setFormat(bf); + } + bs.update(); } } - // b. Process files mentioned in s - check and move - // to METADATA bundle. + // sanity check for primary bitstream + if (pbsID != null && !setPBS) + log.warn("Could not find primary bitstream file ID=\""+pbsID+"\""); + + // b. Account for package members mentioned in s -- + // just correct the file lists, don't crosswalk yet. 
for (Iterator mi = manifest.getMdFiles().iterator(); mi.hasNext(); ) { Element mdref = (Element)mi.next(); String path = METSManifest.getFileName(mdref); - // finally, build compare lists by deleting matches. if (packageFiles.contains(path)) packageFiles.remove(path); else missingFiles.add(path); - - // if there is a bitstream with that name in Content, move - // it to the Metadata bundle: - Bitstream mdbs = contentBundle.getBitstreamByName(path); - if (mdbs != null) - { - if (mdBundle == null) - mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); - mdBundle.addBitstream(mdbs); - contentBundle.removeBitstream(mdbs); - } } + // -------- Sanity checks of bitstreams vs. manifest + // KLUDGE: make sure Manifest file doesn't get flagged as missing // or extra, since it won't be mentioned in the manifest. - if (packageFiles.contains(MANIFEST_FILE)) - packageFiles.remove(MANIFEST_FILE); + if (packageFiles.contains(METSManifest.MANIFEST_FILE)) + packageFiles.remove(METSManifest.MANIFEST_FILE); // Give subclass a chance to refine the lists of in-package // and missing files, delete extraneous files, etc. - checkPackageFiles(packageFiles, missingFiles, manifest); + checkPackageFiles(packageFiles, missingFiles, manifest, params); // Any discrepency in file lists is a fatal error: if (!(packageFiles.isEmpty() && missingFiles.isEmpty())) @@ -375,111 +472,119 @@ throw new PackageValidationException(msg.toString()); } - /* 3. crosswalk the metadata - */ - // get mdref'd streams from "callback" object. - MdrefManager callback = new MdrefManager(mdBundle); + // have subclass manage license since it may be extra package file. + addLicense(context, item, license, (Collection)result.getParent(), params); - chooseItemDmd(context, item, manifest, callback, manifest.getItemDmds()); + // XXX FIXME + // should set lastModifiedTime e.g. when ingesting AIP. + // maybe only do it in the finishObject() callback for AIP. - // crosswalk content bitstreams too. 
- for (Iterator ei = fileIdToBitstream.entrySet().iterator(); - ei.hasNext();) + } // if ITEM + else if (type == Constants.COLLECTION || + type == Constants.COMMUNITY) { - Map.Entry ee = (Map.Entry)ei.next(); - manifest.crosswalkBitstream(context, (Bitstream)ee.getValue(), - (String)ee.getKey(), callback); - } + Element logoFile = manifest.getPrimaryOrLogoBitstream(); + if (logoFile != null) + { + String logoID = logoFile.getAttributeValue("ID"); + if (log.isDebugEnabled()) + log.debug("Got logo bitstream file ID=\""+logoID+"\""); - // Take a second pass over files to correct names of derived files - // (e.g. thumbnails, extracted text) to what DSpace expects: - for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) + for (Iterator mi = manifest.getContentFiles().iterator(); mi.hasNext(); ) { Element mfile = (Element)mi.next(); - String bundleName = manifest.getBundleName(mfile); - if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) + if (logoID.equals(mfile.getAttributeValue("ID"))) { - Element origFile = manifest.getOriginalFile(mfile); - if (origFile != null) + String path = METSManifest.getFileName(mfile); + Bitstream bs = resolveBitstream(context, path, mfile, + deleteMe, params); + if (bs == null) { - String ofileId = origFile.getAttributeValue("ID"); - Bitstream obs = (Bitstream)fileIdToBitstream.get(ofileId); - String newName = makeDerivedFilename(bundleName, obs.getName()); - if (newName != null) - { - String mfileId = mfile.getAttributeValue("ID"); - Bitstream bs = (Bitstream)fileIdToBitstream.get(mfileId); - bs.setName(newName); - bs.update(); - } - } - } + log.error("Cannot find bitstream for filename=\""+path+ + "\", skipping it..may cause problems later."); + throw new PackageValidationException("Cannot resolve bitstream for logo, from file ID="+logoID); } - - // Sanity-check the resulting metadata on the Item: - PackageUtils.checkMetadata(item); - - /* 4. 
Set primary bitstream; same Bundle - */ - Element pbsFile = manifest.getPrimaryBitstream(); - if (pbsFile != null) - { - Bitstream pbs = (Bitstream)fileIdToBitstream.get(pbsFile.getAttributeValue("ID")); - if (pbs == null) - log.error("Got Primary Bitstream file ID="+pbsFile.getAttributeValue("ID")+ - ", but found no corresponding bitstream."); else { - Bundle bn[] = pbs.getBundles(); - if (bn.length > 0) - bn[0].setPrimaryBitstreamID(pbs.getID()); + // build compare lists by deleting matches. + if (packageFiles.contains(path)) + packageFiles.remove(path); + if (dso.getType() == Constants.COLLECTION) + ((Collection)dso).setLogo(bs.retrieve()); else - log.error("Sanity check, got primary bitstream without any parent bundle."); + ((Community)dso).setLogo(bs.retrieve()); + } + break; + } + } } } + else + throw new PackageValidationException("Unknown DSpace Object type in package, type="+String.valueOf(type)); - // have subclass manage license since it may be extra package file. - addLicense(context, collection, item, manifest, callback, license ); + /* 7. Crosswalk the DMD for the object; + * if an Item, also sanity-check the metadata for minimum reqs. + */ + crosswalkObjectDmd(context, dso, manifest, callback, manifest.getItemDmds()); - // subclass hook for final checks and rearrangements - finishItem(context, item); + if (type == Constants.ITEM) + PackageUtils.checkMetadata((Item)dso); - // commit any changes to bundles - Bundle allBn[] = item.getBundles(); - for (int i = 0; i < allBn.length; ++i) - { - allBn[i].update(); - } + // 8. subclass hook for final checks and rearrangements + finishObject(context, dso); - wi.update(); + // this also updates the wrapped object. 
+ if (result != null) + result.update(); success = true; log.info(LogManager.getHeader(context, "ingest", - "Created new Item, db ID="+String.valueOf(item.getID())+ - ", WorkspaceItem ID="+String.valueOf(wi.getID()))); - return wi; + "Created new Object, type="+String.valueOf(dso.getType())+ + ", dbID="+String.valueOf(dso.getID()))); + return result; } catch (SQLException se) { // disable attempt to delete the workspace object, since // database may have suffered a fatal error and the // transaction rollback will get rid of it anyway. - wi = null; + result = null; // Pass this exception on to the next handler. throw se; } finally { + // get rid of any leftover package files + for (Iterator bi = deleteMe.keySet().iterator(); + bi.hasNext();) + { + String fname = (String)bi.next(); + Bitstream bs = (Bitstream)deleteMe.get(fname); + if (log.isDebugEnabled()) + log.debug("Queueing leftover bitstream named \""+fname+"\" for deletion."); + if (result != null) + result.addObjectToDelete(bs); + } + deleteMe.clear(); + + if (result != null) + { // kill item (which also deletes bundles, bitstreams) if ingest fails - if (!success && wi != null) - wi.deleteAll(); + if (!success) + result.deleteAll(); + + // cleanup unneeded registered objects. + else + result.cleanup(); + } } } /** * XXX FIXME Replace is not implemented yet. */ - public Item replace(Context ctx, Item item, InputStream pckage, PackageParameters params) + public DSpaceObject replace(Context context, DSpaceObject dso, + InputStream in, PackageParameters params) throws PackageException, UnsupportedOperationException, CrosswalkException, AuthorizeException, SQLException, IOException @@ -487,19 +592,18 @@ throw new UnsupportedOperationException("The replace operation is not implemented."); } - // return name of derived file as if MediaFilter created it, or null - private String makeDerivedFilename(String bundleName, String origName) + // whether or not to save manifest as a bitstream in METADATA bndl. 
+ private boolean preserveManifest() { - // get the MediaFilter that would create this bundle: - String mfNames[] = PluginManager.getAllPluginNames(MediaFilter.class); + return ConfigurationManager.getBooleanProperty("mets."+ + getConfigurationName()+".ingest.preserveManifest", false); + } - for (int i = 0; i < mfNames.length; ++i) + // return short name of manifest bitstream format + private String getManifestBitstreamFormat() { - MediaFilter mf = (MediaFilter)PluginManager.getNamedPlugin(MediaFilter.class, mfNames[i]); - if (bundleName.equals(mf.getBundleName())) - return mf.getFilteredName(origName); - } - return null; + return ConfigurationManager.getProperty("mets."+ + getConfigurationName()+".ingest.manifestBitstreamFormat"); } /** @@ -537,8 +641,10 @@ * @param missingFiles files referenced by manifest but not in package * */ - abstract public void checkPackageFiles(Set packageFiles, Set missingFiles, - METSManifest manifest) + abstract public void checkPackageFiles(Set packageFiles, + Set missingFiles, + METSManifest manifest, + PackageParameters params) throws PackageValidationException, CrosswalkException; /** @@ -547,7 +653,7 @@ * (if any) of the metadata sections to crosswalk to get the * descriptive metadata for the item being ingested. It is * responsible for calling the crosswalk, using the manifest's helper - * i.e. manifest.crosswalkItem(context,item,dmdElement,callback); + * i.e. manifest.crosswalkItemDmd(context,item,dmdElement,callback); * (The final argument is a reference to itself since the * class also implements the METSManifest.MdRef interface * to fetch package files referenced by mdRef elements.) @@ -559,10 +665,10 @@ * @param dmds array of Elements, each a METS dmdSec that applies to the Item as a whole. 
* */ - abstract public void chooseItemDmd(Context context, Item item, + abstract public void crosswalkObjectDmd(Context context, DSpaceObject dso, METSManifest manifest, MdrefManager cb, Element dmds[]) - throws CrosswalkException, + throws CrosswalkException, PackageValidationException, AuthorizeException, SQLException, IOException; /** @@ -577,15 +683,17 @@ * information of interest, e.g. a Creative Commons license. *

* This framework does not add any licenses by default. + *

+ * Note that crosswalking rightsMD sections can also add a deposit or CC + * license to the object. * * @param context the DSpace context * @param collection DSpace Collection to which the item is being submitted. * @param license optional user-supplied Deposit License text (may be null) */ - abstract public void addLicense(Context context, Collection collection, - Item item, METSManifest manifest, - MdrefManager callback, String license) - throws PackageValidationException, CrosswalkException, + abstract public void addLicense(Context context, Item item, String license, + Collection collection, PackageParameters params) + throws PackageValidationException, AuthorizeException, SQLException, IOException; /** @@ -597,8 +705,46 @@ * * @param context the DSpace context */ - abstract public void finishItem(Context context, Item item) + abstract public void finishObject(Context context, DSpaceObject dso) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException; + /** + * Determines what type of DSpace object is represented in this METS doc. + * @returns one of the object types in Constants. + */ + abstract public int getObjectType(METSManifest manifest) + throws PackageValidationException; + + /** + * Find the Bitstream corresponding to the given pathname relative + * to the package. If it identifies a member of the packageFiles + * map, the implementation should delete it to mark it as processed. + * @return Bitstream object, never null. + */ + abstract public Bitstream resolveBitstream(Context context, + String path, + Element mfile, + Map packageFiles, + PackageParameters params) + throws SQLException, PackageValidationException; + + /** + * Subclass-dependent final processing on a Bitstream; could include + * fixing up the name, bundle, other attributes. 
+ */ + abstract public void finishBitstream(Context context, + Bitstream bs, + Element mfile, + METSManifest manifest, + PackageParameters params) + throws MetadataValidationException, SQLException, AuthorizeException, IOException; + + + /** + * Returns keyword that makes the configuration keys of this subclass + * unique, e.g. if it returns NAME, they key would be: + * "mets.ingest.NAME.preserveManifest = true" + */ + abstract public String getConfigurationName(); } Index: src/org/dspace/content/packager/DSpaceMETSDisseminator.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/packager/DSpaceMETSDisseminator.java,v retrieving revision 1.1 diff -b -w -u -r1.1 DSpaceMETSDisseminator.java --- src/org/dspace/content/packager/DSpaceMETSDisseminator.java 17 Mar 2006 00:04:38 -0000 1.1 +++ src/org/dspace/content/packager/DSpaceMETSDisseminator.java 4 Jan 2007 08:08:33 -0000 @@ -42,18 +42,25 @@ import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.sql.SQLException; +import java.util.Date; +import java.util.List; +import java.util.ArrayList; import org.apache.log4j.Logger; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; import org.dspace.content.BitstreamFormat; import org.dspace.content.Bundle; +import org.dspace.content.DSpaceObject; import org.dspace.content.Item; import org.dspace.core.Constants; +import org.dspace.core.ConfigurationManager; import org.dspace.core.Context; import org.dspace.license.CreativeCommons; +import edu.harvard.hul.ois.mets.Agent; import edu.harvard.hul.ois.mets.AmdSec; import edu.harvard.hul.ois.mets.BinData; import edu.harvard.hul.ois.mets.Loctype; @@ -61,9 +68,14 @@ import edu.harvard.hul.ois.mets.MdWrap; import edu.harvard.hul.ois.mets.Mdtype; import edu.harvard.hul.ois.mets.Mets; +import edu.harvard.hul.ois.mets.MetsHdr; import edu.harvard.hul.ois.mets.RightsMD; +import 
edu.harvard.hul.ois.mets.Role; import edu.harvard.hul.ois.mets.helper.Base64; import edu.harvard.hul.ois.mets.helper.MetsException; +import edu.harvard.hul.ois.mets.Type; +import edu.harvard.hul.ois.mets.Name; +import edu.harvard.hul.ois.mets.helper.PCData; /** * Packager plugin to produce a @@ -97,12 +109,19 @@ private final static String PROFILE_LABEL = "DSpace METS SIP Profile 1.0"; // MDTYPE value for deposit license -- "magic string" + // NOTE: format is : private final static String DSPACE_DEPOSIT_LICENSE_MDTYPE = - "DSpace Deposit License"; + "DSpaceDepositLicense:DSPACE_DEPLICENSE"; - // MDTYPE value for CC license -- "magic string" - private final static String CREATIVE_COMMONS_LICENSE_MDTYPE = - "Creative Commons"; + // MDTYPE value for CC license in RDF -- "magic string" + // NOTE: format is : + private final static String CREATIVE_COMMONS_RDF_MDTYPE = + "CreativeCommonsRDF:DSPACE_CCRDF"; + + // MDTYPE value for CC license in Text -- "magic string" + // NOTE: format is : + private final static String CREATIVE_COMMONS_TEXT_MDTYPE = + "CreativeCommonsText:DSPACE_CCTXT"; /** * Return identifier string for the profile this produces. @@ -131,17 +150,42 @@ } /** + * Create metsHdr element - separate so subclasses can override. + */ + public MetsHdr makeMetsHdr(Context context, DSpaceObject dso, + PackageParameters params) + { + MetsHdr metsHdr = new MetsHdr(); + metsHdr.setCREATEDATE(new Date()); // FIXME: CREATEDATE is now: + // maybe should be item create + // date? + + // Agent + Agent agent = new Agent(); + agent.setROLE(Role.CUSTODIAN); + agent.setTYPE(Type.ORGANIZATION); + Name name = new Name(); + name.getContent() + .add(new PCData(ConfigurationManager + .getProperty("dspace.name"))); + agent.getContent().add(name); + metsHdr.getContent().add(agent); + return metsHdr; + } + + + /** * Get DMD choice for Item. It defaults to MODS, but is overridden * by the package parameters if they contain any "dmd" keys. 
The * params may contain one or more values for "dmd"; each of those is * the name of a crosswalk plugin, optionally followed by colon and * its METS MDTYPE name. */ - public String [] getDmdTypes(PackageParameters params) + public String [] getDmdTypes(Context context, DSpaceObject dso, PackageParameters params) throws SQLException, IOException, AuthorizeException { - // XXX maybe let dmd choices be configured in DSpace config too? + // XXX FIXME maybe let dmd choices be configured in DSpace config? String result[] = null; if (params != null) @@ -159,144 +203,71 @@ * Default is PREMIS. This is both the name of the crosswalk plugin * and the METS MDTYPE. */ - public String getTechMdType(PackageParameters params) + public String[] getTechMdTypes(Context context, DSpaceObject dso, PackageParameters params) throws SQLException, IOException, AuthorizeException { - return "PREMIS"; - } - - /** - * Add rights MD (licenses) for DSpace item. These - * may include a deposit license, and Creative Commons. - */ - public void addRightsMd(Context context, Item item, AmdSec amdSec) - throws SQLException, IOException, AuthorizeException, MetsException + if (dso.getType() == Constants.BITSTREAM) { - addDepositLicense(context, item, amdSec); - addCreativeCommons(context, item, amdSec); + String result[] = new String[1]; + result[0] = "PREMIS"; + return result; } - - // Add deposit license, if any, as external file. - // Give it a unique name including the SID in case there are other - // deposit license artifacts in the Item. 
- private boolean addDepositLicense(Context context, Item item, AmdSec amdSec) - throws SQLException, IOException, AuthorizeException, MetsException - { - Bitstream licenseBs = findDepositLicense(context, item); - - if (licenseBs == null) - return false; else - { - String resource = "depositlicense_"+ - String.valueOf(licenseBs.getSequenceID())+".txt"; - addRightsStream(licenseBs.retrieve(), resource, "text/plain", - DSPACE_DEPOSIT_LICENSE_MDTYPE, amdSec); - return true; - } + return new String[0]; } - // if there's a CC RDF license, chuck it in external file. - private boolean addCreativeCommons(Context context, Item item, AmdSec amdSec) - throws SQLException, IOException, AuthorizeException, MetsException + public String[] getSourceMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException { - // License as base64encoded... - Bitstream cc; + return new String[0]; + } - if ((cc = CreativeCommons.getLicenseRdfBitstream(item)) != null) + public String[] getDigiprovMdTypes(Context context, DSpaceObject dso, PackageParameters params) + throws SQLException, IOException, AuthorizeException { - addRightsStream(cc.retrieve(), - (gensym("creativecommons") + ".rdf"), - "text/rdf", - CREATIVE_COMMONS_LICENSE_MDTYPE, amdSec); - } - else if ((cc = CreativeCommons.getLicenseTextBitstream(item)) != null) - { - addRightsStream(cc.retrieve(), - (gensym("creativecommons") + ".txt"), - "text/plain", - CREATIVE_COMMONS_LICENSE_MDTYPE, amdSec); - } - else - return false; - return true; + return new String[0]; } - // utility to add a stream to the METS manifest. - // use external file and mdRef if possible, wrap and binData if not. 
- private void addRightsStream(InputStream is , String resourceName, - String mimeType, String mdType, AmdSec amdSec) - throws IOException, MetsException - { - RightsMD rightsMD = new RightsMD(); - rightsMD.setID(gensym("rights")); - if (extraFiles == null) - { - MdWrap rightsMDWrap = new MdWrap(); - rightsMDWrap.setMIMETYPE(mimeType); - rightsMDWrap.setMDTYPE(Mdtype.OTHER); - rightsMDWrap.setOTHERMDTYPE(mdType); - BinData bin = new BinData(); - bin.getContent().add(new Base64(is)); - rightsMDWrap.getContent().add(bin); - rightsMD.getContent().add(rightsMDWrap); - } - else + public String makeBitstreamURL(Bitstream bitstream, PackageParameters params) { - extraFiles.put(resourceName, is); - MdRef rightsMDRef = new MdRef(); - rightsMDRef.setMIMETYPE(mimeType); - rightsMDRef.setMDTYPE(Mdtype.OTHER); - rightsMDRef.setOTHERMDTYPE(mdType); - rightsMDRef.setLOCTYPE(Loctype.URL); - rightsMDRef.setXlinkHref(resourceName); - rightsMD.getContent().add(rightsMDRef); - } - amdSec.getContent().add(rightsMD); + String base = "bitstream_"+String.valueOf(bitstream.getID()); + String ext[] = bitstream.getFormat().getExtensions(); + return (ext.length > 0) ? base+"."+ext[0] : base; } /** - * Utility to find the license bitstream from an item - * - * @param context - * DSpace context - * @param item - * the item - * @return the license bitstream or null - * - * @throws IOException - * if the license bitstream can't be read + * Add rights MD (licenses) for DSpace item. These + * may include a deposit license, and Creative Commons. 
*/ - private static Bitstream findDepositLicense(Context context, Item item) + public String[] getRightsMdTypes(Context context, DSpaceObject dso, PackageParameters params) throws SQLException, IOException, AuthorizeException { - // get license format ID - int licenseFormatId = -1; - BitstreamFormat bf = BitstreamFormat.findByShortDescription(context, - "License"); - if (bf != null) - licenseFormatId = bf.getID(); + List result = new ArrayList(); - Bundle[] bundles = item.getBundles(Constants.LICENSE_BUNDLE_NAME); - for (int i = 0; i < bundles.length; i++) + if (dso.getType() == Constants.ITEM) { - // Assume license will be in its own bundle - Bitstream[] bitstreams = bundles[i].getBitstreams(); + Item item = (Item)dso; + if (PackageUtils.findDepositLicense(context, item) != null) + result.add(DSPACE_DEPOSIT_LICENSE_MDTYPE); - if (bitstreams[0].getFormat().getID() == licenseFormatId) - { - return bitstreams[0]; + if (CreativeCommons.getLicenseRdfBitstream(item) != null) + result.add(CREATIVE_COMMONS_RDF_MDTYPE); + else if (CreativeCommons.getLicenseTextBitstream(item) != null) + result.add(CREATIVE_COMMONS_TEXT_MDTYPE); } - } - - // Oops! No license! - return null; + return result.toArray(new String[result.size()]); } // This is where we'd elaborate on the default structMap; nothing to add, yet. - public void addStructMap(Context context, Item item, + public void addStructMap(Context context, DSpaceObject dso, PackageParameters params, Mets mets) throws SQLException, IOException, AuthorizeException, MetsException { } + + // only exclude metadata bundles from package. + public boolean includeBundle(Bundle bundle) + { + return ! 
PackageUtils.isMetaInfoBundle(bundle); + } } Index: src/org/dspace/content/packager/DSpaceMETSIngester.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/packager/DSpaceMETSIngester.java,v retrieving revision 1.1 diff -b -w -u -r1.1 DSpaceMETSIngester.java --- src/org/dspace/content/packager/DSpaceMETSIngester.java 17 Mar 2006 00:04:38 -0000 1.1 +++ src/org/dspace/content/packager/DSpaceMETSIngester.java 4 Jan 2007 08:08:33 -0000 @@ -41,19 +41,26 @@ package org.dspace.content.packager; import java.io.IOException; +import java.io.InputStream; import java.sql.SQLException; import java.util.Set; +import java.util.Map; import org.apache.log4j.Logger; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; import org.dspace.content.Bundle; import org.dspace.content.Collection; +import org.dspace.content.DSpaceObject; import org.dspace.content.Item; import org.dspace.content.crosswalk.CrosswalkException; import org.dspace.content.crosswalk.MetadataValidationException; import org.dspace.core.Context; +import org.dspace.core.Constants; +import org.dspace.core.PluginManager; import org.dspace.license.CreativeCommons; +import org.dspace.app.mediafilter.MediaFilter; + import org.jdom.Element; /** @@ -91,9 +98,8 @@ throw new MetadataValidationException("METS has unacceptable PROFILE value, profile="+profile); } - // nothing needed. public void checkPackageFiles(Set packageFiles, Set missingFiles, - METSManifest manifest) + METSManifest manifest, PackageParameters params) throws PackageValidationException, CrosswalkException { // This is where a subclass would arrange to use or ignore @@ -110,15 +116,17 @@ * same GROUPID
* 3. Crosswalk remaining DMDs not eliminated already. */ - public void chooseItemDmd(Context context, Item item, + public void crosswalkObjectDmd(Context context, DSpaceObject dso, METSManifest manifest, AbstractMETSIngester.MdrefManager callback, Element dmds[]) - throws CrosswalkException, + throws CrosswalkException, PackageValidationException, AuthorizeException, SQLException, IOException { int found = -1; + Item item = (Item)dso; + // MODS is preferred for (int i = 0; i < dmds.length; ++i) if ("MODS".equals(manifest.getMdType(dmds[i]))) @@ -135,7 +143,7 @@ String groupID = null; if (found >= 0) { - manifest.crosswalkItem(context, item, dmds[found], callback); + manifest.crosswalkItemDmd(context, item, dmds[found], callback); groupID = dmds[found].getAttributeValue("GROUPID"); if (groupID != null) @@ -144,7 +152,7 @@ { String g = dmds[i].getAttributeValue("GROUPID"); if (g != null && !g.equals(groupID)) - manifest.crosswalkItem(context, item, dmds[i], callback); + manifest.crosswalkItemDmd(context, item, dmds[i], callback); } } } @@ -154,7 +162,7 @@ else { if (dmds.length > 0) - manifest.crosswalkItem(context, item, dmds[0], callback); + manifest.crosswalkItemDmd(context, item, dmds[0], callback); } } @@ -165,52 +173,114 @@ * default deposit license. * For Creative Commons, look for a rightsMd containing a CC license. 
*/ - public void addLicense(Context context, Collection collection, - Item item, METSManifest manifest, - AbstractMETSIngester.MdrefManager callback, - String license) - throws PackageValidationException, CrosswalkException, + public void addLicense(Context context, Item item, String license, + Collection collection, PackageParameters params) + throws PackageValidationException, AuthorizeException, SQLException, IOException { + if (PackageUtils.findDepositLicense(context, item) == null) PackageUtils.addDepositLicense(context, license, item, collection); + } - // If package includes a Creative Commons license, add that: - Element rmds[] = manifest.getItemRightsMD(); - for (int i = 0; i < rmds.length; ++i) + public void finishObject(Context context, DSpaceObject dso) + throws PackageValidationException, CrosswalkException, + AuthorizeException, SQLException, IOException { - String type = manifest.getMdType(rmds[i]); - if (type != null && type.equals("Creative Commons")) + // nothing to do. + } + + public int getObjectType(METSManifest manifest) + throws PackageValidationException { - log.debug("Got Creative Commons license in rightsMD"); - CreativeCommons.setLicense(context, item, - manifest.getMdContentAsStream(rmds[i], callback), - manifest.getMdContentMimeType(rmds[i])); + return Constants.ITEM; + } + - // if there was a bitstream, get rid of it, since - // it's just an artifact now that the CC license is installed. 
- Element mdRef = rmds[i].getChild("mdRef", METSManifest.metsNS); - if (mdRef != null) + public Bitstream resolveBitstream(Context context, + String path, + Element mfile, + Map packageFiles, + PackageParameters params) + throws SQLException,PackageValidationException { - Bitstream bs = callback.getBitstreamForMdRef(mdRef); - if (bs != null) + if (packageFiles.containsKey(path)) { - Bundle parent[] = bs.getBundles(); - if (parent.length > 0) + Bitstream result = (Bitstream)packageFiles.get(path); + packageFiles.remove(path); + + // Now that we're done using Name to match to , + // set default bitstream Name to last path element; + // e.g. Zip entries all have '/' pathname separators + // NOTE: set default here, hopefully crosswalk of + // a bitstream techMD section will override it. + String fname = result.getName(); + int lastSlash = fname.lastIndexOf('/'); + if (lastSlash >= 0 && lastSlash+1 < fname.length()) + result.setName(fname.substring(lastSlash+1)); + + return result; + } + return null; + } + + // return name of derived file as if MediaFilter created it, or null + // only needed when importing a SIP without canonical DSpace derived file naming. + private String makeDerivedFilename(String bundleName, String origName) + { + // get the MediaFilter that would create this bundle: + String mfNames[] = PluginManager.getAllPluginNames(MediaFilter.class); + + for (int i = 0; i < mfNames.length; ++i) { - parent[0].removeBitstream(bs); - parent[0].update(); + MediaFilter mf = (MediaFilter)PluginManager.getNamedPlugin(MediaFilter.class, mfNames[i]); + if (bundleName.equals(mf.getBundleName())) + return mf.getFilteredName(origName); } + return null; } + + /** + * Take a second pass over files to correct names of derived files + * (e.g. 
thumbnails, extracted text) to what DSpace expects: + */ + public void finishBitstream(Context context, + Bitstream bs, + Element mfile, + METSManifest manifest, + PackageParameters params) + throws MetadataValidationException, SQLException, AuthorizeException, IOException + { + String bundleName = manifest.getBundleName(mfile); + if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) + { + String opath = manifest.getOriginalFilePath(mfile); + if (opath != null) + { + // String ofileId = origFile.getAttributeValue("ID"); + // Bitstream obs = (Bitstream)fileIdToBitstream.get(ofileId); + + String newName = makeDerivedFilename(bundleName, opath); + + if (newName != null) + { + //String mfileId = mfile.getAttributeValue("ID"); + //Bitstream bs = (Bitstream)fileIdToBitstream.get(mfileId); + bs.setName(newName); + bs.update(); } } } } - // last change to fix up Item. - public void finishItem(Context context, Item item) - throws PackageValidationException, CrosswalkException, - AuthorizeException, SQLException, IOException + + public String getConfigurationName() { - // nothing to do. 
+ return "dspaceSIP"; + } + + + public boolean probe(Context context, InputStream in, PackageParameters params) + { + throw new UnsupportedOperationException("PDF package ingester does not implement probe()"); } } Index: src/org/dspace/content/packager/METSManifest.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/packager/METSManifest.java,v retrieving revision 1.1 diff -b -w -u -r1.1 METSManifest.java --- src/org/dspace/content/packager/METSManifest.java 17 Mar 2006 00:04:38 -0000 1.1 +++ src/org/dspace/content/packager/METSManifest.java 4 Jan 2007 08:08:34 -0000 @@ -60,11 +60,13 @@ import org.dspace.content.crosswalk.CrosswalkObjectNotSupported; import org.dspace.content.crosswalk.MetadataValidationException; import org.dspace.content.crosswalk.IngestionCrosswalk; +import org.dspace.content.crosswalk.StreamIngestionCrosswalk; import org.dspace.core.ConfigurationManager; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.core.PluginManager; import org.jdom.Document; +import org.jdom.Content; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.Namespace; @@ -144,7 +146,8 @@ * @throw AuthorizeException if it is returned by services called by this method. */ public InputStream getInputStream(Element mdRef) - throws MetadataValidationException, IOException, SQLException, AuthorizeException; + throws MetadataValidationException, PackageValidationException, + IOException, SQLException, AuthorizeException; } /** log4j category */ @@ -156,10 +159,10 @@ /** Prefix of DSpace configuration lines that map METS metadata type to * crosswalk plugin names. 
*/ - private final static String CONFIG_METADATA_PREFIX = "mets.submission.crosswalk."; + private final static String CONFIG_METS_PREFIX = "mets."; /** prefix of config lines identifying local XML Schema (XSD) files */ - private final static String CONFIG_XSD_PREFIX = "mets.xsd."; + private final static String CONFIG_XSD_PREFIX = CONFIG_METS_PREFIX+"xsd."; /** Dublin core element namespace */ private static Namespace dcNS = Namespace @@ -174,7 +177,7 @@ .getNamespace("mets", "http://www.loc.gov/METS/"); /** XLink namespace -- includes "xlink" prefix prefix for use in XPaths */ - private static Namespace xlinkNS = Namespace + public static Namespace xlinkNS = Namespace .getNamespace("xlink", "http://www.w3.org/1999/xlink"); /** root element of the current METS manifest. */ @@ -189,13 +192,16 @@ /** builder to use for mdRef streams, inherited from create() */ private SAXBuilder parser = null; + /** name of packager who created this manifest object, for looking up configuration entries. */ + private String configName; + // Create list of local schemas at load time, since it depends only // on the DSpace configuration. private static String localSchemas; static { String dspace_dir = ConfigurationManager.getProperty("dspace.dir"); - File xsdPath1 = new File(dspace_dir+"/config/schemas/"); + File xsdPath1 = new File(dspace_dir+"/config/crosswalks/"); File xsdPath2 = new File(dspace_dir+"/config/"); Enumeration pe = ConfigurationManager.propertyNames(); @@ -206,7 +212,7 @@ // mets.xsd.{identifier} = {namespace} {xsd-URL} // e.g. 
// mets.xsd.dc = http://purl.org/dc/elements/1.1/ dc.xsd - // (filename is relative to {dspace_dir}/config/schemas/) + // (filename is relative to {dspace_dir}/config/crosswalks/) String key = (String)pe.nextElement(); if (key.startsWith(CONFIG_XSD_PREFIX)) { @@ -239,6 +245,7 @@ } } localSchemas = result.toString(); + if (log.isDebugEnabled()) log.debug("Got local schemas = \""+localSchemas+"\""); } @@ -247,11 +254,12 @@ * @param builder XML parser (for parsing mdRef'd files and binData) * @param mets parsed METS document */ - private METSManifest(SAXBuilder builder, Element mets) + private METSManifest(SAXBuilder builder, Element mets, String configName) { super(); this.mets = mets; parser = builder; + this.configName = configName; } /** @@ -264,12 +272,14 @@ * or validating the METS. * @return new METSManifest object. */ - public static METSManifest create(InputStream is, boolean validate) + public static METSManifest create(InputStream is, boolean validate, String configName) throws IOException, MetadataValidationException { SAXBuilder builder = new SAXBuilder(validate); + builder.setIgnoringElementContentWhitespace(true); + // Set validation feature if (validate) builder.setFeature("http://apache.org/xml/features/validation/schema", @@ -289,12 +299,13 @@ { metsDocument = builder.build(is); - // XXX for temporary debugging - /* - XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat()); - log.debug("Got METS DOCUMENT:"); - log.debug(outputPretty.outputString(metsDocument)); - */ + /*** XXX leave commented out except if needed for + *** viewing the METS document that actually gets read. 
+ * + * XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat()); + * log.debug("Got METS DOCUMENT:"); + * log.debug(outputPretty.outputString(metsDocument)); + ****/ } catch (JDOMException je) { @@ -302,7 +313,7 @@ + is.toString(), je); } - return new METSManifest(builder, metsDocument.getRootElement()); + return new METSManifest(builder, metsDocument.getRootElement(), configName); } /** @@ -381,9 +392,9 @@ * attribute is peculiar to the DSpace METS SIP profile, and may not be * generally useful with other sorts of METS documents. * @param file METS file element of derived file - * @return file Element of original or null if none found. + * @return file path of original or null if none found. */ - public Element getOriginalFile(Element file) + public String getOriginalFilePath(Element file) { String groupID = file.getAttributeValue("GROUPID"); if (groupID == null || groupID.equals("")) @@ -397,10 +408,12 @@ List oFiles = xpath.selectNodes(mets); if (oFiles.size() > 0) { + if (log.isDebugEnabled()) log.debug("Got ORIGINAL file for derived="+file.toString()); - return (Element)oFiles.get(0); + Element flocat = ((Element)oFiles.get(0)).getChild("FLocat", metsNS); + if (flocat != null) + return flocat.getAttributeValue("href", xlinkNS); } - else return null; } catch (JDOMException je) @@ -483,7 +496,7 @@ * * @return file element of Item's primary bitstream, or null if there is none. */ - public Element getPrimaryBitstream() + public Element getPrimaryOrLogoBitstream() throws MetadataValidationException { Element firstDiv = getFirstDiv(); @@ -499,7 +512,8 @@ return result; } - /** Get the metadata type from within a *mdSec element. + /** + * Get the metadata type from within a *mdSec element. * @return metadata type name. */ public String getMdType(Element mdSec) @@ -547,10 +561,27 @@ * @throws MetadataValidationException if METS is invalid, or there is an error parsing the XML. 
*/ public List getMdContentAsXml(Element mdSec, Mdref callback) - throws MetadataValidationException, IOException, SQLException, AuthorizeException + throws MetadataValidationException, PackageValidationException, + IOException, SQLException, AuthorizeException { try { + // XXX sanity check: if this has more than one child, consider it + // an error since we cannot deal with more than one mdRef|mdWrap + // child. This may be considered a bug and need to be fixed, + // so it's best to bring it to the attention of users. + List mdc = mdSec.getChildren(); + if (mdc.size() > 1) + { + // XXX scaffolding for debugging diagnosis; at least one + // XML parser stupidly includes newlines in prettyprinting + // as text content objects.. + String id = mdSec.getAttributeValue("ID"); + StringBuffer sb = new StringBuffer(); + for (Iterator mi = mdc.iterator(); mi.hasNext();) + sb.append(", ").append(((Content)mi.next()).toString()); + throw new MetadataValidationException("Cannot parse METS with "+mdSec.getQualifiedName()+" element that contains more than one child, size="+String.valueOf(mdc.size())+", ID="+id+"Kids="+sb.toString()); + } Element mdRef = null; Element mdWrap = mdSec.getChild("mdWrap", metsNS); if (mdWrap != null) @@ -620,7 +651,8 @@ * @throws MetadataValidationException if METS format does not contain any metadata. */ public InputStream getMdContentAsStream(Element mdSec, Mdref callback) - throws MetadataValidationException, IOException, SQLException, AuthorizeException + throws MetadataValidationException, PackageValidationException, + IOException, SQLException, AuthorizeException { Element mdRef = null; Element mdWrap = mdSec.getChild("mdWrap", metsNS); @@ -655,27 +687,6 @@ } - // special call to crosswalk the guts of a metadata *Sec (dmdSec, amdSec) - // because mdRef and mdWrap have to be handled differently. - // It's a lot like getMdContentAsXml but cannot use that because xwalk - // should be called with root element OR list depending on what was given. 
- private void crosswalkMdContent(Element mdSec, Mdref callback, - IngestionCrosswalk xwalk, Context context, DSpaceObject dso) - throws CrosswalkException, IOException, SQLException, AuthorizeException - { - List xml = getMdContentAsXml(mdSec,callback); - - // if we get inappropriate metadata, e.g. PREMIS for Item, let it go. - try - { - xwalk.ingest(context, dso, xml); - } - catch (CrosswalkObjectNotSupported e) - { - log.warn("Skipping metadata for inappropriate type of object: Object="+dso.toString()+", error="+e.toString()); - } - } - // return first

of first ; // in DSpace profile, this is where item-wide dmd and other metadata // lives as IDrefs. @@ -690,6 +701,7 @@ if (result == null) throw new MetadataValidationException("METS document is missing the required first div element in first structMap."); + if (log.isDebugEnabled()) log.debug("Got firstDiv result="+result.toString()); return (Element)result; } @@ -719,18 +731,26 @@ } // Find crosswalk for the indicated metadata type (e.g. "DC", "MODS") - // The crosswalk plugin name MAY be indirected in config file, - // through an entry like - // mets.submission.crosswalk.{mdType} = {pluginName} - // e.g. - // mets.submission.crosswalk.DC = mysite-QDC - private IngestionCrosswalk getCrosswalk(String type) + private Object getCrosswalk(String type, Class clazz) { - String xwalkName = ConfigurationManager.getProperty(CONFIG_METADATA_PREFIX + type); + /** + * Allow DSpace Config to map the metadata type to a + * different crosswalk name either per-packager or for METS + * in general. First, look for config key like: + * mets..ingest.crosswalk.MDNAME = XWALKNAME + * then try + * mets.default.ingest.crosswalk.MDNAME = XWALKNAME + */ + String xwalkName = ConfigurationManager.getProperty( + CONFIG_METS_PREFIX+configName+".ingest.crosswalk."+type); + if (xwalkName == null) + { + xwalkName = ConfigurationManager.getProperty( + CONFIG_METS_PREFIX+"default.ingest.crosswalk."+type); if (xwalkName == null) xwalkName = type; - return (IngestionCrosswalk) - PluginManager.getNamedPlugin(IngestionCrosswalk.class, xwalkName); + } + return PluginManager.getNamedPlugin(clazz, xwalkName); } /** @@ -769,6 +789,7 @@ String amds = firstDiv.getAttributeValue("ADMID"); if (amds == null) { + if (log.isDebugEnabled()) log.debug("getItemRightsMD: No ADMID references found."); return new Element[0]; } @@ -787,17 +808,144 @@ /** * Invokes appropriate crosswalks on Item-wide descriptive metadata. 
*/ - public void crosswalkItem(Context context, Item item, Element dmd, Mdref callback) - throws MetadataValidationException, + public void crosswalkItemDmd(Context context, DSpaceObject dso, + Element dmdSec, Mdref callback) + throws MetadataValidationException, PackageValidationException, + CrosswalkException, IOException, SQLException, AuthorizeException + { + crosswalkXmd(context, dso, dmdSec, callback); + } + + /** + * Crosswalk all technical and source metadata sections that belong + * to the whole object. + * @throws MetadataValidationException if METS is invalid, e.g. referenced amdSec is missing. + */ + public void crosswalkObjectOtherAdminMD(Context context, DSpaceObject dso, + Mdref callback) + throws MetadataValidationException, PackageValidationException, + CrosswalkException, IOException, SQLException, AuthorizeException + { + for (String amdID : getAmdIDs()) + { + Element amdSec = getElementByXPath("mets:amdSec[@ID=\""+amdID+"\"]", false); + for (Iterator ti = amdSec.getChildren("techMD", metsNS).iterator(); ti.hasNext();) + crosswalkXmd(context, dso, (Element)ti.next(), callback); + for (Iterator ti = amdSec.getChildren("digiprovMD", metsNS).iterator(); ti.hasNext();) + crosswalkXmd(context, dso, (Element)ti.next(), callback); + for (Iterator ti = amdSec.getChildren("rightsMD", metsNS).iterator(); ti.hasNext();) + crosswalkXmd(context, dso, (Element)ti.next(), callback); + } + } + + /** + * Just crosswalk the sourceMD sections; used to set the handle and parent of AIP. 
+ * @return true if any metadata section was actually crosswalked, false otherwise + */ + public boolean crosswalkObjectSourceMD(Context context, DSpaceObject dso, + Mdref callback) + throws MetadataValidationException, PackageValidationException, + CrosswalkException, IOException, SQLException, AuthorizeException + { + boolean result = false; + + for (String amdID : getAmdIDs()) + { + Element amdSec = getElementByXPath("mets:amdSec[@ID=\""+amdID+"\"]", false); + for (Iterator ti = amdSec.getChildren("sourceMD", metsNS).iterator(); ti.hasNext();) + { + crosswalkXmd(context, dso, (Element)ti.next(), callback); + result = true; + } + } + return result; + } + + private String[] getAmdIDs() + throws MetadataValidationException + { + // div@ADMID is actually IDREFS, a space-separated list of IDs: + Element firstDiv = getFirstDiv(); + String amds = firstDiv.getAttributeValue("ADMID"); + if (amds == null) + { + if (log.isDebugEnabled()) + log.debug("crosswalkObjectTechMD: No ADMID references found."); + return new String[0]; + } + return amds.split("\\s+"); + } + + // Crosswalk *any* kind of metadata section - techMD, rightsMD, etc. + private void crosswalkXmd(Context context, DSpaceObject dso, + Element xmd, Mdref callback) + throws MetadataValidationException, PackageValidationException, CrosswalkException, IOException, SQLException, AuthorizeException { - String type = getMdType(dmd); - IngestionCrosswalk xwalk = getCrosswalk(type); + String type = getMdType(xmd); + IngestionCrosswalk xwalk = (IngestionCrosswalk)getCrosswalk(type, IngestionCrosswalk.class); + + // If metadata is not simply applicable to object, + // let it go with a warning. 
+ try + { + // xwalk the DOM-model + if (xwalk != null) + xwalk.ingest(context, dso, getMdContentAsXml(xmd,callback)); - if (xwalk == null) + // try stream-based xwalk + else + { + StreamIngestionCrosswalk sxwalk = + (StreamIngestionCrosswalk)getCrosswalk(type, StreamIngestionCrosswalk.class); + if (sxwalk != null) + { + Element mdRef = xmd.getChild("mdRef", metsNS); + if (mdRef != null) + { + InputStream in = null; + try + { + in = callback.getInputStream(mdRef); + sxwalk.ingest(context, dso, in, + mdRef.getAttributeValue("MIMETYPE")); + } + finally + { + if (in != null) + in.close(); + } + } + else + { + Element mdWrap = xmd.getChild("mdWrap", metsNS); + if (mdWrap != null) + { + Element bin = mdWrap.getChild("binData", metsNS); + if (bin == null) + throw new MetadataValidationException("Invalid METS Manifest: mdWrap element for streaming crosswalk without binData child."); + else + { + byte value[] = Base64.decodeBase64(bin.getText().getBytes()); + sxwalk.ingest(context, dso, + new ByteArrayInputStream(value), + mdWrap.getAttributeValue("MIMETYPE")); + } + } + else throw new MetadataValidationException("Cannot process METS Manifest: "+ - "No crosswalk found for MDTYPE="+type); - crosswalkMdContent(dmd, callback, xwalk, context, item); + "Metadata of type="+type+" requires a reference to a stream (mdRef), which was not found in "+xmd.getName()); + } + } + else + throw new MetadataValidationException("Cannot process METS Manifest: "+ + "No crosswalk found for contents of "+xmd.getName()+" element, MDTYPE="+type); + } + } + catch (CrosswalkObjectNotSupported e) + { + log.warn("Skipping metadata section "+xmd.getName()+", type="+type+" inappropriate for this type of object: Object="+dso.toString()+", error="+e.toString()); + } } /** @@ -810,7 +958,7 @@ */ public void crosswalkBitstream(Context context, Bitstream bitstream, String fileId, Mdref callback) - throws MetadataValidationException, + throws MetadataValidationException, PackageValidationException, 
CrosswalkException, IOException, SQLException, AuthorizeException { Element file = getElementByXPath("descendant::mets:file[@ID=\""+fileId+"\"]", false); @@ -828,47 +976,19 @@ String amdID[] = amds.split("\\s+"); for (int i = 0; i < amdID.length; ++i) { - List techMDs = getElementByXPath("mets:amdSec[@ID=\""+amdID[i]+"\"]", false). - getChildren("techMD", metsNS); - Iterator ti = techMDs.iterator(); - while (ti.hasNext()) - { - Element techMD = (Element)ti.next(); - if (techMD != null) - { - String type = getMdType(techMD); - IngestionCrosswalk xwalk = getCrosswalk(type); - log.debug("Got bitstream techMD of type="+type+", for file ID="+fileId); - - if (xwalk == null) - throw new MetadataValidationException("Cannot process METS Manifest: "+ - "No crosswalk found for techMD MDTYPE="+type); - crosswalkMdContent(techMD, callback, xwalk, context, bitstream); - } - } + Element amdSec = getElementByXPath("mets:amdSec[@ID=\""+amdID[i]+"\"]", false); + for (Iterator ti = amdSec.getChildren("techMD", metsNS).iterator(); ti.hasNext();) + crosswalkXmd(context, bitstream, (Element)ti.next(), callback); + for (Iterator ti = amdSec.getChildren("sourceMD", metsNS).iterator(); ti.hasNext();) + crosswalkXmd(context, bitstream, (Element)ti.next(), callback); } } /** - * Find Handle (if any) identifier labelling this manifest. - * @return handle (never null) - * @throws MetadataValidationException if no handle available. + * @return root element of METS document. */ - public String getHandle() - throws MetadataValidationException + public Element getMets() { - // TODO: XXX Make configurable? Handle optionally passed in? 
- // FIXME: Not sure if OBJID is really the right place - - String handle = mets.getAttributeValue("OBJID"); - - if (handle != null && handle.startsWith("hdl:")) - { - return handle.substring(4); - } - else - { - throw new MetadataValidationException("Item has no valid Handle (OBJID)"); - } + return mets; } } Index: src/org/dspace/content/packager/PDFPackager.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/packager/PDFPackager.java,v retrieving revision 1.1 diff -b -w -u -r1.1 PDFPackager.java --- src/org/dspace/content/packager/PDFPackager.java 17 Mar 2006 00:04:38 -0000 1.1 +++ src/org/dspace/content/packager/PDFPackager.java 4 Jan 2007 08:08:34 -0000 @@ -56,6 +56,8 @@ import org.dspace.content.DSpaceObject; import org.dspace.content.Item; import org.dspace.content.WorkspaceItem; +import org.dspace.content.DSpaceObject; +import org.dspace.content.IngestionWrapper; import org.dspace.content.crosswalk.CrosswalkException; import org.dspace.content.crosswalk.MetadataValidationException; import org.dspace.core.Constants; @@ -133,7 +135,7 @@ * @throws PackageException if package is unacceptable or there is * a fatal error turning it into an Item. 
*/ - public WorkspaceItem ingest(Context context, Collection collection, + public IngestionWrapper ingest(Context context, DSpaceObject parent, InputStream pkg, PackageParameters params, String license) throws PackageValidationException, CrosswalkException, @@ -146,33 +148,11 @@ Bitstream bs = null; WorkspaceItem wi = null; - /** XXX comment out for now - // XXX for debugging of parameter handling - if (params != null) - { - Enumeration pe = params.propertyNames(); - while (pe.hasMoreElements()) - { - String name = (String)pe.nextElement(); - String v[] = params.getProperties(name); - StringBuffer msg = new StringBuffer("PackageParam: "); - msg.append(name).append(" = "); - for (int i = 0; i < v.length; ++i) - { - if (i > 0) - msg.append(", "); - msg.append(v[i]); - } - log.debug(msg); - } - } - **/ - try { // Save the PDF in a bitstream first, since the parser // has to read it as well, and we cannot "rewind" it after that. - wi = WorkspaceItem.create(context, collection, false); + wi = WorkspaceItem.create(context, (Collection)parent, false); Item myitem = wi.getItem(); original = myitem.createBundle("ORIGINAL"); bs = original.createBitstream(pkg); @@ -180,6 +160,7 @@ bs.setName("package.pdf"); setFormatToMIMEType(context, bs, "application/pdf"); bs.update(); + if (log.isDebugEnabled()) log.debug("Created bitstream ID="+String.valueOf(bs.getID())+", parsing..."); crosswalkPDF(context, myitem, bs.retrieve()); @@ -220,7 +201,8 @@ /** * Replace is not implemented. */ - public Item replace(Context ctx, Item item, InputStream pckage, PackageParameters params) + public DSpaceObject replace(Context context, DSpaceObject dso, + InputStream in, PackageParameters params) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException, UnsupportedOperationException @@ -305,6 +287,7 @@ // sanity check: item must have a title. 
if (title == null) throw new MetadataValidationException("This PDF file is unacceptable, it does not have a value for \"Title\" in its Info dictionary."); + if (log.isDebugEnabled()) log.debug("PDF Info dict title=\""+title+"\""); item.addDC("title", null, "en", title); String value; @@ -312,6 +295,7 @@ if ((value = docinfo.getAuthor()) != null) { item.addDC("contributor", "author", null, value); + if (log.isDebugEnabled()) log.debug("PDF Info dict author=\""+value+"\""); } if ((value = docinfo.getCreator()) != null) @@ -341,4 +325,9 @@ cos.close(); } } + + public boolean probe(Context context, InputStream in, PackageParameters params) + { + throw new UnsupportedOperationException("PDF package ingester does not implement probe()"); + } } Index: src/org/dspace/content/packager/PackageIngester.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/packager/PackageIngester.java,v retrieving revision 1.1 diff -b -w -u -r1.1 PackageIngester.java --- src/org/dspace/content/packager/PackageIngester.java 17 Mar 2006 00:04:38 -0000 1.1 +++ src/org/dspace/content/packager/PackageIngester.java 4 Jan 2007 08:08:34 -0000 @@ -45,23 +45,23 @@ import java.sql.SQLException; import org.dspace.authorize.AuthorizeException; -import org.dspace.content.Collection; -import org.dspace.content.Item; -import org.dspace.content.WorkspaceItem; +import org.dspace.content.DSpaceObject; +import org.dspace.content.IngestionWrapper; import org.dspace.content.crosswalk.CrosswalkException; import org.dspace.core.Context; /** * Plugin Interface to interpret a Submission Information Package (SIP) - * and create (or replace) a DSpace item from its contents. + * and create (or replace) a DSpace Object from its contents. *

- * A package is a single data stream - * containing enough information to construct an Item. It can be - * anything from an archive like a Zip file with a manifest and metadata, - * to a simple manifest containing external references to the content, - * to a self-contained file such as a PDF. The interpretation - * of the package is entirely at the discretion of the implementing class. + * A package is a single data stream containing enough information to + * construct an Object (i.e. an Item, Collection, or Community). It + * can be anything from an archive like a Zip file with a manifest and + * metadata, to a simple manifest containing external references to the + * content, to a self-contained file such as a PDF. The interpretation + * of the package is entirely at the discretion of the implementing + * class. *

* The ingest methods are also given an attribute-value * list of "parameters" which may modify their actions. @@ -72,56 +72,57 @@ * @author Larry Stone * @version $Revision: 1.1 $ * @see PackageParameters + * @see IngestionWrapper */ public interface PackageIngester { /** - * Create new Item out of the ingested package. - * The item will belong to the indicated - * collection. This creates a WorkspaceItem, so it is - * up to the caller to decide whether to install it or submit - * it to normal DSpace Workflow. + * Create new DSpaceObject out of the ingested package. The object + * is created under the indicated parent. This creates an + * IngestionWrapper (or its subclass + * WorkspaceItem). For Items, it is up to the caller to + * decide whether to install it or submit it to normal DSpace Workflow. *

- * The deposit license is passed explicitly as a string since there - * is no place for it in many package formats. It is optional and may - * be given as null. + * The deposit license (Only significant for Item) is passed + * explicitly as a string since there is no place for it in many + * package formats. It is optional and may be given as + * null. * * @param context DSpace context. - * @param collection collection under which to create new item. + * @param parent collection under which to create new item. * @param in input stream containing package to ingest. * @param params Properties-style list of options (interpreted by each packager). * @param license may be null, which takes default license. - * @return workspace item created by ingest. + * @return ingestion wrapper (or workspaceitem, for Item) created by ingest. * * @throws PackageValidationException if package is unacceptable or there is * a fatal error turning it into an Item. */ - WorkspaceItem ingest(Context context, Collection collection, InputStream in, + IngestionWrapper ingest(Context context, DSpaceObject parent, InputStream in, PackageParameters params, String license) throws PackageException, CrosswalkException, AuthorizeException, SQLException, IOException; /** - * Replace an existing Item with contents of the ingested package. + * Replace an existing DSpace Object with contents of the ingested package. * The packager may choose not to implement replace, * since it somewhat contradicts the archival nature of DSpace. * The exact function of this method is highly implementation-dependent. * * @param context DSpace context. - * @param item existing item to be replaced + * @param dso existing DSpace Object to be replaced * @param in input stream containing package to ingest. * @param params Properties-style list of options specific to this packager - * @return item re-created by ingest. + * @return object re-created by ingest. 
* * @throws PackageValidationException if package is unacceptable or there is * a fatal error turning it into an Item. * @throws UnsupportedOperationException if this packager does not * implement replace. */ - Item replace(Context context, Item item, InputStream in, - PackageParameters params) + public DSpaceObject replace(Context context, DSpaceObject dso, + InputStream in, PackageParameters params) throws PackageException, UnsupportedOperationException, CrosswalkException, AuthorizeException, SQLException, IOException; - } Index: src/org/dspace/content/packager/PackageUtils.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/content/packager/PackageUtils.java,v retrieving revision 1.2 diff -b -w -u -r1.2 PackageUtils.java --- src/org/dspace/content/packager/PackageUtils.java 15 Mar 2006 21:52:02 -0000 1.2 +++ src/org/dspace/content/packager/PackageUtils.java 4 Jan 2007 08:08:34 -0000 @@ -249,6 +249,29 @@ String shortDesc, String MIMEType, String desc) throws SQLException, AuthorizeException { + return findOrCreateBitstreamFormat(context, shortDesc, MIMEType, desc, BitstreamFormat.KNOWN, false); + } + + /** + * Find or create a bitstream format to match the given short + * description. + * Used by packager ingesters to obtain a special bitstream + * format for the manifest (and/or metadata) file. + *

+ * NOTE: When creating a new format, do NOT set any extensions, since + * we don't want any file with the same extension, which may be something + * generic like ".xml", to accidentally get set to this format. + * @param context - the context. + * @param shortDesc - short descriptive name, used to locate existing format. + * @param MIMEtype - mime content-type + * @param desc - long description + * @param internal value for the 'internal' flag of a new format if created. + * @return BitstreamFormat object that was found or created. Never null. + */ + public static BitstreamFormat findOrCreateBitstreamFormat(Context context, + String shortDesc, String MIMEType, String desc, int supportLevel, boolean internal) + throws SQLException, AuthorizeException + { BitstreamFormat bsf = BitstreamFormat.findByShortDescription(context, shortDesc); // not found, try to create one @@ -258,9 +281,49 @@ bsf.setShortDescription(shortDesc); bsf.setMIMEType(MIMEType); bsf.setDescription(desc); - bsf.setSupportLevel(BitstreamFormat.KNOWN); + bsf.setSupportLevel(supportLevel); + bsf.setInternal(internal); bsf.update(); } return bsf; } + + /** + * Utility to find the license bitstream from an item + * + * @param context + * DSpace context + * @param item + * the item + * @return the license bitstream or null + * + * @throws IOException + * if the license bitstream can't be read + */ + public static Bitstream findDepositLicense(Context context, Item item) + throws SQLException, IOException, AuthorizeException + { + // get license format ID + int licenseFormatId = -1; + BitstreamFormat bf = BitstreamFormat.findByShortDescription(context, + "License"); + if (bf != null) + licenseFormatId = bf.getID(); + + Bundle[] bundles = item.getBundles(Constants.LICENSE_BUNDLE_NAME); + for (int i = 0; i < bundles.length; i++) + { + // Assume license will be in its own bundle + Bitstream[] bitstreams = bundles[i].getBitstreams(); + + if (bitstreams[0].getFormat().getID() == licenseFormatId) + { + return 
bitstreams[0]; + } + } + + // Oops! No license! + return null; + } + } Index: src/org/dspace/core/Constants.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/core/Constants.java,v retrieving revision 1.17 diff -b -w -u -r1.17 Constants.java --- src/org/dspace/core/Constants.java 13 Feb 2006 10:33:49 -0000 1.17 +++ src/org/dspace/core/Constants.java 4 Jan 2007 08:08:34 -0000 @@ -72,11 +72,21 @@ /** Type of individual eperson objects */ public static final int EPERSON = 7; + /** Type of ingestion wrapper for Item */ + public static final int INGESTION_ITEM = 8; + + /** Type of ingestion wrapper for Collection */ + public static final int INGESTION_COLLECTION = 9; + + /** Type of ingestion wrapper for Community */ + public static final int INGESTION_COMMUNITY = 10; + /** * lets you look up type names from the type IDs */ public static final String[] typeText = { "BITSTREAM", "BUNDLE", "ITEM", - "COLLECTION", "COMMUNITY", "SITE", "GROUP", "EPERSON", }; + "COLLECTION", "COMMUNITY", "SITE", "GROUP", "EPERSON", + "INGESTION_ITEM", "INGESTION_COLLECTION", "INGESTION_COMMUNITY" }; /** * Special Bundle and Bitstream Names: Index: src/org/dspace/core/Utils.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/core/Utils.java,v retrieving revision 1.13 diff -b -w -u -r1.13 Utils.java --- src/org/dspace/core/Utils.java 30 Nov 2006 01:01:15 -0000 1.13 +++ src/org/dspace/core/Utils.java 4 Jan 2007 08:08:34 -0000 @@ -52,6 +52,13 @@ import java.util.Random; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.Date; +import java.util.Calendar; +import java.util.GregorianCalendar; +import java.text.SimpleDateFormat; +import java.text.ParseException; + +import org.apache.log4j.Logger; /** * Utility functions for DSpace. 
@@ -61,6 +68,9 @@ */ public class Utils { + /** log4j logger */ + private static Logger log = Logger.getLogger(Utils.class); + private static final Pattern DURATION_PATTERN = Pattern .compile("(\\d+)([smhdwy])"); @@ -82,6 +92,31 @@ private static VMID vmid = new VMID(); + // for parseISO8601Date + private static SimpleDateFormat parseFmt[] = + { + // first try at parsing, has milliseconds (note General time zone) + new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSSz"), + + // second try at parsing, no milliseconds (note General time zone) + new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ssz"), + + + // finally, try without any timezone (defaults to current TZ) + new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSS"), + + new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss") + }; + + // for formatISO8601Date + // output canonical format (note RFC822 time zone, easier to hack) + private static SimpleDateFormat outFmtSecond = new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ssZ"); + + // output format with millisecond precision + private static SimpleDateFormat outFmtMillisec = new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSSZ"); + + private static Calendar outCal = GregorianCalendar.getInstance(); + /** Private Constructor */ private Utils() { @@ -350,4 +385,64 @@ return qint * multiplier; } + + /** + * Translates timestamp from an ISO 8601-standard format, which + * is commonly used in XML and RDF documents. + * This method is synchronized because it depends on a non-reentrant + * static DateFormat (more efficient than creating a new one each call). + * + * @param s the input string + * @return Date object, or null if there is a problem translating.
+ */ + public static synchronized Date parseISO8601Date(String s) + { + // attempt to normalize the timezone to something we can parse; + // SimpleDateFormat can't handle "Z" + char tzSign = s.charAt(s.length()-6); + if (s.endsWith("Z")) + s = s.substring(0, s.length()-1) + "GMT+00:00"; + + // check for trailing timezone + else if (tzSign == '-' || tzSign == '+') + s = s.substring(0, s.length()-6) + "GMT" + s.substring(s.length()-6); + + // try each known format in turn (with and without milliseconds) + ParseException lastError = null; + for (int i = 0; i < parseFmt.length; ++i) + { + try + { + return parseFmt[i].parse(s); + } + catch (ParseException e) + { + lastError = e; + } + } + if (lastError != null) + log.error("Error parsing date:", lastError); + return null; + } + + /** + * Convert a Date to String in the ISO 8601 standard format. + * The RFC822 timezone is almost right, still need to insert ":". + * This method is synchronized because it depends on a non-reentrant + * static DateFormat (more efficient than creating a new one each call). + * + * @param d the input Date + * @return String containing formatted date.
+ */ + public static synchronized String formatISO8601Date(Date d) + { + String result; + outCal.setTime(d); + if (outCal.get(Calendar.MILLISECOND) == 0) + result = outFmtSecond.format(d); + else + result = outFmtMillisec.format(d); + int rl = result.length(); + return result.substring(0, rl-2) + ":" + result.substring(rl-2); + } } Index: src/org/dspace/handle/HandleManager.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/handle/HandleManager.java,v retrieving revision 1.19 diff -b -w -u -r1.19 HandleManager.java --- src/org/dspace/handle/HandleManager.java 26 May 2006 14:18:41 -0000 1.19 +++ src/org/dspace/handle/HandleManager.java 4 Jan 2007 08:08:34 -0000 @@ -48,6 +48,7 @@ import org.dspace.content.Community; import org.dspace.content.DSpaceObject; import org.dspace.content.Item; +import org.dspace.content.Site; import org.dspace.core.ConfigurationManager; import org.dspace.core.Constants; import org.dspace.core.Context; @@ -129,21 +130,16 @@ } /** - * Returns displayable string of the handle's 'temporary' URL - * http://hdl.handle.net/handle/em>. - * - * No attempt is made to verify that handle is in fact valid. - * - * @param handle The handle - * @return The canonical form - */ - - // public static String getURLForm(String handle) - // { - // return "http://hdl.handle.net/" + handle; - // } - /** - * Creates a new handle in the database. + * Create a new Handle entry by finding the highest number in an + * existing Handle suffix and adding one. + * Since the handle table may include Handles from restored or + * ingested AIPs there is no relationship between the primary key + * and the handle suffix, as used to be assumed in the old + * implementation of this function. Now we have to grovel over + * the whole table to find the highest existing Handle suffix. 
+ * Fortunately this is fairly efficient (~50 mSec on a typical server for + * 10,000 Handles) and it only gets called once in the lifecycle of + * each newly submitted object. * * @param context * DSpace context @@ -156,53 +152,171 @@ public static String createHandle(Context context, DSpaceObject dso) throws SQLException { - TableRow handle = DatabaseManager.create(context, "Handle"); - String handleId = createId(handle.getIntColumn("handle_id")); + // get prefix without trailing '/' if any.. + String handlePrefix = ConfigurationManager.getProperty("handle.prefix"); + if (handlePrefix.endsWith("/")) + handlePrefix = handlePrefix.substring(0, handlePrefix.length()-1); + + /* + * XXX FIXME: Note that this contains a race condition; while we + * run the query and compute a new Handle suffix, another process + * might be doing the same thing and reaching the same + * conclusion. The second transaction will fail because the handle + * column has a unique constraint, so it will not corrupt the table. + * The ideal solution would be to wrap a lock like: + * LOCK TABLE handle IN ACCESS EXCLUSIVE MODE; + * around the SELECT and subsequent INSERT, but this deadlocks because + * the existing Context's transaction already holds a read-lock on the + * Handle table, with no way to release it. I don't believe it's + * worth making the "ACCESS EXCLUSIVE" lock in the existing + * context since (a) there is no telling how long that transaction + * may stay open or even if it gets closed at all; (b) it blocks + * ALL other threads that need ANY access to the "handle" table for + * an unnecessarily long time in any case. As a compromise, keeping + * the race condition within the smallest possible window seems the + * best of some poor alternatives. + * + * Use a temporary context so the new Handle entry is determined + * and created within as small a time as possible, to narrow the + * window for this race condition.
+ */ + Context tempContext = new Context(); + try + { + // XXX FIXME: this is only implemented for PostgreSQL; + // no Oracle server available to test on. + + // find largest existing suffix. + final String query = + "SELECT MAX(TO_NUMBER(SPLIT_PART(handle,'/',2),'999999999')) "+ + "AS maxid FROM handle WHERE handle LIKE ? ;"; + TableRow maxRow = DatabaseManager.querySingle(tempContext, query, handlePrefix+"/%"); + int maxID = -1; + if (maxRow != null) + maxID = maxRow.getIntColumn("maxid"); + + // if no max ID could be computed, check whether table is empty, + // in which case we can take the first ID. + if (maxID < 0) + { + TableRow ctRow = DatabaseManager.querySingle(tempContext, + "SELECT COUNT(handle_id) AS count FROM handle;"); + if (ctRow == null || ctRow.getLongColumn("count") != 0) + throw new SQLException("Failed finding maximum of Handle suffixes, and handle table not empty; check log."); + maxID = 0; + } + + // assemble new Handle + ++maxID; + String newHandle = new StringBuffer(handlePrefix) + .append("/").append(String.valueOf(maxID)).toString(); - handle.setColumn("handle", handleId); + TableRow handle = DatabaseManager.create(tempContext, "Handle"); + handle.setColumn("handle", newHandle); handle.setColumn("resource_type_id", dso.getType()); handle.setColumn("resource_id", dso.getID()); - DatabaseManager.update(context, handle); - + DatabaseManager.update(tempContext, handle); if (log.isDebugEnabled()) + log.debug("Created new handle \""+newHandle+"\" for "+dso.toString()); + return newHandle; + } + catch (SQLException e) { - log.debug("Created new handle for " - + Constants.typeText[dso.getType()] + " " + handleId); + log.error("Got SQL error allocating hew Handle suffix: ",e); + tempContext.abort(); + tempContext = null; + throw e; + } + finally + { + if (tempContext != null) + tempContext.complete(); } - - return handleId; } /** - * Creates a handle entry, but with a handle supplied by the caller (new - * Handle not generated) + * Creates a 
handle entry, or updates one if it already exists, + * for a handle supplied by the caller. It is an error to rebind + * a Handle that is occupied by an object that actually exists, + * although deleted objects are overwritten automatically. + * + * If the object is null it creates or sets an "unbound" handle. + * This is required by the AIP system when it is in the process of + * restoring internal AIPs. The concept of an unbound handle is + * ALSO needed to save the spot in the Handle table after an Item is + * deleted, so that Handle doesn't get reused inadvertently on a + * different resource. * * @param context * DSpace context * @param dso - * DSpaceObject + * DSpaceObject - MAY be null to make handle unbound. * @param suppliedHandle - * existing handle value + * existing handle, must be a valid string. * @return the Handle */ public static String createHandle(Context context, DSpaceObject dso, String suppliedHandle) throws SQLException { - TableRow handle = DatabaseManager.create(context, "Handle"); - String handleId = suppliedHandle; + // replace existing handle if there is one, since it may have + // e.g. been created by InternalAIP during restoration. + TableRow row = DatabaseManager.findByUnique(context, "Handle", + "handle", suppliedHandle); - handle.setColumn("handle", handleId); - handle.setColumn("resource_type_id", dso.getType()); - handle.setColumn("resource_id", dso.getID()); - DatabaseManager.update(context, handle); + // if no existing handle found, create a new entry.
+ if (row == null) + { + row = DatabaseManager.create(context, "Handle"); + row.setColumn("handle", suppliedHandle); + } + // otherwise make sure this handle is not bound to an existing object + else if (!(row.isColumnNull("resource_type_id") && + row.isColumnNull("resource_id"))) + { + DSpaceObject oldDso = resolveToObject(context, suppliedHandle); + if (oldDso != null) + throw new SQLException("Cannot rebind Handle; handle "+suppliedHandle+" is already bound to another object: "+oldDso.toString()); + } + + if (dso == null) + { + row.setColumnNull("resource_type_id"); + row.setColumnNull("resource_id"); if (log.isDebugEnabled()) + log.debug("Created unbound handle "+suppliedHandle); + } + else { - log.debug("Created new handle for " - + Constants.typeText[dso.getType()] + " " + handleId); + row.setColumn("resource_type_id", dso.getType()); + row.setColumn("resource_id", dso.getID()); + if (log.isDebugEnabled()) + log.debug("Created predetermined handle "+suppliedHandle+" for "+dso.toString()); + } + DatabaseManager.update(context, row); + return suppliedHandle; } - return handleId; + /** + * Removes binding of Handle to a DSpace object, while leaving the + * Handle in the table so it doesn't get reallocated. The AIP + * implementation also needs it there for foreign key references. + * + * @param context DSpace context + * @param dso DSpaceObject whose Handle to unbind. + */ + public static void unbindHandle(Context context, DSpaceObject dso) + throws SQLException + { + TableRow row = getHandleInternal(context, dso.getType(), dso.getID()); + if (row != null) + { + row.setColumnNull("resource_type_id"); + row.setColumnNull("resource_id"); + DatabaseManager.update(context, row); + } + else + log.warn("Cannot find Handle entry to unbind for object="+dso.toString()); } /** @@ -225,13 +339,18 @@ if (dbhandle == null) { + if (handle.equals(Site.getSiteHandle())) + return Site.find(context, 0); return null; } + // handles can be left unbound intentionally, e.g. 
for + // internal AIPs of deleted or not-yet-existent objects. if ((dbhandle.isColumnNull("resource_type_id")) || (dbhandle.isColumnNull("resource_id"))) { - throw new IllegalStateException("No associated resource type"); + log.debug("Request for unbound Handle, handle="+handle); + return null; } // What are we looking at here? @@ -293,10 +412,16 @@ public static String findHandle(Context context, DSpaceObject dso) throws SQLException { - // if (!(obj instanceof Item)) - // return null; - // Item item = (Item) obj; - return getHandleInternal(context, dso.getType(), dso.getID()); + TableRow row = getHandleInternal(context, dso.getType(), dso.getID()); + if (row == null) + { + if (dso.getType() == Constants.SITE) + return Site.getSiteHandle(); + else + return null; + } + else + return row.getStringColumn("handle"); } /** @@ -329,6 +454,41 @@ return results; } + /** + * return Handle corresponding to database ID + * inverse of getID() + * + * @param context DSpace context + * @param id database row of the Handle entry + * @return The handle in this row, or null if there is no such row. + * @exception SQLException + * If a database error occurs + */ + public static String find(Context context, int id) + throws SQLException + { + TableRow row = DatabaseManager.find(context, "Handle", id); + return (row == null) ? null : row.getStringColumn("handle"); + } + + /** + * Returns database row number of this Handle's record, or -1 if not found. + * Inverse of find() + * + * @param context DSpace context + * @param handle The handle whose row is to be looked up. + * @return database row number of the Handle entry + * @exception SQLException + * If a database error occurs + */ + public static int getID(Context context, String handle) + throws SQLException + { + TableRow row = DatabaseManager.findByUnique(context, "Handle", + "handle", handle); + return (row == null) ?
-1 : row.getIntColumn("handle_id"); + } + //////////////////////////////////////// // Internal methods //////////////////////////////////////// @@ -346,15 +506,12 @@ * @exception SQLException * If a database error occurs */ - private static String getHandleInternal(Context context, int type, int id) + private static TableRow getHandleInternal(Context context, int type, int id) throws SQLException { - String sql = "SELECT handle FROM Handle WHERE resource_type_id = ? " + + String sql = "SELECT * FROM Handle WHERE resource_type_id = ? " + "AND resource_id = ?"; - - TableRow row = DatabaseManager.querySingle(context, sql,type,id); - - return (row == null) ? null : row.getStringColumn("handle"); + return DatabaseManager.querySingleTable(context, "Handle", sql, type, id); } /** @@ -379,20 +536,4 @@ return DatabaseManager .findByUnique(context, "Handle", "handle", handle); } - - /** - * Create a new handle id. The implementation uses the PK of the RDBMS - * Handle table. - * - * @return A new handle id - * @exception SQLException - * If a database error occurs - */ - private static String createId(int id) throws SQLException - { - String handlePrefix = ConfigurationManager.getProperty("handle.prefix"); - - return new StringBuffer().append(handlePrefix).append( - handlePrefix.endsWith("/") ? 
"" : "/").append(id).toString(); - } } Index: src/org/dspace/storage/bitstore/BitstreamStorageManager.java =================================================================== RCS file: /cvsroot/dspace/dspace/src/org/dspace/storage/bitstore/BitstreamStorageManager.java,v retrieving revision 1.22 diff -b -w -u -r1.22 BitstreamStorageManager.java --- src/org/dspace/storage/bitstore/BitstreamStorageManager.java 4 Sep 2006 12:48:51 -0000 1.22 +++ src/org/dspace/storage/bitstore/BitstreamStorageManager.java 4 Jan 2007 08:08:34 -0000 @@ -42,6 +42,8 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.PrintStream; +import java.io.UnsupportedEncodingException; import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -49,14 +51,20 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.net.URI; +import java.net.URLEncoder; +import java.net.URLDecoder; import org.apache.log4j.Logger; import org.dspace.checker.BitstreamInfoDAO; +import org.dspace.content.Bitstream; import org.dspace.core.ConfigurationManager; import org.dspace.core.Context; +import org.dspace.core.LogManager; import org.dspace.core.Utils; import org.dspace.storage.rdbms.DatabaseManager; import org.dspace.storage.rdbms.TableRow; +import org.dspace.storage.rdbms.TableRowIterator; import edu.sdsc.grid.io.FileFactory; import edu.sdsc.grid.io.GeneralFile; @@ -413,6 +421,27 @@ // get a reference to the file GeneralFile file = getFile(bitstream); + // fill in size, checksum, etc. 
+ finishBitstreamEntry(context, bitstream, file, bitstreamPath); + DatabaseManager.update(context, bitstream); + + int bitstream_id = bitstream.getIntColumn("bitstream_id"); + if (log.isDebugEnabled()) + { + log.debug("Stored bitstream " + bitstream_id + " in file " + + file.getAbsolutePath()); + } + return bitstream_id; + } + + + private static void finishBitstreamEntry(Context context, + TableRow bitstream, + GeneralFile file, + String bitstreamPath) + throws SQLException, IOException + { + // read through a DigestInputStream that will work out the MD5 // // DSpace refers to checksum, writes it in METS, and uses it as an @@ -501,15 +530,6 @@ bitstream.setColumn("checksum_algorithm", "MD5"); bitstream.setColumn("size_bytes", file.length()); bitstream.setColumn("deleted", false); - DatabaseManager.update(context, bitstream); - - int bitstream_id = bitstream.getIntColumn("bitstream_id"); - if (log.isDebugEnabled()) - { - log.debug("Stored bitstream " + bitstream_id + " in file " - + file.getAbsolutePath()); - } - return bitstream_id; } /** @@ -694,6 +714,19 @@ } } + + /** + * Get an iterator of bitstream rows that haven't been deleted, + * for Bitstream.findall() method. + * @return table rows of undeleted bitstreams. + */ + public static TableRowIterator findAll(Context context) + throws SQLException + { + String myQuery = "SELECT * FROM Bitstream where deleted = 'f';"; + return DatabaseManager.queryTable(context, "Bitstream", myQuery); + } + //////////////////////////////////////// // Internal methods //////////////////////////////////////// @@ -858,4 +891,271 @@ return buf.toString(); } + /** + * Return an absolute URI for this bitstream, in format that can be + * used to locate the exact content again. Note that it MAY only + * be possible for BitstreamStorageManager to dereference that URI, + * and it must be done in the same asset store that generated it. + *

+     * Formats are: (for local, SRB, and registered versions respectively)
+     *   info:/dspace/asset/file#{assetstore}:{internal_id}
+     *   info:/dspace/asset/srb#{assetstore}:{internal_id}
+     *   info:/dspace/registered/file#{assetstore}:{internal_id}
+     *   info:/dspace/registered/srb#{assetstore}:{internal_id}
+     * ..where {assetstore} is the URL-encoded (UTF-8) string form of the
+     * asset store and {internal_id} is the URL-encoded internal ID, with
+     * the registered-bitstream flag prefix removed for registered bitstreams.
+     *
+     * @param bitstream row of the Bitstream table; may be null.
+     * @return URI for the content of this bitstream, or null if the
+     *   bitstream is null or the URI components could not be encoded.
+     */
+    public static URI getAbsoluteURI(TableRow bitstream)
+    {
+        // Check that bitstream is not null
+        if (bitstream == null)
+            return null;
+
+        // Get the store to use
+        // Default to zero ('assetstore.dir') for backwards compatibility
+        int storeNumber = bitstream.getIntColumn("store_number");
+        if (storeNumber == -1)
+            storeNumber = 0;
+        GeneralFile assetstore = assetStores[storeNumber];
+
+        String iid = bitstream.getStringColumn("internal_id");
+        String subspace = "asset";
+
+        // there are 4 cases:
+        // -conventional bitstream, conventional storage
+        // -conventional bitstream, srb storage
+        // -registered bitstream, conventional storage
+        // -registered bitstream, srb storage
+        if (isRegisteredBitstream(iid))
+        {
+            // the "registered" subspace carries this information in the
+            // URI, so the flag prefix is dropped from the internal ID.
+            iid = iid.substring(REGISTERED_FLAG.length());
+            subspace = "registered";
+        }
+        try
+        {
+            // Both fragment parts are URL-encoded, so the only literal ':'
+            // in the fragment is the separator between store and ID.
+            return URI.create("info:/dspace/"+subspace+
+                    ((assetstore instanceof LocalFile) ? "/file#" : "/srb#")+
+                    URLEncoder.encode(assetstore.toString(),"UTF-8")+
+                    ":"+
+                    URLEncoder.encode(iid,"UTF-8"));
+        }
+        catch (UnsupportedEncodingException e)
+        {
+            log.error("Failed creating URI for bitstream: "+e.toString());
+        }
+        // only reached when the encoding failure above was logged
+        return null;
+    }
+
+    /**
+     * Translate a bitstream absolute URI back into the Bitstream table row
+     * it identifies, by matching the asset store and internal ID it encodes.
+     * Format:
+     *   info:/dspace/{subspace}/{type}#{assetstore}:{internal_id}
+     *   ..where subspace = 'asset' | 'registered'
+     *           type = 'file' | 'srb'
+     *
+     * @param context DSpace context
+     * @param uri absolute bitstream URI, as produced by getAbsoluteURI()
+     * @return the Bitstream table row matching the asset store and
+     *   internal ID named by the URI, or null if there is no such row.
+     * @throws IllegalArgumentException if the URI is malformed or does not
+     *   match any configured asset store.
+     * @throws SQLException if a database error occurs.
+     */
+    public static TableRow dereferenceAbsoluteURI(Context context, URI uri)
+        throws IllegalArgumentException, SQLException
+    {
+        try
+        {
+            boolean registered;
+            boolean isFile;
+
+            // getPath() is null for an opaque URI such as "info:dspace/..."
+            String path = uri.getPath();
+            if (path == null || !path.startsWith("/dspace/"))
+                throw new IllegalArgumentException("Path does not start with /dspace/: "+uri.toString());
+            String rest = path.substring(8);
+            if (rest.startsWith("asset/"))
+            {
+                registered = false;
+                rest = rest.substring(6);
+            }
+            else if (rest.startsWith("registered/"))
+            {
+                registered = true;
+                rest = rest.substring(11);
+            }
+            else
+                throw new IllegalArgumentException("Path does not have registered|asset element: "+uri.toString());
+
+            if (rest.equals("file"))
+                isFile = true;
+            else if (rest.equals("srb"))
+                isFile = false;
+            else
+                throw new IllegalArgumentException("Path does not have file|srb element: "+uri.toString());
+
+            // getRawFragment() is null when the URI has no '#' part; treat
+            // that like a fragment without a colon instead of failing with
+            // a NullPointerException.
+            String frag = uri.getRawFragment();
+            int colon = (frag == null) ? -1 : frag.indexOf(":");
+            if (colon < 0)
+                throw new IllegalArgumentException("Malformed fragment in bitstream URI, no colon: "+uri.toString());
+            String prefix = URLDecoder.decode(frag.substring(0, colon),"UTF-8");
+            String iid = URLDecoder.decode(frag.substring(colon+1),"UTF-8");
+
+            // getAbsoluteURI() strips the registered flag from the internal
+            // ID and records it as the "registered" subspace instead, so
+            // put it back to match the value stored in the Bitstream table.
+            if (registered)
+                iid = REGISTERED_FLAG + iid;
+
+            // find the asset store whose name matches the prefix and whose
+            // kind (local file vs. SRB) matches the type element of the URI
+            int storeNumber = -1;
+            for (int i = 0; i < assetStores.length; ++i)
+            {
+                if (assetStores[i].toString().equals(prefix) &&
+                    (assetStores[i] instanceof LocalFile) == isFile)
+                    storeNumber = i;
+            }
+            if (storeNumber < 0)
+                throw new IllegalArgumentException("Prefix \""+prefix+"\" does not match any configured Asset Store: "+uri.toString());
+
+            return findBitstreamByStorage(context, storeNumber, iid);
+        }
+        catch (UnsupportedEncodingException e)
+        {
+            log.error("Failed decoding URI for bitstream: "+e.toString());
+        }
+        // only reached when the decoding failure above was logged
+        return null;
+    }
+
+    /**
+     * Check and optionally restore the Bitstream table entry for each
+     * asset file.
Crawls over all files in the asset store, in case
+     * the RDBMS is lost, out of date, or corrupted.
+     * This can also be used to restore an archive from the state in the
+     * asset store if the RDBMS is lost -- first restore the bitstreams,
+     * then rebuild the object model from internal AIPs (see AIPManager).
+     *
+     * @param context the usual
+     * @param restore when true, attempt to restore asset files without
+     *   entries in the bitstream table.
+     * @param verbose when true (and diagStream is not null), also report
+     *   each restored entry and each asset file that is already in the
+     *   bitstream table.
+     * @param diagStream if not null, write diagnostic messages here.
+     * @throws IOException if an asset store is of an unknown file type, or
+     *   reading an asset file fails.
+     * @throws SQLException if a database error occurs.
+     */
+    public static void checkAssetStores(Context context, boolean restore,
+                                        boolean verbose, PrintStream diagStream)
+        throws IOException, SQLException
+    {
+        for (int i = 0; i < assetStores.length; ++i)
+            // XXX FIXME do we check SRB-based assetstores the same way?
+            checkAssetStoreDir(context, i, assetStores[i], directoryLevels,
+                               restore, verbose, diagStream);
+    }
+
+    /**
+     * Recursive check of one directory; go down dirLevels of subdirs.
+     * Directories are expected while dirLevels is greater than 0, and
+     * asset files are expected once it reaches 0; anything else is only
+     * reported, never modified.
+     *
+     * @param context DSpace context
+     * @param storeNumber index of the asset store being checked
+     * @param root directory to check
+     * @param dirLevels number of directory levels remaining below root
+     * @param restore when true, create Bitstream rows for asset files
+     *   that have none.
+     * @param verbose when true, also report successes to diagStream.
+     * @param diagStream if not null, write diagnostic messages here.
+     */
+    private static void checkAssetStoreDir(Context context,
+                                           int storeNumber,
+                                           GeneralFile root,
+                                           int dirLevels,
+                                           boolean restore,
+                                           boolean verbose,
+                                           PrintStream diagStream)
+        throws IOException, SQLException
+    {
+        String name[] = root.list();
+        // NOTE(review): GeneralFile.list() presumably mirrors
+        // java.io.File.list(), which returns null when the path is not a
+        // directory or cannot be read -- report it rather than crash.
+        if (name == null)
+        {
+            String msg = "Cannot list contents of asset store directory: "+root.toString();
+            log.warn(msg);
+            if (diagStream != null)
+                diagStream.println(msg);
+            return;
+        }
+        for (int i = 0; i < name.length; ++i)
+        {
+            // GeneralFile interface seems to skip over these..
+            if (name[i].startsWith("."))
+            {
+                log.warn("Got dotfile, root="+root+", file="+name[i]);
+                continue;
+            }
+            // XXX FIXME this is horrible, but expedient.
+            // It works fine on local assetstores.
+            // Needs to be tested against an SRB store.
+            // NOTE: the GeneralFile constructor does not work in the
+            // version of jargon API we use, hence this kludge.
+            GeneralFile child = null;
+            if (root instanceof LocalFile)
+                child = new LocalFile((LocalFile)root, name[i]);
+            else if (root instanceof SRBFile)
+                child = new SRBFile((SRBFile) root, name[i]);
+            else
+                throw new IOException("Got GeneralFile of unknown type: "+root.toString());
+
+            // recurse through directory levels
+            if (dirLevels > 0)
+            {
+                if (child.isDirectory())
+                    checkAssetStoreDir(context, storeNumber, child, dirLevels-1, restore, verbose, diagStream);
+                else
+                {
+                    log.warn("Found plain file in dir hierarchy: "+child.toString());
+                    if (diagStream != null)
+                        diagStream.println("Found plain file in dir hierarchy: "+child.toString());
+                }
+            }
+
+            // expect files
+            else
+            {
+                if (child.isDirectory())
+                {
+                    if (log.isDebugEnabled())
+                        log.debug("Found directory at file level of hierarchy: "+child.toString());
+                    if (diagStream != null)
+                        diagStream.println("Found directory at file level of hierarchy: "+child.toString());
+                }
+                else
+                {
+                    // the file name doubles as the bitstream's internal ID
+                    TableRow bsRow = findBitstreamByStorage(context, storeNumber, name[i]);
+                    if (bsRow == null)
+                    {
+                        if (restore)
+                        {
+                            TableRow nRow = DatabaseManager.create(context, "Bitstream");
+                            nRow.setColumn("internal_id", name[i]);
+                            nRow.setColumn("store_number", storeNumber);
+
+                            // fill in size, checksum, etc.
+                            finishBitstreamEntry(context, nRow, child,
+                                                 child.getPath());
+                            DatabaseManager.update(context, nRow);
+
+                            if (log.isDebugEnabled() || (verbose && diagStream != null))
+                            {
+                                int bitstream_id = nRow.getIntColumn("bitstream_id");
+                                String msg = "Restored bitstream entry, id=" + bitstream_id + " for file "
+                                        + child.getAbsolutePath();
+                                log.debug(msg);
+                                if (verbose && diagStream != null)
+                                    diagStream.println(msg);
+                            }
+                        }
+                        else
+                        {
+                            String msg = "WARNING, asset file not in Bitstream table: store_number="+String.valueOf(storeNumber)+
+                                ", internal_id=\""+name[i]+"\"";
+                            log.warn(msg);
+                            if (diagStream != null)
+                                diagStream.println(msg);
+                        }
+                    }
+                    else
+                    {
+                        if (verbose && diagStream != null)
+                            diagStream.println("success, asset file is in Bitstream table: store_number="+String.valueOf(storeNumber)+
+                                ", internal_id=\""+name[i]+"\"");
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Look up the Bitstream table row for an asset file.
+     *
+     * @param context DSpace context
+     * @param storeNumber value to match against the store_number column
+     * @param internalID value to match against the internal_id column
+     * @return the row returned by the query, or null if there is none.
+     * @throws SQLException if a database error occurs.
+     */
+    private static TableRow findBitstreamByStorage(Context context,
+                                                   int storeNumber, String internalID)
+        throws SQLException
+    {
+        Integer sn = Integer.valueOf(storeNumber);
+        Object[] parameters = {sn, internalID};
+        // no trailing ';' in the statement text: it is not part of the SQL
+        // statement and some JDBC drivers reject it.
+        TableRow result = DatabaseManager.querySingleTable(context,
+                "Bitstream",
+                "SELECT * FROM Bitstream WHERE store_number = ? AND internal_id = ?",
+                parameters);
+
+        if (log.isDebugEnabled())
+        {
+            if (result == null)
+                log.debug(LogManager.getHeader(context, "find_bitstream", "not_found,store_number="+String.valueOf(storeNumber)+", internal_id="+internalID));
+            else
+                log.debug(LogManager.getHeader(context, "find_bitstream", "store_number="+String.valueOf(storeNumber)+", internal_id="+internalID));
+        }
+        return result;
+    }
 }