To support horizontal scalability use cases as well as geographic distribution, Fedora 4 can be configured as a cluster of application servers. |
This feature is still undergoing development. |
There are still some issues present in Fedora 4 which may lead to partial ingests, due to synchronization timeouts. This can be partially mitigated by increasing the replTimeout property in infinispan.xml. |
Fedora 4 is built on top of the JCR implementation Modeshape. Modeshape uses Infinispan as a distributed datastore, which in turn uses the messaging toolkit JGroups to transfer state between nodes.
Therefore, the following resources and documentation contain a lot of important information about configuring Fedora 4's underlying projects.
A couple of configuration options have to be set in order to have Fedora 4 work as a cluster on a local machine:
In order to use the UDP Multicasting for node discovery and TCP for replication the following JGroups example configuration can be used:
<config xmlns="urn:org:jgroups"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="urn:org:jgroups http://www.jgroups.org/schema/JGroups-3.0.xsd">
    <!-- TCP transport used for replication; MPING below does UDP multicast discovery -->
    <TCP bind_port="7800"
         loopback="false"
         recv_buf_size="${tcp.recv_buf_size:5M}"
         send_buf_size="${tcp.send_buf_size:640K}"
         max_bundle_size="64K"
         max_bundle_timeout="30"
         use_send_queues="true"
         sock_conn_timeout="300"
         timer_type="new3"
         timer.min_threads="4"
         timer.max_threads="10"
         timer.keep_alive_time="3000"
         timer.queue_max_size="500"
         thread_pool.enabled="true"
         thread_pool.min_threads="1"
         thread_pool.max_threads="10"
         thread_pool.keep_alive_time="5000"
         thread_pool.queue_enabled="true"
         thread_pool.queue_max_size="10000"
         thread_pool.rejection_policy="discard"
         oob_thread_pool.enabled="true"
         oob_thread_pool.min_threads="1"
         oob_thread_pool.max_threads="8"
         oob_thread_pool.keep_alive_time="5000"
         oob_thread_pool.queue_enabled="false"
         oob_thread_pool.queue_max_size="100"
         oob_thread_pool.rejection_policy="discard"/>
    <!-- Node discovery via UDP multicast -->
    <MPING timeout="1000" num_initial_members="1"/>
    <MERGE2 max_interval="30000" min_interval="10000"/>
    <!-- Failure detection -->
    <FD_ALL timeout="150000"/>
    <VERIFY_SUSPECT timeout="150000"/>
    <BARRIER/>
    <!-- Reliable message delivery -->
    <pbcast.NAKACK2 use_mcast_xmit="false" discard_delivered_msgs="true"/>
    <UNICAST timeout="600,900,2500"/>
    <pbcast.STABLE stability_delay="2000" desired_avg_gossip="50000" max_bytes="4M"/>
    <!-- Group membership -->
    <pbcast.GMS print_local_addr="true" join_timeout="6000" view_bundling="true"/>
    <MFC max_credits="2M" min_threshold="0.4"/>
    <FRAG2 frag_size="60K"/>
    <pbcast.STATE_TRANSFER/>
</config>
The following example configuration has its replication timeout set to 10 minutes in order to mitigate the problem of synchronization timeouts when spanning one transaction over a lot of operations.
<infinispan xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xsi:schemaLocation="urn:infinispan:config:5.2 http://www.infinispan.org/schemas/infinispan-config-5.2.xsd"
            xmlns="urn:infinispan:config:5.2">
    <global>
        <globalJmxStatistics enabled="true" allowDuplicateDomains="true"/>
        <transport clusterName="modeshape-cluster">
            <properties>
                <property name="configurationFile"
                          value="${fcrepo.ispn.jgroups.configuration:config/jgroups-fcrepo-tcp.xml}"/>
            </properties>
        </transport>
    </global>
    <!-- Default cache settings: distribution mode with a 10 minute replication timeout -->
    <default>
        <clustering mode="distribution">
            <sync replTimeout="600000"/>
            <l1 enabled="false" lifespan="0" onRehash="false"/>
            <hash numOwners="${fcrepo.ispn.numOwners:2}"/>
            <stateTransfer chunkSize="100" fetchInMemoryState="true"/>
        </clustering>
    </default>
    <!-- Main repository cache: replicated to every node, backed by a file store -->
    <namedCache name="FedoraRepository">
        <clustering mode="replication">
            <sync replTimeout="6000000"/>
            <l1 enabled="false" lifespan="0" onRehash="false"/>
            <stateTransfer chunkSize="100" fetchInMemoryState="true" timeout="120000"/>
        </clustering>
        <locking isolationLevel="READ_COMMITTED"
                 writeSkewCheck="false"
                 lockAcquisitionTimeout="150000"
                 useLockStriping="true"/>
        <transaction transactionMode="TRANSACTIONAL" lockingMode="PESSIMISTIC"/>
        <loaders passivation="false" shared="false" preload="false">
            <loader class="org.infinispan.loaders.file.FileCacheStore"
                    fetchPersistentState="true"
                    purgeOnStartup="false">
                <properties>
                    <property name="location"
                              value="${fcrepo.ispn.repo.CacheDirPath:target/FedoraRepository/storage}"/>
                    <property name="fsyncMode" value="perWrite"/>
                </properties>
            </loader>
        </loaders>
    </namedCache>
    <!-- Binary metadata cache: distributed, with eviction and deadlock detection -->
    <namedCache name="FedoraRepositoryMetaData">
        <clustering mode="distribution">
            <sync replTimeout="600000"/>
            <l1 enabled="false" lifespan="0" onRehash="false"/>
            <hash numOwners="${fcrepo.ispn.numOwners:2}"/>
            <stateTransfer chunkSize="100" fetchInMemoryState="true"/>
        </clustering>
        <locking concurrencyLevel="1000"
                 lockAcquisitionTimeout="150000"
                 useLockStriping="false"/>
        <deadlockDetection enabled="true" spinDuration="1000"/>
        <eviction maxEntries="500" strategy="LIRS" threadPolicy="DEFAULT"/>
        <transaction transactionManagerLookupClass="org.infinispan.transaction.lookup.GenericTransactionManagerLookup"
                     transactionMode="TRANSACTIONAL"
                     lockingMode="PESSIMISTIC"/>
        <loaders passivation="false" shared="false" preload="false">
            <loader class="org.infinispan.loaders.file.FileCacheStore"
                    fetchPersistentState="true"
                    purgeOnStartup="false">
                <properties>
                    <property name="location"
                              value="${fcrepo.ispn.CacheDirPath:target/FedoraRepositoryMetaData/storage}"/>
                    <property name="fsyncMode" value="perWrite"/>
                </properties>
            </loader>
        </loaders>
    </namedCache>
    <!-- Binary content cache: distributed, with eviction and deadlock detection -->
    <namedCache name="FedoraRepositoryBinaryData">
        <clustering mode="distribution">
            <sync replTimeout="600000"/>
            <l1 enabled="false" lifespan="0" onRehash="false"/>
            <hash numOwners="${fcrepo.ispn.numOwners:2}"/>
            <stateTransfer chunkSize="100" fetchInMemoryState="true"/>
        </clustering>
        <locking concurrencyLevel="1000"
                 lockAcquisitionTimeout="150000"
                 useLockStriping="false"/>
        <deadlockDetection enabled="true" spinDuration="1000"/>
        <eviction maxEntries="100" strategy="LIRS" threadPolicy="DEFAULT"/>
        <transaction transactionManagerLookupClass="org.infinispan.transaction.lookup.GenericTransactionManagerLookup"
                     transactionMode="TRANSACTIONAL"
                     lockingMode="PESSIMISTIC"/>
        <loaders passivation="false" shared="false" preload="false">
            <loader class="org.infinispan.loaders.file.FileCacheStore"
                    fetchPersistentState="true"
                    purgeOnStartup="false">
                <properties>
                    <property name="location"
                              value="${fcrepo.ispn.binary.CacheDirPath:target/FedoraRepositoryBinaryData/storage}"/>
                    <property name="fsyncMode" value="perWrite"/>
                </properties>
            </loader>
        </loaders>
    </namedCache>
</infinispan>
The following configuration has indexing disabled completely in order to increase ingest performance
{
    "name" : "repo",
    "jndiName" : "",
    "workspaces" : {
        "predefined" : ["default"],
        "default" : "default",
        "allowCreation" : true
    },
    "clustering" : {
        "clusterName" : "modeshape-cluster"
    },
    "query" : {
        "enabled" : "false"
    },
    "storage" : {
        "cacheName" : "FedoraRepository",
        "cacheConfiguration" : "${fcrepo.infinispan.cache_configuration:config/infinispan/clustered/infinispan.xml}",
        "binaryStorage" : {
            "type" : "cache",
            "dataCacheName" : "FedoraRepositoryBinaryData",
            "metadataCacheName" : "FedoraRepositoryMetaData"
        }
    },
    "security" : {
        "anonymous" : {
            "roles" : ["readonly","readwrite","admin"],
            "useOnFailedLogin" : false
        },
        "providers" : [
            { "classname" : "org.fcrepo.http.commons.session.BypassSecurityServletAuthenticationProvider" }
        ]
    },
    "node-types" : ["fedora-node-types.cnd"]
}
Download Tomcat 7.0.50 and unpack it
#> wget http://mirror.synyx.de/apache/tomcat/tomcat-7/v7.0.50/bin/apache-tomcat-7.0.50.tar.gz #> tar -zxvf apache-tomcat-7.0.50.tar.gz #> mv apache-tomcat-7.0.50 tomcat7 |
Copy the fcrepo-webapp-VERSION.war file
#> cp fcrepo-webapp-VERSION.war tomcat7/webapps/fcrepo.war |
Use the following commands to set the buffer size
#> sysctl net.core.rmem_max=26214400 #> sysctl net.core.wmem_max=5242880 |
Using a custom configuration by pointing Fedora 4 to custom configuration files:
#> CATALINA_OPTS="-Xmx1024m -XX:MaxPermSize=256m -Djava.net.preferIPv4Stack=true -Djgroups.udp.mcast_addr=239.42.42.42 -Dfcrepo.modeshape.configuration=file:///path/to/repository.json -Dfcrepo.ispn.jgroups.configuration=/path/to/jgroups-fcrepo-tcp.xml -Dfcrepo.infinispan.cache_configuration=/path/to/infinispan.xml" bin/catalina.sh run |
Currently there are still issues using UDP multicasting for replication, while using UDP for node discovery works as intended. |
A couple of configuration options have to be set in order to have Fedora 4 work as a cluster on a local machine:
Download Tomcat 7.0.50 and unpack it
#> wget http://mirror.synyx.de/apache/tomcat/tomcat-7/v7.0.50/bin/apache-tomcat-7.0.50.tar.gz #> tar -zxvf apache-tomcat-7.0.50.tar.gz #> mv apache-tomcat-7.0.50 tomcat7 |
Copy the fcrepo-webapp-VERSION.war file
#> cp fcrepo-webapp-VERSION.war tomcat7/webapps/fcrepo.war |
$TOMCAT_HOME/bin/setenv.sh
(create if necessary) using the properties fcrepo.modeshape.configuration,
fcrepo.ispn.jgroups.configuration
and fcrepo.infinispan.cache_configuration
Use the following commands to set the buffer size
#> sysctl net.core.rmem_max=5242880 #> sysctl net.core.wmem_max=5242880 |
Using the default clustered configuration (Replication mode):
#> CATALINA_OPTS="-Xmx1024m -XX:MaxPermSize=256m -Dfcrepo.modeshape.configuration=config/clustered/repository.json -Djava.net.preferIPv4Stack=true -Djgroups.udp.mcast_addr=239.42.42.42" bin/catalina.sh run |
Using a custom configuration by pointing Fedora 4 to custom configuration files:
#> CATALINA_OPTS="-Xmx1024m -XX:MaxPermSize=256m -Djava.net.preferIPv4Stack=true -Djgroups.udp.mcast_addr=239.42.42.42 -Dfcrepo.modeshape.configuration=file:///path/to/repository.json -Dfcrepo.ispn.jgroups.configuration=/path/to/jgroups-fedora-udp.xml -Dfcrepo.infinispan.cache_configuration=/path/to/infinispan.xml" bin/catalina.sh run |
Currently there are still issues using UDP multicasting for replication, while using UDP for node discovery works as intended. |
A couple of configuration options have to be set in order to have Fedora 4 work as a cluster on a local machine:
Download Tomcat 7.0.50 and unpack it
#> wget http://mirror.synyx.de/apache/tomcat/tomcat-7/v7.0.50/bin/apache-tomcat-7.0.50.tar.gz #> tar -zxvf apache-tomcat-7.0.50.tar.gz #> mv apache-tomcat-7.0.50 tomcat7-8080 |
Copy the fcrepo-webapp-VERSION.war file
#> cp fcrepo-webapp-VERSION.war tomcat7-8080/webapps/fcrepo.war |
Download from github and put it into tomcat7-8080
#> wget -O infinispan.xml https://gist.github.com/fasseg/8646707/raw #> mv infinispan.xml tomcat7-8080/ |
Download from Github and put into tomcat7-8080
#> wget -O repository.json https://gist.github.com/fasseg/8646727/raw #> mv repository.json tomcat7-8080/ |
Download from Github and put into tomcat7-8080
#> wget -O jgroups-fedora-udp.xml https://gist.github.com/fasseg/8646743/raw #> mv jgroups-fedora-udp.xml tomcat7-8080/ |
Use the following commands to set the buffer size
#> sysctl net.core.rmem_max=5242880 #> sysctl net.core.wmem_max=5242880 |
#> cp -R tomcat7-8080/ tomcat7-8081 |
#> sed -i 's/8080/8081/g' tomcat7-8081/conf/server.xml #> sed -i 's/8005/8006/g' tomcat7-8081/conf/server.xml #> sed -i 's/8009/8010/g' tomcat7-8081/conf/server.xml |
#> CATALINA_OPTS="-Xmx1024m -XX:MaxPermSize=256m -Dfcrepo.modeshape.configuration=file:///path/to/repository.json -Djava.net.preferIPv4Stack=true -Dfcrepo.ispn.jgroups.configuration=/path/to/jgroups-fedora-udp.xml -Djgroups.udp.mcast_addr=239.42.42.42 -Dfcrepo.infinispan.cache_configuration=/path/to/infinispan.xml" tomcat7-8080/bin/catalina.sh run |
#> CATALINA_OPTS="-Xmx1024m -XX:MaxPermSize=256m -Dfcrepo.modeshape.configuration=file:///path/to/repository.json -Djava.net.preferIPv4Stack=true -Dfcrepo.ispn.jgroups.configuration=/path/to/jgroups-fedora-udp.xml -Djgroups.udp.mcast_addr=239.42.42.42 -Dfcrepo.infinispan.cache_configuration=/path/to/infinispan.xml" tomcat7-8081/bin/catalina.sh run |
#> wget http://localhost:8080/fcrepo/rest #> wget http://localhost:8081/fcrepo/rest |
Load balancing can be achieved by using an Apache server with mod_jk in front of the Fedora 4 cluster. Using mod_jk one has to create as many workers in the workers.properties configuration file as there are Fedora 4 nodes.
See this example on the RedHat pages
For pushing configurations and wars/jars, start, stop, restart and purge the Ubuntu 12.04 LTS cluster this small script gets used on the FIZ cluster.
In order to make this work without having to input passwords all the time for the sudo and ssh calls on the cluster nodes, I distributed a public ssh key on the cluster nodes for ssh auth and allowed the fcrepo user to execute sudo
calls to rm, cp, service
calls without a password.
/data/fcrepo
(the exploded war file, owned by fcrepo)/home/fcrepo/fiz-cluster-config
(the configuration and setenv.sh file, owned by fcrepo)/var/lib/tomcat7/webapps/fedora
(owned by root) symlinks to /data/fcrepo node[]
array will have to change for different configurations, as should the range defined in the for statements in start_cluster() purge_cluster()
and stop_cluster().
#!/bin/bash
# Administration script for the FIZ Fedora 4 cluster.
#
# Supports starting/stopping/restarting the whole cluster or single nodes,
# checking node status, purging repository data, and pushing configuration
# files, WAR files and JAR files to all nodes.
#
# Requirements: passwordless ssh as user "fcrepo" to every node, and
# passwordless sudo for the rm, cp and service commands on every node.
#
# To run against a different cluster, only this array needs to change:
# start_cluster/stop_cluster/purge iterate over it instead of a hard-coded range.
nodes[0]=192.168.42.101
nodes[1]=192.168.42.102
nodes[2]=192.168.42.103
nodes[3]=192.168.42.104
nodes[4]=192.168.42.105
nodes[5]=192.168.42.106

# Start tomcat on node $1 (1-based index) and block until its REST endpoint answers.
start_node() {
    if [[ -z "$1" ]]; then
        echo "No node argument supplied [1-${#nodes[@]}]"
        exit 1
    fi
    nodeip=${nodes[(($1 - 1))]}
    echo -n "Starting node $nodeip..."
    ssh -qt fcrepo@$nodeip "sudo service tomcat7 start > /dev/null"
    # Poll until the REST API is reachable; sleep to avoid a busy loop.
    until curl -sf http://${nodeip}:8080/fcrepo/rest > /dev/null
    do
        sleep 1
    done
    echo "done."
}

# Stop tomcat on node $1 (1-based index) and block until its REST endpoint stops answering.
stop_node() {
    if [[ -z "$1" ]]; then
        echo "No node argument supplied [1-${#nodes[@]}]"
        exit 1
    fi
    nodeip=${nodes[(($1 - 1))]}
    echo -n "Stopping node $nodeip..."
    ssh -qt fcrepo@$nodeip "sudo service tomcat7 stop > /dev/null"
    # Poll until the REST API stops responding; sleep to avoid a busy loop.
    while curl -sf http://${nodeip}:8080/fcrepo/rest > /dev/null
    do
        sleep 1
    done
    echo "done."
}

# Restart a single node: stop it, then start it again.
restart_node() {
    stop_node "$1"
    start_node "$1"
}

# Start every node listed in the nodes[] array, in order.
start_cluster() {
    echo "Starting cluster"
    for ((node = 1; node <= ${#nodes[@]}; node++))
    do
        start_node $node
    done
}

# Stop every node listed in the nodes[] array, in order.
stop_cluster() {
    echo "Stopping cluster"
    for ((node = 1; node <= ${#nodes[@]}; node++))
    do
        stop_node $node
    done
}

# Restart the whole cluster.
restart_cluster() {
    stop_cluster
    start_cluster
}

# Print online/offline status of every node by probing its REST endpoint.
status() {
    echo "Status of FIZ Fedora 4 cluster"
    for nodeip in "${nodes[@]}"
    do
        if curl -sf http://${nodeip}:8080/fcrepo/rest > /dev/null; then
            echo "$nodeip is online"
        else
            echo "$nodeip is OFFLINE"
        fi
    done
}

# Delete repository data and tomcat work directories on every node.
purge() {
    echo "purging cluster"
    for nodeip in "${nodes[@]}"
    do
        echo -n "purging ${nodeip}..."
        ssh -qt fcrepo@$nodeip "sudo rm -Rf /var/lib/tomcat7/fcrepo4-data/* /var/lib/tomcat7/work/Catalina/localhost/*"
        echo "done"
    done
}

# Copy a configuration file ($1) into the fcrepo-config directory on every node.
push_config() {
    for nodeip in "${nodes[@]}"
    do
        # NOTE: the original echoed $2, which is never set when invoked as
        # "push_config $2" from the dispatcher below; the file name is $1.
        echo "pushing config file $1 to $nodeip"
        scp "$1" fcrepo@${nodeip}:fcrepo-config/
    done
}

# Stop the cluster, wipe repository data, then start it again.
restart_purge() {
    stop_cluster
    purge
    start_cluster
}

# Stop + purge the cluster, explode the WAR ($1) locally and replace
# /opt/fcrepo on every node with the new content.
push_war() {
    stop_cluster
    purge
    rm -Rf /tmp/fcrepo
    mkdir /tmp/fcrepo
    unzip -qq "$1" -d /tmp/fcrepo
    for nodeip in "${nodes[@]}"
    do
        echo -n "pushing WAR file to ${nodeip}..."
        ssh -qt fcrepo@${nodeip} "sudo rm -Rf /tmp/fcrepo"
        scp -qr /tmp/fcrepo fcrepo@${nodeip}:/tmp
        ssh -qt fcrepo@${nodeip} "sudo rm -Rf /opt/fcrepo/*"
        ssh -qt fcrepo@${nodeip} "sudo mv /tmp/fcrepo /opt/"
        echo "done."
    done
}

# Stop + purge the cluster and copy a single JAR ($1) into the webapp's lib dir on every node.
push_jar() {
    stop_cluster
    purge
    for nodeip in "${nodes[@]}"
    do
        echo -n "pushing JAR to ${nodeip}..."
        scp -q "$1" fcrepo@${nodeip}:/opt/fcrepo/WEB-INF/lib
        echo "done."
    done
}

# Command dispatcher.
case "$1" in
    start)        start_cluster ;;
    stop)         stop_cluster ;;
    restart)      restart_cluster ;;
    stop-node)    stop_node "$2" ;;
    start-node)   start_node "$2" ;;
    restart-node) restart_node "$2" ;;
    status)       status ;;
    purge)        purge ;;
    push-config)  push_config "$2" ;;
    restart-purge) restart_purge ;;
    push-war)     push_war "$2" ;;
    push-jar)     push_jar "$2" ;;
    *)
        echo $"Usage: $0 {start|stop|restart|restart-purge|start-node|stop-node|restart-node|status|purge|push-config|push-war|push-jar}"
        exit 1
esac