Skip to content

Commit

Permalink
improve parsing of albums
Browse files Browse the repository at this point in the history
  • Loading branch information
don-vip committed Feb 2, 2018
1 parent 57f09b2 commit 2001086
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 8 deletions.
24 changes: 19 additions & 5 deletions src/main/java/com/github/donvip/archscrap/ArchScrap.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -50,6 +52,13 @@ public class ArchScrap implements AutoCloseable {

private static final String BASE_URL = "http://basededonnees.archives.toulouse.fr/4DCGi/";

/**
 * Fonds whose notices are organized in albums, keyed by fonds cote.
 * The value is the highest album number requested when scraping or checking that fonds.
 */
private static final Map<String, Integer> ALBUMS;
static {
    Map<String, Integer> albums = new HashMap<>();
    albums.put("16Fi", 81);
    albums.put("38Fi", 39);
    albums.put("39Fi", 11);
    // Publish a read-only view: this table is shared static state and is only ever looked up.
    ALBUMS = Collections.unmodifiableMap(albums);
}

// -- Hibernate
private final StandardServiceRegistry registry;
private final SessionFactory sessionFactory;
Expand Down Expand Up @@ -125,8 +134,14 @@ public void doCheck(String[] args) throws IOException {

/**
 * Logs whether every notice of the given fonds is present in database.
 * <p>
 * For a fonds listed in {@code ALBUMS} the check is bounded by its album count,
 * otherwise by the expected number of notices declared on the fonds. The reported
 * percentage is relative to that same bound.
 *
 * @param f fonds to check; nothing is done if {@code null}
 */
private void checkFonds(Fonds f) {
    if (f == null) {
        return;
    }
    // Single lookup: album count when the fonds is album-based, declared notice count otherwise.
    int expected = ALBUMS.getOrDefault(f.getCote(), f.getExpectedNotices());
    List<Integer> missing = f.getMissingNotices(session, expected);
    if (missing.isEmpty()) {
        LOGGER.info(f.getCote() + ": OK");
    } else {
        // Use the bound the query was run with, and never divide by zero
        // (a zero denominator would otherwise be cast to Integer.MAX_VALUE).
        int percent = expected > 0 ? (int) (100d * missing.size() / expected) : 100;
        LOGGER.warn(f.getCote() + ": KO (missing "+percent+"%: " + missing + ")");
    }
}

Expand Down Expand Up @@ -193,11 +208,10 @@ private void scrapFonds(String cote) throws IOException {

private void scrapFonds(Fonds f) throws IOException {
if (f != null && f.getNotices().size() < f.getExpectedNotices()) {
boolean fi16 = "16Fi".equals(f.getCote());
// We have fewer notices in database than expected
if (fi16) {
if (ALBUMS.containsKey(f.getCote())) {
// Load all albums notices
for (int i = 1; i <= 81; i++) {
for (int i = 1; i <= ALBUMS.get(f.getCote()); i++) {
Notice album = searchNotice(f, i);
if (album != null) {
for (int j = 1; searchNotice(f, i, j) != null; j++) {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/github/donvip/archscrap/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ public static Notice parseNotice(Document desc, String cote) {
if (m.matches()) {
n.setTitle(m.group(2).trim());
} else {
if (!cote.matches("\\d+Fi/\\d+")) {
if (!cote.matches("\\d+Fi\\d+/\\d+")) {
LOGGER.error("Empty notice for {}", cote);
}
return null;
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/com/github/donvip/archscrap/domain/Fonds.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,17 @@ public void setReuseConditions(String reuseConditions) {
this.reuseConditions = reuseConditions;
}

/**
 * Returns the notice identifiers missing from this fonds, using the expected
 * number of notices of the fonds as upper bound.
 *
 * @param session Hibernate session used to run the lookup
 * @return the result of {@link #getMissingNotices(Session, int)} called with {@code expectedNotices}
 */
public List<Integer> getMissingNotices(Session session) {
    return getMissingNotices(session, expectedNotices);
}

@SuppressWarnings("unchecked")
public List<Integer> getMissingNotices(Session session, int max) {
// https://stackoverflow.com/a/48446303/2257172
return session.createNativeQuery(String.format(
"SELECT DISTINCT(id) FROM UNNEST (SEQUENCE_ARRAY((SELECT MIN(id) FROM Notices WHERE Notices.fonds_cote = '%s'), %d, 1)) SEQ(id)" +
"LEFT OUTER JOIN Notices ON Notices.id = SEQ.id WHERE NOT EXISTS(SELECT n.id FROM Notices n WHERE n.id = Notices.id AND n.fonds_cote = '%s')",
cote, expectedNotices, cote)).list();
cote, max, cote)).list();
}

@Override
Expand Down

0 comments on commit 2001086

Please sign in to comment.