Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@
public class ChanRipper extends AbstractHTMLRipper {
private static List<ChanSite> explicit_domains = Arrays.asList(
new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org")),
new ChanSite(Arrays.asList("archive.moe"), Arrays.asList("data.archive.moe")),
new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")),
new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org"))
new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")),
new ChanSite(Arrays.asList("8ch.net"), Arrays.asList("media.8ch.net")),
new ChanSite(Arrays.asList("yuki.la"), Arrays.asList("ii.yuki.la"))
);

private static List<String> url_piece_blacklist = Arrays.asList(
Expand Down Expand Up @@ -65,7 +68,7 @@ public String getAlbumTitle(URL url) throws MalformedURLException {
// Attempt to use album title as GID
Document doc = getFirstPage();
try {
String subject = doc.select(".post.op > .postinfo > .subject").first().text();
String subject = doc.select(".subject").first().text();
return getHost() + "_" + getGID(url) + "_" + subject;
} catch (NullPointerException e) {
logger.warn("Failed to get thread title from " + url);
Expand All @@ -86,7 +89,8 @@ public boolean canRip(URL url) {
}
}
return url.toExternalForm().contains("/res/") // Most chans
|| url.toExternalForm().contains("/thread/"); // 4chan, archive.moe
|| url.toExternalForm().contains("/thread/") // 4chan, archive.moe
|| url.toExternalForm().contains("/"); // yuki.la, etc
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`|| url.toExternalForm().contains("/"); // yuki.la, etc` is way too greedy: every URL contains "/", so this condition matches any URL at all. Try changing it to a regex that will only match yuki.la links.

}

/**
Expand All @@ -104,8 +108,8 @@ public String getGID(URL url) throws MalformedURLException {
Matcher m;

String u = url.toExternalForm();
if (u.contains("/thread/") || u.contains("/res/")) {
p = Pattern.compile("^.*\\.[a-z]{1,3}/[a-zA-Z0-9]+/(thread|res)/([0-9]+)(\\.html|\\.php)?.*$");
if (u.contains("/thread/") || u.contains("/res/") || u.contains("/")) {
p = Pattern.compile("^.*\\.[a-z]{1,3}/[a-zA-Z0-9]+(/thread/|/res/|/)([0-9]+)(\\.html|\\.php)?.*$");
m = p.matcher(u);
if (m.matches()) {
return m.group(2);
Expand Down